In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# Scatter plot
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Heatmap
import folium
from folium.plugins import HeatMap

# Time series
from plotly.subplots import make_subplots


In [19]:
# Load the sensor measurements from the CSV export into a DataFrame
csv_path = '1Day2022.csv'
df = pd.read_csv(csv_path)

# Leave the frame as the last expression so the notebook renders it
df

Unnamed: 0,Time(UTC),Lat(°),Lon(°),PM1(μg/m³),PM2.5(μg/m³),PM10(μg/m³),NO2(μg/m³),Sensor_number
0,2022-02-06,51.4379,5.3581,6.79,10.16,17.46,19.0,2
1,2022-02-26,51.4379,5.3581,7.39,11.79,19.50,18.0,2
2,2022-02-17,51.4379,5.3581,4.22,6.13,9.91,9.0,2
3,2022-02-09,51.4379,5.3581,6.37,8.77,14.87,15.0,2
4,2022-02-23,51.4379,5.3581,11.64,19.33,32.62,18.0,2
...,...,...,...,...,...,...,...,...
13972,2022-12-04,51.4904,5.3941,33.83,40.65,40.97,5.0,58
13973,2022-12-03,51.4904,5.3941,33.10,41.69,44.02,8.0,58
13974,2022-12-02,51.4904,5.3941,33.91,41.37,42.81,11.0,58
13975,2022-12-30,51.4904,5.3941,4.99,7.07,10.85,7.0,58


## 1. Time series line chart

Show the trend of each pollutant over time.

### Look at each individual pollutant

In [20]:
# Parse the timestamps so the x-axis is handled as dates
df['Time(UTC)'] = pd.to_datetime(df['Time(UTC)'])

# Index the frame by timestamp and order it chronologically (both in place)
df.set_index('Time(UTC)', inplace=True)
df.sort_values(by='Time(UTC)', inplace = True)

# Trace label -> DataFrame column, in the order the traces are drawn
pollutant_columns = {
    'PM1': 'PM1(μg/m³)',
    'PM2.5': 'PM2.5(μg/m³)',
    'PM10': 'PM10(μg/m³)',
    'NO2': 'NO2(μg/m³)',
}

# Interactive Plotly figure with one semi-transparent trace per pollutant
fig = go.Figure()
for label, column in pollutant_columns.items():
    fig.add_trace(go.Scatter(x=df.index, y=df[column], name=label, opacity=0.5))

# Dropdown: an 'All' entry followed by one entry per pollutant that shows
# only the matching trace (the visibility mask follows the trace order)
labels = list(pollutant_columns)
dropdown_buttons = [
    {'label': 'All', 'method': 'update', 'args': [{'visible': [True] * len(labels)}]}
]
dropdown_buttons += [
    {
        'label': shown,
        'method': 'update',
        'args': [{'visible': [other == shown for other in labels]}],
    }
    for shown in labels
]

layout_options = dict(
    title='Pollutant Concentrations over Time',
    xaxis_title='Time',
    yaxis_title='Pollutant Concentration (μg/m³)',
    updatemenus=[{'buttons': dropdown_buttons}],
)
fig.update_layout(**layout_options)

fig.show()


In [21]:
# Turn 'Time(UTC)' back into a regular column (undoes the set_index above);
# later cells reference it by column name, e.g. as hover text in the scatter plot
df.reset_index(inplace=True)

## 2. Scatter plot

Show the relationship between two pollutants to reveal how strongly their concentration levels are correlated.

The scatter plot can help visualize any correlation between different air pollutant levels. For example, if the plot shows a strong positive correlation between PM2.5 and PM10, this could indicate that these pollutants are coming from similar sources, such as traffic or industry.

In [22]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output


# Initialize the Dash app
app = dash.Dash(__name__)

# Both axes offer the same pollutants: (label shown to the user, DataFrame column)
_axis_choices = [
    ('PM1', 'PM1(μg/m³)'),
    ('PM2.5', 'PM2.5(μg/m³)'),
    ('PM10', 'PM10(μg/m³)'),
    ('NO2', 'NO2(μg/m³)'),
]

# Dropdown options for the x-axis and the y-axis (kept as two separate lists)
x_dropdown_options = [{'label': shown, 'value': column} for shown, column in _axis_choices]
y_dropdown_options = [{'label': shown, 'value': column} for shown, column in _axis_choices]

# Build the individual components first, then assemble the page
x_axis_dropdown = dcc.Dropdown(
    id='x-axis-dropdown',
    options=x_dropdown_options,
    value='PM2.5(μg/m³)',
)
y_axis_dropdown = dcc.Dropdown(
    id='y-axis-dropdown',
    options=y_dropdown_options,
    value='PM1(μg/m³)',
)
# The figure starts empty; the callback registered on these ids fills it in
scatter_graph = dcc.Graph(id='pollutant-scatter-plot', figure={})

# Layout: x-axis selector, y-axis selector, then the scatter plot
app.layout = html.Div([x_axis_dropdown, y_axis_dropdown, scatter_graph])

# Define the callback function to update the scatter plot
@app.callback(
    Output('pollutant-scatter-plot', 'figure'),
    [Input('x-axis-dropdown', 'value'),
     Input('y-axis-dropdown', 'value')]
)
def update_pollutant_scatter_plot(selected_pollutant, selected_yaxis):
    """Redraw the scatter plot whenever one of the axis dropdowns changes.

    Parameters
    ----------
    selected_pollutant : str or None
        Column of ``df`` chosen in the x-axis dropdown.
    selected_yaxis : str or None
        Column of ``df`` chosen in the y-axis dropdown.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter of the two selected columns, coloured by ``Sensor_number``
        with ``Time(UTC)`` as hover title.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When either dropdown has been cleared, so the previous figure is kept
        instead of plotting against a missing column.
    """
    # A cleared dropdown delivers None; there is nothing sensible to plot then
    if selected_pollutant is None or selected_yaxis is None:
        raise dash.exceptions.PreventUpdate

    fig = px.scatter(df, x=selected_pollutant, y=selected_yaxis, color="Sensor_number", hover_name="Time(UTC)")
    fig.update_layout(
        title=f"{selected_pollutant} vs. {selected_yaxis}",
        # The dropdown values are the column names, which already carry the
        # "(μg/m³)" unit, so they are used as axis titles without appending it again
        xaxis_title=selected_pollutant,
        yaxis_title=selected_yaxis,
    )
    return fig

# # Run the app
# if __name__ == '__main__':
#     app.run_server(debug=True, use_reloader=False)


### Show where the sensors are located

In [42]:
import plotly.express as px

# Collapse the readings to one row per sensor. Plotting every reading stacks
# many markers on the same coordinates, so the visible colour would just be
# whichever reading happens to be drawn last.
sensor_locations = (
    df.groupby('Sensor_number', as_index=False)[['Lat(°)', 'Lon(°)', 'PM1(μg/m³)']]
      .mean()
)

# Define the center of the map
center_lat = sensor_locations['Lat(°)'].mean()
center_lon = sensor_locations['Lon(°)'].mean()

# Create the scatter plot on a map background: one marker per sensor,
# coloured by that sensor's mean PM1 level
fig = px.scatter_mapbox(sensor_locations, lat='Lat(°)', lon='Lon(°)', color='PM1(μg/m³)', range_color=(0,10),
                        hover_name='Sensor_number', zoom=10, center={'lat': center_lat, 'lon': center_lon},
                        labels={'PM1(μg/m³)': 'Mean PM1 (μg/m³)'},
                        mapbox_style="carto-positron",
                        width=1200, height=800)


# Update the size and opacity of the markers
fig.update_traces(marker=dict(size=10, opacity=0.8))
# Remove the outer margins so the map fills the figure
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))


# Show the figure
fig.show()



## 3. Heatmap

In [24]:
def create_heatmap(df, particle_type, zoom_start=10):
    """Build a folium map with a heat layer weighted by one pollutant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Lat(°)'``, ``'Lon(°)'`` and
        ``f'{particle_type}(μg/m³)'``.
    particle_type : str
        Pollutant prefix of the value column, e.g. ``'PM2.5'`` or ``'NO2'``.
    zoom_start : int, optional
        Initial zoom level of the map (default 10).

    Returns
    -------
    folium.Map
        Map centered on the median latitude/longitude with the heat layer added.

    Raises
    ------
    ValueError
        If no row has a latitude, longitude and pollutant value all present.
    """
    value_column = f'{particle_type}(μg/m³)'

    # HeatMap refuses data containing NaN, so keep only complete rows
    points = df[['Lat(°)', 'Lon(°)', value_column]].dropna()
    if points.empty:
        raise ValueError(f"no complete rows available for '{value_column}'")

    # create a map centered on the median latitude and longitude
    map_center = [df['Lat(°)'].median(), df['Lon(°)'].median()]
    m = folium.Map(location=map_center, zoom_start=zoom_start)

    # create a HeatMap layer of [lat, lon, weight] triples and add it to the map
    heat_data = points.to_numpy().tolist()
    HeatMap(heat_data).add_to(m)

    return m


In [25]:
# Build one folium heatmap per pollutant
pm25_heatmap, pm1_heatmap, pm10_heatmap, NO2_heatmap = (
    create_heatmap(df, particle) for particle in ('PM2.5', 'PM1', 'PM10', 'NO2')
)

# Render the NO2 heatmap as this cell's output
NO2_heatmap



In [26]:
# Render the PM2.5 heatmap as this cell's output
pm25_heatmap

## 4. Time Series Analysis

In [27]:
# Convert the Time(UTC) column to datetime format
df['Time(UTC)'] = pd.to_datetime(df['Time(UTC)'])

# Set the Time(UTC) column as the index of the dataframe
df.set_index('Time(UTC)', inplace=True)

# The dashboard plots df.index on the x-axis, so make sure the rows are in
# chronological order here instead of relying on a sort done in an earlier cell
df.sort_index(inplace=True)

# Create a list of the pollutants to be analyzed
pollutants = ['PM1(μg/m³)', 'PM2.5(μg/m³)', 'PM10(μg/m³)', 'NO2(μg/m³)']


In [28]:
# Create the dashboard layout: a 2x2 grid with one panel per pollutant
fig = make_subplots(rows=2, cols=2, subplot_titles=pollutants)

# Add time series line charts for each pollutant, filling the grid row by row
for i, pollutant in enumerate(pollutants):
    row, col = divmod(i, 2)
    fig.add_trace(go.Scatter(x=df.index, y=df[pollutant], name=pollutant),
                  row=row + 1, col=col + 1)

# Set the axis labels and titles.
# matches='x' links every panel's x-axis to the first one; without it the
# range selector and slider below would only move the first panel.
fig.update_xaxes(title_text='Time', matches='x')
fig.update_yaxes(title_text='Pollutant level (μg/m³)')
fig.update_layout(title='Historical Pollutant Levels', height=800, width=1000)

# Add range-selector buttons and a range slider on the first x-axis
range_buttons = [
    dict(count=1, label="1d", step="day", stepmode="backward"),
    dict(count=7, label="1w", step="day", stepmode="backward"),
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig.update_layout(xaxis=dict(
    rangeselector=dict(buttons=range_buttons),
    rangeslider=dict(visible=True),
    type="date",
))

# Show the dashboard
fig.show()


In [29]:
# Move 'Time(UTC)' from the index back to a regular column, reversing the
# set_index done before building the dashboard
df.reset_index(inplace=True)