# INTRODUCTION TO THE NILU DATASET

1. Read data into a pandas dataframe
2. Analyze some of the properties of the dataset

In [9]:
import pandas as pd

# Datafiles: 
NILU = "./data/NILU_Dataset_Trondheim_2014-2019.csv"
YR = "./YR_Dataset_Trondheim_2014-2019.json" 

In [10]:
nilu_df = pd.read_csv(NILU, header=[0,1])
index = nilu_df.loc(axis=1)["TIMESTAMP"]
nilu_df = nilu_df.drop(["TIMESTAMP"], axis=1)
index.columns = ["timestamp"]
nilu_df.index = pd.to_datetime(index["timestamp"])


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [11]:
nilu_df

Unnamed: 0_level_0,Bakke kirke,Bakke kirke,Bakke kirke,Bakke kirke,Bakke kirke,E6-Tiller,E6-Tiller,E6-Tiller,E6-Tiller,E6-Tiller,...,Elgeseter,Torvet,Torvet,Torvet,weather,weather,weather,weather,weather,weather
Unnamed: 0_level_1,NO,NO2,NOx,PM10,PM2.5,NO,NO2,NOx,PM10,PM2.5,...,PM2.5,NO2,PM10,PM2.5,humidity,pressure,rain,temperature,wind_from_direction,wind_speed
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2013-12-31 23:00:00,8.940542,31.059213,44.720362,208.450000,155.400000,6.907422,20.473368,31.027908,135.520000,125.900000,...,,29.8,71.2,64.4,,,,,,
2014-01-01 00:00:00,16.235312,48.711927,73.519484,196.790000,154.100000,10.189859,27.745088,43.315193,53.130000,19.100000,...,,27.3,72.7,67.6,86.0,988.9,0.0,1.3,219.000000,1.100000
2014-01-01 01:00:00,20.389393,53.541931,84.696924,83.050000,66.000000,11.384362,27.742818,45.138124,19.250000,5.200000,...,,38.5,59.1,56.2,75.0,988.9,0.0,1.7,159.000000,1.200000
2014-01-01 02:00:00,19.406617,47.354385,77.007696,44.990000,34.900000,3.332265,11.385401,16.477102,4.730000,2.800000,...,,36.1,31.2,30.1,83.0,988.8,0.0,0.4,208.000000,0.800000
2014-01-01 03:00:00,9.294306,41.211849,55.413548,18.260000,14.400000,6.763848,18.429417,28.764576,3.630000,2.500000,...,,35.9,19.3,18.8,67.0,988.1,0.0,1.8,213.000000,2.200000
2014-01-01 04:00:00,8.469626,22.364628,35.306216,4.950000,4.000000,7.660073,24.342749,36.047341,3.740000,2.400000,...,,30.4,9.5,9.3,69.0,987.2,0.0,1.2,70.000000,1.400000
2014-01-01 05:00:00,10.302699,38.687741,54.430265,13.310000,10.400000,1.397618,5.932445,8.068005,3.410000,3.500000,...,,23.2,6.2,6.2,71.0,986.8,0.0,1.5,85.000000,1.700000
2014-01-01 06:00:00,5.487047,30.157564,38.541772,6.820000,5.700000,4.083512,13.206114,19.445721,5.060000,3.600000,...,,24.9,4.4,4.4,64.0,986.6,0.0,3.1,113.000000,1.800000
2014-01-01 07:00:00,5.823155,34.614854,43.512635,6.930000,5.500000,1.400389,8.663652,10.803446,5.720000,4.300000,...,,18.9,3.6,3.6,62.0,986.3,0.0,4.7,197.000000,2.300000
2014-01-01 08:00:00,1.997733,15.875737,18.928273,7.260000,5.100000,0.000000,2.525471,2.388528,5.280000,4.700000,...,,2.0,1.2,1.2,59.0,986.4,0.0,5.8,199.000000,5.200000


## Extra data from the NILU API and sensor placement 

In [12]:
import requests
# Downloading Extra information about the stations located in trondheim:
stations = requests.get("https://api.nilu.no/lookup/stations?area=Trondheim").json()

# Title of each station we have data from:
stations = [st for st in stations if st['station'] in nilu_df.columns.levels[0]]
print(", ".join([st['station'] for st in stations]))

Bakke kirke, E6-Tiller, Elgeseter, Torvet


In [13]:
import ipyleaflet as leaflet
markers = []
for station in stations:
    markers += [leaflet.Marker(
        location=(station['latitude'], station['longitude']), 
        title=station['station'], 
        draggable=False
    )]

trd = leaflet.Map(
    # Sets map center:
    center=(stations[2]['latitude'], stations[2]['longitude']),
    
    # Style map with cool theme. 
    # More themes: https://leaflet-extras.github.io/leaflet-providers/preview/
    basemap=leaflet.basemaps.Stamen.Toner,
    
    # Map zoom level:
    zoom=11
)

# You can add heatmap of the sensor data using: 
# leaflet.Heatmap()

for marker in markers:
    trd.add_layer(marker)
trd

Map(basemap={'url': 'http://stamen-tiles-a.a.ssl.fastly.net/toner/{z}/{x}/{y}.png', 'attribution': 'Map tiles …

## DATASET PROPERTIES:

The data contained in the dataframe is both the NILU sensor data for measuring particles in the air. In the end, you will also find some weather columns describing the weather for that hour of the day. 

Not all fields are complete. You will find NaN values within the dataframe. You will need to handle this in your model. 

In [24]:
import plotly as py
from plotly.offline import iplot, init_notebook_mode
import plotly.figure_factory as ff
import plotly.graph_objs as go
init_notebook_mode(connected=True)

In [52]:
plot_df = nilu_df.groupby(nilu_df.index.weekday_name).mean()
NOx = [(st['station'], plot_df.loc(axis=1)[(st['station'], "NOx")]) for st in stations[:3]]

## Plotting mean NOx levels grouped by weekday: 

In [88]:
data = [go.Bar(x=plot_df.index, y=data, name=st) for st, data in NOx]
iplot(data)

ValueError: too many values to unpack (expected 2)