In [59]:
import pandas as pd

In [60]:
air_quality_no2 = pd.read_csv('data/air_quality_no2_long.csv', parse_dates=True)[
    ['date.utc', 'location', 'parameter', 'value']
]
air_quality_no2.head()

Unnamed: 0,date.utc,location,parameter,value
0,2019-06-21 00:00:00+00:00,FR04014,no2,20.0
1,2019-06-20 23:00:00+00:00,FR04014,no2,21.8
2,2019-06-20 22:00:00+00:00,FR04014,no2,26.5
3,2019-06-20 21:00:00+00:00,FR04014,no2,24.9
4,2019-06-20 20:00:00+00:00,FR04014,no2,21.4


In [61]:
air_quality_pm25 = pd.read_csv('data/air_quality_pm25_long.csv', parse_dates=True)[
    ['date.utc', 'location', 'parameter', 'value']
]
air_quality_pm25.head()

Unnamed: 0,date.utc,location,parameter,value
0,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0
1,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5
2,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5
3,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0
4,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5


In [62]:
air_quality = pd.concat([air_quality_pm25, air_quality_no2], axis=0, keys=['PM25', 'NO2'])

print(f"Shape of the `air_quality_pm25` {air_quality_pm25.shape}")
print(f"Shape of the `air_quality_no2` {air_quality_no2.shape}")
print(f"Shape of the `air_quality` {air_quality.shape}")

Shape of the `air_quality_pm25` (1110, 4)
Shape of the `air_quality_no2` (2068, 4)
Shape of the `air_quality` (3178, 4)


In [63]:
air_quality = air_quality.sort_values('date.utc')
air_quality.head()

Unnamed: 0,Unnamed: 1,date.utc,location,parameter,value
NO2,2067,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0
NO2,1003,2019-05-07 01:00:00+00:00,FR04014,no2,25.0
PM25,100,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5
NO2,1098,2019-05-07 01:00:00+00:00,BETR801,no2,50.5
PM25,1109,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0


In [64]:
stations_cord = pd.read_csv('data/air_quality_stations.csv')
stations_cord.head()

Unnamed: 0,location,coordinates.latitude,coordinates.longitude
0,BELAL01,51.23619,4.38522
1,BELHB23,51.1703,4.341
2,BELLD01,51.10998,5.00486
3,BELLD02,51.12038,5.02155
4,BELR833,51.32766,4.36226


In [65]:
air_quality = pd.merge(air_quality, stations_cord, how="left", on="location")
air_quality.head()

Unnamed: 0,date.utc,location,parameter,value,coordinates.latitude,coordinates.longitude
0,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,51.49467,-0.13193
1,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,48.83724,2.3939
2,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,48.83722,2.3939
3,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,51.20966,4.43182
4,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,51.20966,4.43182


In [66]:
air_quality_parameters = pd.read_csv('data/air_quality_parameters.csv')
air_quality_parameters.head()

Unnamed: 0,id,description,name
0,bc,Black Carbon,BC
1,co,Carbon Monoxide,CO
2,no2,Nitrogen Dioxide,NO2
3,o3,Ozone,O3
4,pm10,Particulate matter less than 10 micrometers in...,PM10


In [67]:
air_quality = pd.merge(air_quality, air_quality_parameters, how="left", left_on="parameter", right_on="id")
air_quality.head()

Unnamed: 0,date.utc,location,parameter,value,coordinates.latitude,coordinates.longitude,id,description,name
0,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,51.49467,-0.13193,no2,Nitrogen Dioxide,NO2
1,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,48.83724,2.3939,no2,Nitrogen Dioxide,NO2
2,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,48.83722,2.3939,no2,Nitrogen Dioxide,NO2
3,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,51.20966,4.43182,pm25,Particulate matter less than 2.5 micrometers i...,PM2.5
4,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,51.20966,4.43182,no2,Nitrogen Dioxide,NO2
