## 1. Import Libraries

In [46]:
import pandas as pd
import numpy as np
import sklearn
from pandas_profiling import ProfileReport
from functools import reduce

# Increase maximum columns displayed by pandas
pd.options.display.max_columns = 100

## 2. Load Data

### 2.1  Emissions by Link
- LAEI2013_MajorRoads_EmissionsbyLink_2013.xlsx
    - Sheets: 2013 LTS Rds and 2013 Other Major Rds

In [6]:
%%time

df_2013_EmissionsbyLink = pd.read_excel("LAEI2013_MajorRoads_EmissionsbyLink_2013.xlsx", sheet_name=[0, 1])

# The above file contains two sheets
df_2013_LTS_Rds = df_2013_EmissionsbyLink[0]
df_2013_Other_Major_Rds = df_2013_EmissionsbyLink[1]

print(df_2013_LTS_Rds.shape)
print(df_2013_Other_Major_Rds.shape)

Wall time: 12min 43s


In [74]:
df_2013_LTS_Rds.columns

Index(['GridId', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'Lts', 'Length (m)', 'Emissions', 'Year',
       'Pollutant', 'Emissions Unit', 'Motorcycle', 'Taxi', 'Car',
       'BusAndCoach', 'Lgv', 'Rigid', 'Artic', 'Rigid2Axle', 'Rigid3Axle',
       'Rigid4Axle', 'Artic3Axle', 'Artic5Axle', 'Artic6Axle', 'PetrolCar',
       'DieselCar', 'PetrolLgv', 'DieselLgv', 'LtBus', 'Coach', 'ElectricCar',
       'ElectricLgv'],
      dtype='object')

In [73]:
df_2013_Other_Major_Rds.columns

# Lts (for LTS_Rds) vs. DotRef (for Other_Major_Rds)

Index(['GridId', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'DotRef', 'Length (m)', 'Emissions', 'Year',
       'Pollutant', 'Emissions Unit', 'Motorcycle', 'Taxi', 'Car',
       'BusAndCoach', 'Lgv', 'Rigid', 'Artic', 'Rigid2Axle', 'Rigid3Axle',
       'Rigid4Axle', 'Artic3Axle', 'Artic5Axle', 'Artic6Axle', 'PetrolCar',
       'DieselCar', 'PetrolLgv', 'DieselLgv', 'LtBus', 'Coach', 'ElectricCar',
       'ElectricLgv'],
      dtype='object')

In [105]:
df_2013_LTS_Rds.head()

Unnamed: 0,GridId,Toid,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,Lts,Length (m),Emissions,Year,Pollutant,Emissions Unit,Motorcycle,Taxi,Car,BusAndCoach,Lgv,Rigid,Artic,Rigid2Axle,Rigid3Axle,Rigid4Axle,Artic3Axle,Artic5Axle,Artic6Axle,PetrolCar,DieselCar,PetrolLgv,DieselLgv,LtBus,Coach,ElectricCar,ElectricLgv
0,6253,4000000027908919,24,External,NonGLA,18898,50.761449,DFT,2013,CO2,tonnes/year,0.15829,0.374854,13.572217,0.0,1.772671,1.538737,0.609006,1.045237,0.178038,0.315462,0.177074,0.241372,0.19056,8.761443,4.810774,0.03755,1.735121,0.0,0.0,0.0,0.0
1,6253,4000000027947931,24,External,NonGLA,18895,28.592125,DFT,2013,CO2,tonnes/year,0.0,0.0,0.024111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015535,0.008576,0.0,0.0,0.0,0.0,0.0,0.0
2,6253,4000000028013383,24,External,NonGLA,15816,5.101391,DFT,2013,CO2,tonnes/year,0.016974,0.03954,1.457712,0.0,0.188471,0.169197,0.068765,0.114091,0.020032,0.035074,0.019985,0.027271,0.021509,0.939028,0.518684,0.004055,0.184415,0.0,0.0,0.0,0.0
3,6253,4000000028025820,24,External,NonGLA,15816,3.757501,DFT,2013,CO2,tonnes/year,0.012503,0.029124,1.073698,0.0,0.138821,0.124625,0.05065,0.084036,0.014755,0.025834,0.01472,0.020087,0.015843,0.691654,0.382044,0.002987,0.135834,0.0,0.0,0.0,0.0
4,6253,4000000028029388,24,External,NonGLA,15816,1.624593,DFT,2013,CO2,tonnes/year,0.005406,0.012592,0.464224,0.0,0.06002,0.053883,0.021899,0.036334,0.006379,0.01117,0.006365,0.008685,0.00685,0.299044,0.16518,0.001292,0.058729,0.0,0.0,0.0,0.0


In [106]:
df_2013_Other_Major_Rds.head()

Unnamed: 0,GridId,Toid,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,DotRef,Length (m),Emissions,Year,Pollutant,Emissions Unit,Motorcycle,Taxi,Car,BusAndCoach,Lgv,Rigid,Artic,Rigid2Axle,Rigid3Axle,Rigid4Axle,Artic3Axle,Artic5Axle,Artic6Axle,PetrolCar,DieselCar,PetrolLgv,DieselLgv,LtBus,Coach,ElectricCar,ElectricLgv
0,5911,4000000027989878,2,External,NonGLA,28440,9.714495,DFT,2013,CO2,tonne/year,0.109479,0.489228,38.421925,0.744254,11.284972,6.154057,16.431225,3.69368,1.346705,1.113672,0.875312,3.006694,12.549219,18.791658,19.630267,0.279151,11.00582,0.0,0.744254,0.0,0.0
1,5911,4000000027989880,2,External,NonGLA,28440,0.0,DFT,2013,CO2,tonne/year,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5911,4000000027989882,2,External,NonGLA,57226,8.577192,DFT,2013,CO2,tonne/year,0.206466,0.365668,29.778628,1.623398,7.854346,3.257626,3.673683,1.737763,0.563588,0.956275,0.466739,0.760333,2.446611,19.478135,10.300493,0.120149,7.734197,0.754408,0.86899,0.0,0.0
3,5911,4000000028014332,2,External,NonGLA,57226,9.347936,DFT,2013,CO2,tonne/year,0.203719,0.349104,30.727094,1.717707,7.542684,3.523312,3.977384,1.871444,0.612446,1.039422,0.505633,0.82313,2.648621,20.173154,10.55394,0.123945,7.418739,0.820669,0.897038,0.0,0.0
4,5911,4000000027888882,2,External,NonGLA,28440,0.0,DFT,2013,CO2,tonne/year,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 2.1.1 Concatenante the two sheets into one dataframe

In [68]:
%%time 
df_merged_EmissionsbyLink = pd.concat([df_2013_LTS_Rds, df_2013_Other_Major_Rds])

print(df_merged_EmissionsbyLink.shape)

(879960, 33)
Wall time: 679 ms


In [70]:
df_merged_EmissionsbyLink.columns

Index(['GridId', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'Lts', 'Length (m)', 'Emissions', 'Year',
       'Pollutant', 'Emissions Unit', 'Motorcycle', 'Taxi', 'Car',
       'BusAndCoach', 'Lgv', 'Rigid', 'Artic', 'Rigid2Axle', 'Rigid3Axle',
       'Rigid4Axle', 'Artic3Axle', 'Artic5Axle', 'Artic6Axle', 'PetrolCar',
       'DieselCar', 'PetrolLgv', 'DieselLgv', 'LtBus', 'Coach', 'ElectricCar',
       'ElectricLgv', 'DotRef'],
      dtype='object')

In [107]:
df_merged_EmissionsbyLink.head()

Unnamed: 0,GridId,Toid,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,Lts,Length (m),Emissions,Year,Pollutant,Emissions Unit,Motorcycle,Taxi,Car,BusAndCoach,Lgv,Rigid,Artic,Rigid2Axle,Rigid3Axle,Rigid4Axle,Artic3Axle,Artic5Axle,Artic6Axle,PetrolCar,DieselCar,PetrolLgv,DieselLgv,LtBus,Coach,ElectricCar,ElectricLgv,DotRef
0,6253,4000000027908919,24,External,NonGLA,18898.0,50.761449,DFT,2013,CO2,tonnes/year,0.15829,0.374854,13.572217,0.0,1.772671,1.538737,0.609006,1.045237,0.178038,0.315462,0.177074,0.241372,0.19056,8.761443,4.810774,0.03755,1.735121,0.0,0.0,0.0,0.0,
1,6253,4000000027947931,24,External,NonGLA,18895.0,28.592125,DFT,2013,CO2,tonnes/year,0.0,0.0,0.024111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015535,0.008576,0.0,0.0,0.0,0.0,0.0,0.0,
2,6253,4000000028013383,24,External,NonGLA,15816.0,5.101391,DFT,2013,CO2,tonnes/year,0.016974,0.03954,1.457712,0.0,0.188471,0.169197,0.068765,0.114091,0.020032,0.035074,0.019985,0.027271,0.021509,0.939028,0.518684,0.004055,0.184415,0.0,0.0,0.0,0.0,
3,6253,4000000028025820,24,External,NonGLA,15816.0,3.757501,DFT,2013,CO2,tonnes/year,0.012503,0.029124,1.073698,0.0,0.138821,0.124625,0.05065,0.084036,0.014755,0.025834,0.01472,0.020087,0.015843,0.691654,0.382044,0.002987,0.135834,0.0,0.0,0.0,0.0,
4,6253,4000000028029388,24,External,NonGLA,15816.0,1.624593,DFT,2013,CO2,tonnes/year,0.005406,0.012592,0.464224,0.0,0.06002,0.053883,0.021899,0.036334,0.006379,0.01117,0.006365,0.008685,0.00685,0.299044,0.16518,0.001292,0.058729,0.0,0.0,0.0,0.0,


### 2.1.2 Pivoting the Pollutants column to have a single record per Toid (Link ID)

In [175]:
pivot_index = ['GridId', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'Lts', 'Length (m)', 'Year', 'DotRef']
pivot_columns = "Pollutant"
pivot_values = ['Motorcycle', 'Taxi', 'Car',
       'BusAndCoach', 'Lgv', 'Rigid', 'Artic', 'Rigid2Axle', 'Rigid3Axle',
       'Rigid4Axle', 'Artic3Axle', 'Artic5Axle', 'Artic6Axle', 'PetrolCar',
       'DieselCar', 'PetrolLgv', 'DieselLgv', 'LtBus', 'Coach', 'ElectricCar',
       'ElectricLgv']

In [176]:
df_merged_EmissionsbyLink_pivoted = df_merged_EmissionsbyLink.pivot(index=pivot_index, columns=pivot_columns, values=pivot_values)

In [177]:
df_merged_EmissionsbyLink_pivoted.shape

(87996, 210)

### 2.2 Concentrations (Modelled at 20m resolution)

- PostLAEI2013_2013_NO2.csv
- PostLAEI2013_2013_NOx.csv
- PostLAEI2013_2013_PM10.csv
- PostLAEI2013_2013_PM10d.csv
- PostLAEI2013_2013_PM25.csv

In [39]:
%%time

df_NO2_concentrations = pd.read_csv("PostLAEI2013_2013_NO2.csv")
print(df_NO2_concentrations.shape)

df_NOx_concentrations = pd.read_csv("PostLAEI2013_2013_NOx.csv")
print(df_NOx_concentrations.shape)

df_PM10_concentrations = pd.read_csv("PostLAEI2013_2013_PM10.csv")
print(df_PM10_concentrations.shape)

df_PM10d_concentrations = pd.read_csv("PostLAEI2013_2013_PM10d.csv")
print(df_PM10d_concentrations.shape)

df_PM25_concentrations = pd.read_csv("PostLAEI2013_2013_PM25.csv")
print(df_PM25_concentrations.shape)

(5856428, 4)
(5856428, 4)
(5856428, 4)
(5856428, 4)
(5856428, 4)
Wall time: 7.75 s


In [51]:
%%time

# Rename conct column to to include the pollutant name (for joining the dataframes)
df_NO2_concentrations.rename(columns={"conct":"NO2_conct"}, inplace=True)
df_NOx_concentrations.rename(columns={"conct":"NOx_conct"}, inplace=True)
df_PM10_concentrations.rename(columns={"conct":"PM10_conct"}, inplace=True)
df_PM10d_concentrations.rename(columns={"conct":"PM10d_conct"}, inplace=True)
df_PM25_concentrations.rename(columns={"conct":"PM25_conct"}, inplace=True)

Wall time: 0 ns


#### 2.2.1 Joining All the Concentrations Dataframes

In [54]:
%%time

concetrations_dataframes = [df_NO2_concentrations, df_NOx_concentrations, df_PM10_concentrations, df_PM10d_concentrations, df_PM25_concentrations]

df_merged_concentrations = reduce(lambda left, right: pd.merge(left, right, on=["x", "y", "year"]), concetrations_dataframes)
print(df_merged_concentrations.shape)

(5856428, 8)
Wall time: 7.18 s


In [55]:
df_merged_concentrations.head()

Unnamed: 0,x,y,NO2_conct,year,NOx_conct,PM10_conct,PM10d_conct,PM25_conct
0,501460,170580,31.31919,2013,49.21837,23.74403,9.30235,14.90253
1,501460,170600,31.55455,2013,49.78666,23.78645,9.387261,14.919
2,501460,170620,31.79392,2013,50.36728,23.82961,9.473991,14.93577
3,501460,170640,32.03141,2013,50.94635,23.87249,9.560539,14.95249
4,501460,170660,32.26324,2013,51.51179,23.91457,9.645805,14.96894


### 2.3 Emissions Summary (1 km grid square resolution)
- LAEI2013_Emissions_Summary-CO2_v1.1.xlsx
- LAEI2013_Emissions_Summary-NOx_v1.1.xlsx
- LAEI2013_Emissions_Summary-PM2.5_v1.1.xlsx
- LAEI2013_Emissions_Summary-PM10_v1.1.xlsx
- LAEI2013_Emissions_Summary-OtherPollutantns_v1.0.xlsb

In [None]:
# Not required at the moment

### 2.4 Road Traffic Data
- LAEI2013_2013_AADT-VKM.xlsx
    - Sheets: MajorGrid_AADTandVKM_2013 and MinorGrid_VKM_2013

In [82]:
%%time

df_2013_AADT = pd.read_excel("LAEI2013_2013_AADT-VKM.xlsx", sheet_name=[1, 2])

# The above contains two sheets
df_2013_MajorGrid_AADT = df_2013_AADT[1]
df_2013_MinorGrid_VKM = df_2013_AADT[2]

print(df_2013_MajorGrid_AADT.shape)
print(df_2013_MinorGrid_VKM.shape)

(87999, 44)
(3355, 21)
Wall time: 40.2 s


In [83]:
df_2013_MajorGrid_AADT.columns

Index(['RowID', 'Year', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'TLRN', 'MotorwayNumber', 'AADT Motorcycle',
       'AADT Taxi', 'AADT Pcar', 'AADT Dcar', 'AADT PLgv', 'AADT DLgv',
       'AADT LtBus', 'AADT Coach', 'AADT Rigid2Axle', 'AADT Rigid3Axle',
       'AADT Rigid4Axle', 'AADT Artic3Axle', 'AADT Artic5Axle',
       'AADT Artic6Axle', 'AADT ElectricCar', 'AADT ElectricLgv', 'AADT TOTAL',
       'Speed (kph)', 'Length (m)', 'VKM_Motorcycle', 'VKM_Taxi', 'VKM_Pcar',
       'VKM_Dcar', 'VKM_PLgv', 'VKM_DLgv', 'VKM_LtBus', 'VKM_Coach',
       'VKM_Rigid2Axle', 'VKM_Rigid3Axle', 'VKM_Rigid4Axle', 'VKM_Artic3Axle',
       'VKM_Artic5Axle', 'VKM_Artic6Axle', 'VKM_ElectricCar',
       'VKM_ElectricLgv', 'VKM_TOTAL'],
      dtype='object')

In [84]:
df_2013_MinorGrid_VKM.columns

Index(['Year', 'ID', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'Easting', 'Northing', 'speed (kph)',
       'VKM_Motorcycle', 'VKM_Taxi', 'VKM_Bus', 'VKM_Coach', 'VKM_Rigid',
       'VKM_Artic', 'VKM_Petrolcar', 'VKM_Dieselcar', 'VKM_Electriccar',
       'VKM_Petrollgv', 'VKM_Diesellgv', 'VKM_Electriclgv', 'VKM_Total'],
      dtype='object')

In [108]:
df_2013_MajorGrid_AADT.head()

Unnamed: 0,RowID,Year,Toid,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,TLRN,MotorwayNumber,AADT Motorcycle,AADT Taxi,AADT Pcar,AADT Dcar,AADT PLgv,AADT DLgv,AADT LtBus,AADT Coach,AADT Rigid2Axle,AADT Rigid3Axle,AADT Rigid4Axle,AADT Artic3Axle,AADT Artic5Axle,AADT Artic6Axle,AADT ElectricCar,AADT ElectricLgv,AADT TOTAL,Speed (kph),Length (m),VKM_Motorcycle,VKM_Taxi,VKM_Pcar,VKM_Dcar,VKM_PLgv,VKM_DLgv,VKM_LtBus,VKM_Coach,VKM_Rigid2Axle,VKM_Rigid3Axle,VKM_Rigid4Axle,VKM_Artic3Axle,VKM_Artic5Axle,VKM_Artic6Axle,VKM_ElectricCar,VKM_ElectricLgv,VKM_TOTAL
0,1.0,2013.0,4000000000000000.0,836.0,Outer,Hillingdon,Other,Other,88.301916,77.11258,4093.961441,2429.165893,21.502284,1080.377347,235.453345,74.723988,147.036213,28.026842,19.54154,8.106493,5.492629,1.999639,2.170702,0.618469,8313.591321,36.9382,5.472146,176.368343,154.019511,8177.004838,4851.853527,42.947224,2157.873473,470.278768,149.248696,293.6803,55.978941,39.030966,16.191367,10.970609,3.993946,4.335614,1.235289,16605.011414
1,2.0,2013.0,4000000000000000.0,2217.0,Outer,Hillingdon,Other,Other,88.301916,77.11258,4093.961441,2429.165893,21.502284,1080.377347,235.125653,74.723988,147.036213,28.026842,19.54154,8.106493,5.492629,1.999639,2.170702,0.618469,8313.263629,35.285178,3.605559,116.207872,101.482382,5387.771477,3196.852461,28.297627,1421.807786,309.43215,98.338925,193.503902,36.884134,25.717231,10.668379,7.228458,2.631583,2.856706,0.813924,10940.494996
2,3.0,2013.0,4000000000000000.0,282.0,External,NonGLA,Other,Other,310.363572,100.322495,10087.319861,5985.345419,39.934745,2006.512158,53.436368,39.957689,312.273405,72.61417,69.001679,41.253397,47.428538,39.717406,5.348502,1.148642,19211.978046,49.065141,113.618491,12871.009867,4160.449042,418328.713029,248216.759321,1656.123809,83211.56265,2216.046236,1657.075319,12950.212101,3011.364039,2861.551314,1710.809301,1966.897025,1647.110606,221.80638,47.635028,796735.125068
3,4.0,2013.0,4000000000000000.0,873.0,Outer,Hillingdon,Other,Other,39.473081,144.548284,7709.574508,4574.502157,27.580811,1385.791535,16.741434,6.123638,507.392837,106.440958,53.151045,24.428295,42.324228,101.675382,4.087773,0.793306,14744.629271,49.1731,52.797356,760.68713,2785.595031,148571.479776,88155.390937,531.510766,26705.637104,322.624763,118.008843,9777.985094,2051.227418,1024.275647,470.758531,815.631678,1959.389833,78.775616,15.287825,284144.265992
4,5.0,2013.0,4000000000000000.0,2930.0,Outer,Hillingdon,Other,Other,39.473081,144.548284,7709.574508,4574.502157,27.580811,1385.791535,16.741434,6.123638,507.392837,106.440958,53.151045,24.428295,42.324228,101.675382,4.087773,0.793306,14744.629271,49.1731,179.504952,2586.248957,9470.703474,505125.983487,299718.213847,1807.075618,90796.101831,1096.887173,401.216526,33244.027352,6973.937855,3482.419671,1600.524988,2773.054115,6661.700602,267.828056,51.97685,966057.900401


In [90]:
df_2013_MinorGrid_VKM.head()

Unnamed: 0,Year,ID,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,Easting,Northing,speed (kph),VKM_Motorcycle,VKM_Taxi,...,VKM_Coach,VKM_Rigid,VKM_Artic,VKM_Petrolcar,VKM_Dieselcar,VKM_Electriccar,VKM_Petrollgv,VKM_Diesellgv,VKM_Electriclgv,VKM_Total
0,2013,5910,1,External,NonGLA,510500,203500,30,7223.168225,3303.116745,...,4513.072361,7680.054517,853.427764,309599.517097,183701.922502,164.155952,1083.285552,54429.435225,31.158503,572582.314443
1,2013,5911,2,External,NonGLA,511500,203500,30,4742.141375,2167.935313,...,2962.995481,5041.590112,559.699517,203270.767843,120611.398853,107.77829,711.243897,35736.287198,20.457482,375932.29536
2,2013,5912,3,External,NonGLA,512500,203500,30,7278.285529,3327.995491,...,4548.852095,7740.195812,860.663552,312015.14593,185135.244055,165.436767,1091.752757,54854.868005,31.402045,577049.842037
3,2013,5915,4,External,NonGLA,515500,203500,30,7204.22478,3294.143071,...,4499.043482,7654.621079,847.495192,308578.002296,183095.803234,163.614324,1079.68319,54248.43538,31.054888,570696.120915
4,2013,5916,5,External,NonGLA,516500,203500,30,6499.471249,2972.477278,...,4057.870123,6911.448474,766.513286,278614.89455,165317.091727,147.727276,974.89339,48983.295782,28.040823,515273.723959


## 3. Joining Road Traffic Data (MajorGrid only) with Emissions by Link Data

In [178]:
%%time
df_merged_AADT_Emissions = pd.merge(df_2013_MajorGrid_AADT, df_merged_EmissionsbyLink_pivoted, on=["Toid", "GRID_ExactCut_ID"])



Wall time: 538 ms


In [179]:
df_merged_AADT_Emissions.head()

Unnamed: 0,RowID,Year,Toid,GRID_ExactCut_ID,Location_ExactCut,BoroughName_ExactCut,TLRN,MotorwayNumber,AADT Motorcycle,AADT Taxi,AADT Pcar,AADT Dcar,AADT PLgv,AADT DLgv,AADT LtBus,AADT Coach,AADT Rigid2Axle,AADT Rigid3Axle,AADT Rigid4Axle,AADT Artic3Axle,AADT Artic5Axle,AADT Artic6Axle,AADT ElectricCar,AADT ElectricLgv,AADT TOTAL,Speed (kph),Length (m),VKM_Motorcycle,VKM_Taxi,VKM_Pcar,VKM_Dcar,VKM_PLgv,VKM_DLgv,VKM_LtBus,VKM_Coach,VKM_Rigid2Axle,VKM_Rigid3Axle,VKM_Rigid4Axle,VKM_Artic3Axle,VKM_Artic5Axle,VKM_Artic6Axle,VKM_ElectricCar,VKM_ElectricLgv,VKM_TOTAL,"(Motorcycle, CO2)","(Motorcycle, NOx)","(Motorcycle, PM10_Brake)","(Motorcycle, PM10_Exhaust)","(Motorcycle, PM10_Resusp)","(Motorcycle, PM10_Tyre)",...,"(DieselLgv, CO2)","(DieselLgv, NOx)","(DieselLgv, PM10_Brake)","(DieselLgv, PM10_Exhaust)","(DieselLgv, PM10_Resusp)","(DieselLgv, PM10_Tyre)","(DieselLgv, PM25_Brake)","(DieselLgv, PM25_Exhaust)","(DieselLgv, PM25_Resusp)","(DieselLgv, PM25_Tyre)","(LtBus, CO2)","(LtBus, NOx)","(LtBus, PM10_Brake)","(LtBus, PM10_Exhaust)","(LtBus, PM10_Resusp)","(LtBus, PM10_Tyre)","(LtBus, PM25_Brake)","(LtBus, PM25_Exhaust)","(LtBus, PM25_Resusp)","(LtBus, PM25_Tyre)","(Coach, CO2)","(Coach, NOx)","(Coach, PM10_Brake)","(Coach, PM10_Exhaust)","(Coach, PM10_Resusp)","(Coach, PM10_Tyre)","(Coach, PM25_Brake)","(Coach, PM25_Exhaust)","(Coach, PM25_Resusp)","(Coach, PM25_Tyre)","(ElectricCar, CO2)","(ElectricCar, NOx)","(ElectricCar, PM10_Brake)","(ElectricCar, PM10_Exhaust)","(ElectricCar, PM10_Resusp)","(ElectricCar, PM10_Tyre)","(ElectricCar, PM25_Brake)","(ElectricCar, PM25_Exhaust)","(ElectricCar, PM25_Resusp)","(ElectricCar, PM25_Tyre)","(ElectricLgv, CO2)","(ElectricLgv, NOx)","(ElectricLgv, PM10_Brake)","(ElectricLgv, PM10_Exhaust)","(ElectricLgv, PM10_Resusp)","(ElectricLgv, PM10_Tyre)","(ElectricLgv, PM25_Brake)","(ElectricLgv, PM25_Exhaust)","(ElectricLgv, PM25_Resusp)","(ElectricLgv, PM25_Tyre)"
0,1.0,2013.0,4000000000000000.0,836.0,Outer,Hillingdon,Other,Other,88.301916,77.11258,4093.961441,2429.165893,21.502284,1080.377347,235.453345,74.723988,147.036213,28.026842,19.54154,8.106493,5.492629,1.999639,2.170702,0.618469,8313.591321,36.9382,5.472146,176.368343,154.019511,8177.004838,4851.853527,42.947224,2157.873473,470.278768,149.248696,293.6803,55.978941,39.030966,16.191367,10.970609,3.993946,4.335614,1.235289,16605.011414,0.017454,2.1e-05,4e-06,2e-06,0.0,7.325696e-07,...,0.410987,0.001737,0.000148,5.6e-05,1.7e-05,3.3e-05,5.9e-05,5.4e-05,6.755162e-07,2.3e-05,0.396773,0.003145,9.1e-05,1.5e-05,0.000205,1e-05,3.6e-05,1.4e-05,7e-06,7e-06,0.140865,0.001042,2.8e-05,1.1e-05,6.5e-05,3e-06,1.1e-05,1.1e-05,2e-06,2e-06,0.0,0.0,1.898888e-07,0.0,3.393129e-08,4.192366e-08,7.556801e-08,0.0,1.357252e-09,2.934656e-08,0.0,0.0,8.445862e-08,0.0,9.667597e-09,1.887162e-08,3.361109e-08,0.0,3.867039e-10,1.321013e-08
1,2.0,2013.0,4000000000000000.0,2217.0,Outer,Hillingdon,Other,Other,88.301916,77.11258,4093.961441,2429.165893,21.502284,1080.377347,235.125653,74.723988,147.036213,28.026842,19.54154,8.106493,5.492629,1.999639,2.170702,0.618469,8313.263629,35.285178,3.605559,116.207872,101.482382,5387.771477,3196.852461,28.297627,1421.807786,309.43215,98.338925,193.503902,36.884134,25.717231,10.668379,7.228458,2.631583,2.856706,0.813924,10940.494996,0.011589,1.4e-05,3e-06,2e-06,0.0,4.859432e-07,...,0.273476,0.001161,9.9e-05,3.8e-05,1.1e-05,2.2e-05,3.9e-05,3.6e-05,4.450929e-07,1.5e-05,0.268432,0.002119,6e-05,1e-05,0.000135,6e-06,2.4e-05,1e-05,5e-06,4e-06,0.09612,0.000717,1.9e-05,8e-06,4.3e-05,2e-06,8e-06,7e-06,2e-06,1e-06,0.0,0.0,1.268723e-07,0.0,2.235709e-08,2.77897e-08,5.048998e-08,0.0,8.942837e-10,1.945279e-08,0.0,0.0,5.639578e-08,0.0,6.369912e-09,1.250605e-08,2.244322e-08,0.0,2.547965e-10,8.754238e-09
2,3.0,2013.0,4000000000000000.0,282.0,External,NonGLA,Other,Other,310.363572,100.322495,10087.319861,5985.345419,39.934745,2006.512158,53.436368,39.957689,312.273405,72.61417,69.001679,41.253397,47.428538,39.717406,5.348502,1.148642,19211.978046,49.065141,113.618491,12871.009867,4160.449042,418328.713029,248216.759321,1656.123809,83211.56265,2216.046236,1657.075319,12950.212101,3011.364039,2861.551314,1710.809301,1966.897025,1647.110606,221.80638,47.635028,796735.125068,1.173993,0.001752,0.000245,0.00017,0.0,5.05222e-05,...,14.827206,0.059904,0.004965,0.001827,0.000651,0.001202,0.001976,0.001736,2.604914e-05,0.000841,1.26555,0.00866,0.000362,5.3e-05,0.000967,4.2e-05,0.000144,5.1e-05,3.5e-05,3e-05,1.297882,0.009104,0.000271,0.000102,0.000723,3.2e-05,0.000108,9.7e-05,2.6e-05,2.2e-05,0.0,0.0,8.439281e-06,0.0,1.735896e-06,2.0244e-06,3.358489e-06,0.0,6.943586e-08,1.41708e-06,0.0,0.0,2.842531e-06,0.0,3.728003e-07,6.881313e-07,1.131211e-06,0.0,1.491201e-08,4.816919e-07
3,4.0,2013.0,4000000000000000.0,873.0,Outer,Hillingdon,Other,Other,39.473081,144.548284,7709.574508,4574.502157,27.580811,1385.791535,16.741434,6.123638,507.392837,106.440958,53.151045,24.428295,42.324228,101.675382,4.087773,0.793306,14744.629271,49.1731,52.797356,760.68713,2785.595031,148571.479776,88155.390937,531.510766,26705.637104,322.624763,118.008843,9777.985094,2051.227418,1024.275647,470.758531,815.631678,1959.389833,78.775616,15.287825,284144.265992,0.069276,0.000104,1.4e-05,1e-05,0.0,2.981696e-06,...,4.754269,0.019196,0.00159,0.000585,0.000209,0.000385,0.000633,0.000556,8.360124e-06,0.00027,0.385928,0.003286,6.3e-05,1.1e-05,0.000141,7e-06,2.5e-05,1e-05,5e-06,5e-06,0.092712,0.000651,1.9e-05,7e-06,5.2e-05,2e-06,8e-06,7e-06,2e-06,2e-06,0.0,0.0,2.989013e-06,0.0,6.165121e-07,7.181943e-07,1.189505e-06,0.0,2.466048e-08,5.02736e-07,0.0,0.0,9.099437e-07,0.0,1.196453e-07,2.20623e-07,3.621205e-07,0.0,4.78581e-09,1.544361e-07
4,5.0,2013.0,4000000000000000.0,2930.0,Outer,Hillingdon,Other,Other,39.473081,144.548284,7709.574508,4574.502157,27.580811,1385.791535,16.741434,6.123638,507.392837,106.440958,53.151045,24.428295,42.324228,101.675382,4.087773,0.793306,14744.629271,49.1731,179.504952,2586.248957,9470.703474,505125.983487,299718.213847,1807.075618,90796.101831,1096.887173,401.216526,33244.027352,6973.937855,3482.419671,1600.524988,2773.054115,6661.700602,267.828056,51.97685,966057.900401,0.235529,0.000353,4.9e-05,3.4e-05,0.0,1.013742e-05,...,16.163971,0.065266,0.005404,0.001988,0.000711,0.00131,0.002151,0.001889,2.842346e-05,0.000917,1.312112,0.011171,0.000213,3.7e-05,0.000479,2.2e-05,8.5e-05,3.5e-05,1.7e-05,1.6e-05,0.31521,0.002213,6.6e-05,2.5e-05,0.000175,8e-06,2.6e-05,2.4e-05,6e-06,5e-06,0.0,0.0,1.01623e-05,0.0,2.09607e-06,2.441778e-06,4.044182e-06,0.0,8.384282e-08,1.709245e-06,0.0,0.0,3.093704e-06,0.0,4.067801e-07,7.500927e-07,1.231168e-06,0.0,1.627121e-08,5.250649e-07


In [195]:
# Update columns names to be all strings (some columns names were in tuples after pivoting the pollutant column)
df_merged_AADT_Emissions.columns = ['_'.join(x) if type(x) == tuple else x for x in df_merged_AADT_Emissions.columns]

In [196]:
df_merged_AADT_Emissions.columns

Index(['RowID', 'Year', 'Toid', 'GRID_ExactCut_ID', 'Location_ExactCut',
       'BoroughName_ExactCut', 'TLRN', 'MotorwayNumber', 'AADT Motorcycle',
       'AADT Taxi',
       ...
       'ElectricLgv_CO2', 'ElectricLgv_NOx', 'ElectricLgv_PM10_Brake',
       'ElectricLgv_PM10_Exhaust', 'ElectricLgv_PM10_Resusp',
       'ElectricLgv_PM10_Tyre', 'ElectricLgv_PM25_Brake',
       'ElectricLgv_PM25_Exhaust', 'ElectricLgv_PM25_Resusp',
       'ElectricLgv_PM25_Tyre'],
      dtype='object', length=254)

## 4. Creating a Profiling Report for the final dataframe

In [None]:
%%time

# Create a profiling report for the dataframe
profile = ProfileReport(df_merged_AADT_Emissions, title="Pandas profiling report for the processed Emissions by Link data joined with Major Grid Road Traffic Data", explorative=True)

# Save the report as an HTML file
profile.to_file("Profiling_Report_df_merged_AADT_Emissions.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

## 5. Save the final dataframe

In [200]:
%%time
df_merged_AADT_Emissions.to_csv("df_merged_AADT_Emissions.csv")

Wall time: 34.3 s
