***
## __Imports__
***

In [1]:
# -*- coding: utf-8 -*-
import os

import pandas as pd

from aqmd_pylib.aqmd_lib import data_toolkit as dtk
from aqmd_pylib.aqmd_lib import util
from aqmd_pylib.aqmd_lib.util import nLoop

from PyScripts import InnovCorridor_CodeKit as ic

***
## __Settings and File Paths__
***

In [2]:
SAVE_TABLES = False
SAVE_GRAPHS = False

fPath_background_pm25 = r'../data/~originals/CARB/RUBIDOUX_PM25HR_PICKDATA_2021-10-31.csv'
fPath_historicalClarity = r'../data/~originals/Clarity_Originals/historical_pm25cal_highres.csv'
fPath_metadata = r'../data/metadata/metadata.xlsx'
fPath_weatherData = r'..\data\~originals\OpenWeather Data\OpenWeather_September_2020_to_September_2021.csv'
fpath_no2 = r'../data/~originals/Clarity_Originals/ucr_no2_unaveraged.csv'
fpath_background_no2 = r'../data/~originals/CARB/NO2_PICKDATA_2021-11-30.csv'

FPath_analysis = r'..\Analysis'
FPath_travelTime_csv = r'..\data\~originals\travel_time_csv'
FPath_clarity_originals_HighRes = r'../data/~originals/Clarity_Originals/HighRes'
FPath_clarity_originals_hourly = r'../data/~originals/Clarity_Originals/Hourly'
FPath_PEMS = '../data/~originals/pems_freeway/'
FPath_GridSmart_csv = '../data/~originals/Gridsmart_originals/gridsmart_csv/'

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

***
## __Metadata__
***

In [3]:
clarity_metadata = pd.read_excel(fPath_metadata, sheet_name='ClaritySensors', header=0)
clarity_metadata

Unnamed: 0,sensorID,nickname,Cross Streets,Latitude,Longitude,Cardinal Relation
0,ARHF78W6,Magnolia,Magnolia & Tyler,33.911928,-117.461394,
1,AY3HFT4R,Iowa,Iowa & University,33.975591,-117.339897,
2,AGJ1PPLJ,Chicago,Chicago & University,33.975566,-117.348524,
3,A4T789S6,Cranford,Cranford & University,33.975624,-117.344182,


In [4]:
travelPath_metadata = pd.read_excel(fPath_metadata, sheet_name='TravelPaths', header=0)
travelPath_metadata

Unnamed: 0,pathID,Name,Distance(m),Start Node,Start Addreess,Start Latitude,Start Longitude,End Node,End Address,End Latitude,End Longitude,Path Info,Cardinal Relation
0,HEW1,Local 1,836,H6,"948 University Ave, Riverside, CA 92507",33.9758,-117.33385,H5,"1426 University Ave, Riverside, CA 92507",33.9757,-117.34262,WB Innovation Corridor,
1,HEW2,Local 2,893,H5,"1426 University Ave, Riverside, CA 92507",33.9757,-117.34262,H4,"1889 University Ave STE 109, Riverside, CA 92507",33.97614,-117.35254,WB Innovation Corridor,
2,HEW3,Local 3,829,H4,"1889 University Ave STE 109, Riverside, CA 92507",33.97614,-117.35254,H3,"2460 University Ave, Riverside, CA 92507",33.97574,-117.35254,WB Innovation Corridor,
3,HEW4,Local 4,490,H3,"2460 University Ave, Riverside, CA 92507",33.97574,-117.35254,H2,"2980 University Ave, Riverside, CA 92507",33.97768,-117.36593,WB Innovation Corridor,
4,HEW5,Local 5,929,H2,"2980 University Ave, Riverside, CA 92507",33.97768,-117.36593,H1,"Cesar E. Chavez Memorial, 3746 University Ave, Riverside, CA 92501",33.98211,-117.37477,WB Innovation Corridor,
5,HWE1,Local 6,931,H1,"Cesar E. Chavez Memorial, 3746 University Ave, Riverside, CA 92501",33.98211,-117.37477,H2,"2980 University Ave, Riverside, CA 92507",33.97768,-117.36593,EB Innnovation Corridor,
6,HWE2,Local 7,491,H2,"2980 University Ave, Riverside, CA 92507",33.97768,-117.36593,H3,"2460 University Ave, Riverside, CA 92507",33.97574,-117.35254,EB Innnovation Corridor,
7,HWE3,Local 8,808,H3,"2460 University Ave, Riverside, CA 92507",33.97574,-117.35254,H4,"1889 University Ave STE 109, Riverside, CA 92507",33.97614,-117.35254,EB Innnovation Corridor,
8,HWE4,Local 9,918,H4,"1889 University Ave STE 109, Riverside, CA 92507",33.97614,-117.35254,H5,"1426 University Ave, Riverside, CA 92507",33.9757,-117.34262,EB Innnovation Corridor,
9,HWE5,Local 10,836,H5,"1426 University Ave, Riverside, CA 92507",33.9757,-117.34262,H6,"948 University Ave, Riverside, CA 92507",33.9758,-117.33385,EB Innnovation Corridor,


In [5]:
travelNode_metadata = pd.read_excel(fPath_metadata, sheet_name='TravelNodes', header=0)
travelNode_metadata

Unnamed: 0,nodeID,Address,Latitude,Longitude
0,H1,"Cesar E. Chavez Memorial, 3746 University Ave, Riverside, CA 92501",33.98211,-117.37477
1,H2,"2980 University Ave, Riverside, CA 92507",33.97768,-117.36593
2,H3,"2460 University Ave, Riverside, CA 92507",33.97574,-117.35254
3,H4,"1889 University Ave STE 109, Riverside, CA 92507",33.97614,-117.35254
4,H5,"1426 University Ave, Riverside, CA 92507",33.9757,-117.34262
5,H6,"948 University Ave, Riverside, CA 92507",33.9758,-117.33385
6,F1,"888 Martin Luther King Blvd, Riverside, CA 92507",33.96839,-117.33035
7,F2,"ARCO, 1360 W Blaine St, Riverside, CA 92507",33.98279,-117.34147
8,F3,"3219 Spruce St, Riverside, CA 92501",33.99191,-117.36048
9,F4,"Subway, 3315 14th St, Riverside CA 92501",33.97471,-117.37386


***
# __Background Concentration__
Source: California Air Research Board (CARB)
    Hourly: The hourly average of the pollutant
    HighRes: The average concentration of the pollutant for the hour resampled and forward filled across 15 minute intervals
***
> ## __Background PM2.5__
***

In [6]:
carb_pm25 = pd.read_csv(fPath_background_pm25)
carb_pm25 = ic.carb_PM25(carb_pm25)
carb_pm25_data = carb_pm25.get_data()
if SAVE_TABLES:
    carb_pm25_data.to_csv('../data/CARB/Rubidoux_Background_PM2.5_Hourly.csv')
carb_pm25_data.info()
carb_pm25_data

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6662 entries, 2021-01-01 00:00:00-08:00 to 2021-10-31 23:00:00-07:00
Data columns (total 1 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Background PM2.5 [ug/m3]  6662 non-null   float64
dtypes: float64(1)
memory usage: 104.1 KB


Unnamed: 0_level_0,Background PM2.5 [ug/m3]
datetime-America/Los_Angeles,Unnamed: 1_level_1
2021-01-01 00:00:00-08:00,2.0
2021-01-01 01:00:00-08:00,1.0
2021-01-01 02:00:00-08:00,-2.0
2021-01-01 03:00:00-08:00,-1.0
2021-01-01 04:00:00-08:00,1.0
...,...
2021-10-31 19:00:00-07:00,8.0
2021-10-31 20:00:00-07:00,17.0
2021-10-31 21:00:00-07:00,19.0
2021-10-31 22:00:00-07:00,16.0


In [7]:
carb_pm25_data_interp = carb_pm25_data.resample('15T')
carb_pm25_data_interp = carb_pm25_data_interp.ffill()
carb_pm25_data_interp = carb_pm25_data_interp[~carb_pm25_data_interp.index.duplicated()]
if SAVE_TABLES:
    carb_pm25_data_interp.to_csv('../data/CARB/Rubidoux_Background_pm25_HighRes.csv')
carb_pm25_data_interp.info()
carb_pm25_data_interp

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 29177 entries, 2021-01-01 00:00:00-08:00 to 2021-10-31 23:00:00-07:00
Freq: 15T
Data columns (total 1 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Background PM2.5 [ug/m3]  29177 non-null  float64
dtypes: float64(1)
memory usage: 455.9 KB


Unnamed: 0_level_0,Background PM2.5 [ug/m3]
datetime-America/Los_Angeles,Unnamed: 1_level_1
2021-01-01 00:00:00-08:00,2.0
2021-01-01 00:15:00-08:00,2.0
2021-01-01 00:30:00-08:00,2.0
2021-01-01 00:45:00-08:00,2.0
2021-01-01 01:00:00-08:00,1.0
...,...
2021-10-31 22:00:00-07:00,16.0
2021-10-31 22:15:00-07:00,16.0
2021-10-31 22:30:00-07:00,16.0
2021-10-31 22:45:00-07:00,16.0


***
> ## __Background NO2__
***

In [8]:
carb_no2 = pd.read_csv(fpath_background_no2)
carb_no2 = ic.carb_NO2(carb_no2)
carb_no2_data = carb_no2.get_data()
if SAVE_TABLES:
    carb_no2_data.to_csv('../data/CARB/Rubidoux_Background_NO2_Hourly.csv')
carb_no2_data.info()
carb_no2_data

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7600 entries, 2021-01-01 00:00:00-08:00 to 2021-11-30 23:00:00-08:00
Data columns (total 1 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Background NO2 [ppb]  7600 non-null   float64
dtypes: float64(1)
memory usage: 118.8 KB


Unnamed: 0_level_0,Background NO2 [ppb]
datetime-America/Los_Angeles,Unnamed: 1_level_1
2021-01-01 00:00:00-08:00,1.3
2021-01-01 01:00:00-08:00,1.3
2021-01-01 02:00:00-08:00,1.5
2021-01-01 03:00:00-08:00,2.0
2021-01-01 04:00:00-08:00,2.2
...,...
2021-11-30 19:00:00-08:00,13.5
2021-11-30 20:00:00-08:00,39.9
2021-11-30 21:00:00-08:00,39.3
2021-11-30 22:00:00-08:00,34.9


In [9]:
carb_no2_data_interp = carb_no2_data.resample('15T').ffill()
carb_no2_data_interp = carb_no2_data_interp[~carb_no2_data_interp.index.duplicated()]
if SAVE_TABLES:
    carb_no2_data_interp.to_csv('../data/CARB/Rubidoux_Background_NO2_HighRes.csv')
carb_no2_data_interp.info()
carb_no2_data_interp

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 32061 entries, 2021-01-01 00:00:00-08:00 to 2021-11-30 23:00:00-08:00
Freq: 15T
Data columns (total 1 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Background NO2 [ppb]  32061 non-null  float64
dtypes: float64(1)
memory usage: 501.0 KB


Unnamed: 0_level_0,Background NO2 [ppb]
datetime-America/Los_Angeles,Unnamed: 1_level_1
2021-01-01 00:00:00-08:00,1.3
2021-01-01 00:15:00-08:00,1.3
2021-01-01 00:30:00-08:00,1.3
2021-01-01 00:45:00-08:00,1.3
2021-01-01 01:00:00-08:00,1.3
...,...
2021-11-30 22:00:00-08:00,34.9
2021-11-30 22:15:00-08:00,34.9
2021-11-30 22:30:00-08:00,34.9
2021-11-30 22:45:00-08:00,34.9


***
# __Hyperlocals Emmissions (Clarity)__
***
> ### __Historical Data__
***

In [10]:
historical_clarity = pd.read_csv(fPath_historicalClarity)
historical_clarity = ic.clarity_historical(historical_clarity)
historical_clarity_data = historical_clarity.get_data()

historical_clarity_data.info()
historical_clarity_data

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 88847 entries, 2020-09-29 12:05:00-07:00 to 2021-06-22 23:58:00-07:00
Data columns (total 9 columns):
 #   Column                                                          Non-Null Count  Dtype  
---  ------                                                          --------------  -----  
 0   Sensor Name                                                     88847 non-null  object 
 1   Intersection                                                    88847 non-null  object 
 2   Device ID                                                       88847 non-null  object 
 3   Date                                                            88847 non-null  object 
 4   PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3]  88847 non-null  float64
 5   Rel. Humidity Internal Highest Resolution [%]                   88847 non-null  float64
 6   Temperature Internal Highest Resolution [degC]                  88847 non-null  float64
 7   Lat

Unnamed: 0_level_0,Sensor Name,Intersection,Device ID,Date,PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3],Rel. Humidity Internal Highest Resolution [%],Temperature Internal Highest Resolution [degC],Latitude,Longitude
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-09-29 12:05:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,9/29/2020 12:05,3.573135,25.265888,34.912262,33.911928,-117.461394
2020-09-29 13:06:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,9/29/2020 13:06,2.654986,22.885481,38.290230,33.911928,-117.461394
2020-09-29 13:24:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,9/29/2020 13:24,2.318490,22.088961,38.768219,33.911928,-117.461394
2020-09-29 13:41:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,9/29/2020 13:41,2.023389,23.118944,38.981842,33.911928,-117.461394
2020-09-29 13:58:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,9/29/2020 13:58,1.755854,21.947052,39.166092,33.911928,-117.461394
...,...,...,...,...,...,...,...,...,...
2021-06-22 23:40:00-07:00,Chicago,Chicago & University,AGJ1PPLJ,6/22/2021 23:40,7.240612,52.887772,23.197525,33.975566,-117.348524
2021-06-22 23:44:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,6/22/2021 23:44,6.212309,56.778820,22.420464,33.911928,-117.461394
2021-06-22 23:50:00-07:00,Iowa,Iowa & University,AY3HFT4R,6/22/2021 23:50,6.210450,52.457466,23.152130,33.975591,-117.339897
2021-06-22 23:57:00-07:00,Cranford,Cranford & University,A4T789S6,6/22/2021 23:57,6.478580,53.345539,22.794312,33.975624,-117.344182


***
> ### __Original Clarity Data__
***

In [11]:
clarity_original_HighRes = util.csv_folder2table(FPath_clarity_originals_HighRes)
clarity_HighRes = ic.clarity_original(clarity_original_HighRes)
clarity_HighRes_data = clarity_HighRes.get_data()
clarity_HighRes_data.info()
clarity_HighRes_data

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 83608 entries, 2021-01-01 00:07:14.361000-08:00 to 2021-07-10 23:58:39.211000-07:00
Data columns (total 10 columns):
 #   Column                                                          Non-Null Count  Dtype              
---  ------                                                          --------------  -----              
 0   Sensor Name                                                     83608 non-null  object             
 1   Intersection                                                    83608 non-null  object             
 2   Device ID                                                       83608 non-null  object             
 3   datetime-UTC                                                    83608 non-null  datetime64[ns, UTC]
 4   PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3]  63278 non-null  float64            
 5   NO2 Highest Resolution Concentration Calibrated [ppb]           0 non-null      float64   

Unnamed: 0_level_0,Sensor Name,Intersection,Device ID,datetime-UTC,PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3],NO2 Highest Resolution Concentration Calibrated [ppb],Temperature Internal Highest Resolution [degC],Rel. Humidity Internal Highest Resolution [%],Latitude,Longitude
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-01-01 00:07:14.361000-08:00,Iowa,Iowa & University,AY3HFT4R,2021-01-01 08:07:14.361000+00:00,,,15.00,29.89,33.975591,-117.339897
2021-01-01 00:09:41.191000-08:00,Magnolia,Magnolia & Tyler,ARHF78W6,2021-01-01 08:09:41.191000+00:00,,,15.99,28.41,33.911928,-117.461394
2021-01-01 00:11:40.746000-08:00,Cranford,Cranford & University,A4T789S6,2021-01-01 08:11:40.746000+00:00,,,15.07,29.28,33.975624,-117.344182
2021-01-01 00:12:36.634000-08:00,Chicago,Chicago & University,AGJ1PPLJ,2021-01-01 08:12:36.634000+00:00,,,15.27,29.02,33.975566,-117.348524
2021-01-01 00:24:25.849000-08:00,Iowa,Iowa & University,AY3HFT4R,2021-01-01 08:24:25.849000+00:00,,,14.91,29.69,33.975591,-117.339897
...,...,...,...,...,...,...,...,...,...,...
2021-07-10 23:55:03.070000-07:00,Chicago,Chicago & University,AGJ1PPLJ,2021-07-11 06:55:03.070000+00:00,9.01,,25.78,41.70,33.975566,-117.348524
2021-07-10 23:57:16.538000-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,2021-07-11 06:57:16.538000+00:00,10.61,,25.62,41.46,33.911928,-117.461394
2021-07-10 23:57:30.107000-07:00,Cranford,Cranford & University,A4T789S6,2021-07-11 06:57:30.107000+00:00,8.43,,25.46,42.16,33.975624,-117.344182
2021-07-10 23:57:39.588000-07:00,Iowa,Iowa & University,AY3HFT4R,2021-07-11 06:57:39.588000+00:00,10.53,,25.70,42.25,33.975591,-117.339897


***
> ### __Clarity PM2.5 Measurements__
***

In [12]:
clarity_HighRes_pm25 = clarity_HighRes.get_PM25()
clarity_HighRes_pm25.info()
clarity_HighRes_pm25

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 63278 entries, 2021-03-02 12:39:24.550000-08:00 to 2021-07-10 23:58:39.211000-07:00
Data columns (total 8 columns):
 #   Column                                                          Non-Null Count  Dtype  
---  ------                                                          --------------  -----  
 0   Sensor Name                                                     63278 non-null  object 
 1   Intersection                                                    63278 non-null  object 
 2   Device ID                                                       63278 non-null  object 
 3   PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3]  63278 non-null  float64
 4   Temperature Internal Highest Resolution [degC]                  63278 non-null  float64
 5   Rel. Humidity Internal Highest Resolution [%]                   63278 non-null  float64
 6   Latitude                                                        63278 non-null  fl

Unnamed: 0_level_0,Sensor Name,Intersection,Device ID,PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3],Temperature Internal Highest Resolution [degC],Rel. Humidity Internal Highest Resolution [%],Latitude,Longitude
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-03-02 12:39:24.550000-08:00,Cranford,Cranford & University,A4T789S6,11.81,26.84,9.31,33.975624,-117.344182
2021-03-02 12:42:19.270000-08:00,Magnolia,Magnolia & Tyler,ARHF78W6,7.67,27.60,7.05,33.911928,-117.461394
2021-03-02 12:47:51.159000-08:00,Iowa,Iowa & University,AY3HFT4R,7.81,26.31,7.96,33.975591,-117.339897
2021-03-02 12:50:54.283000-08:00,Chicago,Chicago & University,AGJ1PPLJ,8.84,25.72,7.94,33.975566,-117.348524
2021-03-02 12:56:17.661000-08:00,Cranford,Cranford & University,A4T789S6,16.63,26.95,8.66,33.975624,-117.344182
...,...,...,...,...,...,...,...,...
2021-07-10 23:55:03.070000-07:00,Chicago,Chicago & University,AGJ1PPLJ,9.01,25.78,41.70,33.975566,-117.348524
2021-07-10 23:57:16.538000-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,10.61,25.62,41.46,33.911928,-117.461394
2021-07-10 23:57:30.107000-07:00,Cranford,Cranford & University,A4T789S6,8.43,25.46,42.16,33.975624,-117.344182
2021-07-10 23:57:39.588000-07:00,Iowa,Iowa & University,AY3HFT4R,10.53,25.70,42.25,33.975591,-117.339897


In [13]:
clarity_historical_pm25 = historical_clarity.get_PM25()
clarity_historical_pm25.info()
clarity_historical_pm25

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 88847 entries, 2020-09-29 12:05:00-07:00 to 2021-06-22 23:58:00-07:00
Data columns (total 8 columns):
 #   Column                                                          Non-Null Count  Dtype  
---  ------                                                          --------------  -----  
 0   Sensor Name                                                     88847 non-null  object 
 1   Intersection                                                    88847 non-null  object 
 2   Device ID                                                       88847 non-null  object 
 3   PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3]  88847 non-null  float64
 4   Temperature Internal Highest Resolution [degC]                  88847 non-null  float64
 5   Rel. Humidity Internal Highest Resolution [%]                   88847 non-null  float64
 6   Latitude                                                        88847 non-null  float64
 7   Lon

Unnamed: 0_level_0,Sensor Name,Intersection,Device ID,PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3],Temperature Internal Highest Resolution [degC],Rel. Humidity Internal Highest Resolution [%],Latitude,Longitude
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-09-29 12:05:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,3.573135,34.912262,25.265888,33.911928,-117.461394
2020-09-29 13:06:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,2.654986,38.290230,22.885481,33.911928,-117.461394
2020-09-29 13:24:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,2.318490,38.768219,22.088961,33.911928,-117.461394
2020-09-29 13:41:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,2.023389,38.981842,23.118944,33.911928,-117.461394
2020-09-29 13:58:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,1.755854,39.166092,21.947052,33.911928,-117.461394
...,...,...,...,...,...,...,...,...
2021-06-22 23:40:00-07:00,Chicago,Chicago & University,AGJ1PPLJ,7.240612,23.197525,52.887772,33.975566,-117.348524
2021-06-22 23:44:00-07:00,Magnolia,Magnolia & Tyler,ARHF78W6,6.212309,22.420464,56.778820,33.911928,-117.461394
2021-06-22 23:50:00-07:00,Iowa,Iowa & University,AY3HFT4R,6.210450,23.152130,52.457466,33.975591,-117.339897
2021-06-22 23:57:00-07:00,Cranford,Cranford & University,A4T789S6,6.478580,22.794312,53.345539,33.975624,-117.344182


In [14]:
clarity_pm25 = pd.concat([clarity_historical_pm25, clarity_HighRes_pm25], sort=True)
clarity_pm25_pivot = pd.pivot(clarity_pm25.reset_index(), index='datetime-America/Los_Angeles',
                              values='PM2.5 Highest Resolution Mass Concentration Calibrated [ug/m3]',
                              columns='Sensor Name')
if SAVE_TABLES:
    clarity_pm25_pivot.to_csv('../data/clarity_data/pm25_Hourly.csv')
clarity_pm25_pivot.rename(columns=lambda x: x + f' PM2.5 [ug/m3]', inplace=True)
clarity_pm25_pivot

Sensor Name,Chicago PM2.5 [ug/m3],Cranford PM2.5 [ug/m3],Iowa PM2.5 [ug/m3],Magnolia PM2.5 [ug/m3]
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-29 12:05:00-07:00,,,,3.573135
2020-09-29 13:06:00-07:00,,,,2.654986
2020-09-29 13:24:00-07:00,,,,2.318490
2020-09-29 13:41:00-07:00,,,,2.023389
2020-09-29 13:58:00-07:00,,,,1.755854
...,...,...,...,...
2021-07-10 23:55:03.070000-07:00,9.01,,,
2021-07-10 23:57:16.538000-07:00,,,,10.610000
2021-07-10 23:57:30.107000-07:00,,8.43,,
2021-07-10 23:57:39.588000-07:00,,,10.53,


***
> ### __Clarity PM2.5 Resampling__
***

In [15]:

clarity_pm25_interp = clarity_pm25_pivot.resample(
    '15T').interpolate('cubic').dropna()
if SAVE_TABLES:
    clarity_pm25_interp.to_csv('../data/clarity_data/pm25_HighRes.csv')
clarity_pm25_interp.info()
clarity_pm25_interp

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 25505 entries, 2020-09-30 05:15:00-07:00 to 2021-06-22 21:15:00-07:00
Freq: 15T
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Chicago PM2.5 [ug/m3]   25505 non-null  float64
 1   Cranford PM2.5 [ug/m3]  25505 non-null  float64
 2   Iowa PM2.5 [ug/m3]      25505 non-null  float64
 3   Magnolia PM2.5 [ug/m3]  25505 non-null  float64
dtypes: float64(4)
memory usage: 996.3 KB


Sensor Name,Chicago PM2.5 [ug/m3],Cranford PM2.5 [ug/m3],Iowa PM2.5 [ug/m3],Magnolia PM2.5 [ug/m3]
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-30 05:15:00-07:00,7.236761,10.467560,12.949262,10.601163
2020-09-30 05:30:00-07:00,7.399722,10.613835,12.446988,10.765489
2020-09-30 05:45:00-07:00,7.581021,10.769737,12.029491,10.932227
2020-09-30 06:00:00-07:00,7.780169,10.935291,11.690902,11.100720
2020-09-30 06:15:00-07:00,7.996672,11.110053,11.425349,11.270312
...,...,...,...,...
2021-06-22 20:15:00-07:00,6.869979,6.232989,6.948963,7.900897
2021-06-22 20:30:00-07:00,6.889317,6.143695,6.761457,7.840708
2021-06-22 20:45:00-07:00,6.898994,6.091687,6.581780,7.732686
2021-06-22 21:00:00-07:00,6.892677,6.079960,6.413283,7.571443


In [16]:
clarity_pm25_hourly = clarity_pm25_interp.resample('H').sum()
if SAVE_TABLES:
    clarity_pm25_hourly.to_csv('../data/clarity_data/pm25_Hourly.csv')
clarity_pm25_hourly

Sensor Name,Chicago PM2.5 [ug/m3],Cranford PM2.5 [ug/m3],Iowa PM2.5 [ug/m3],Magnolia PM2.5 [ug/m3]
datetime-America/Los_Angeles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-30 05:00:00-07:00,22.217504,31.851132,37.425742,32.298879
2020-09-30 06:00:00-07:00,32.486659,44.824336,45.433091,45.421543
2020-09-30 07:00:00-07:00,36.726757,48.010764,44.009098,48.111598
2020-09-30 08:00:00-07:00,41.896480,51.629579,45.193180,50.640859
2020-09-30 09:00:00-07:00,47.506506,55.566698,47.491486,52.837350
...,...,...,...,...
2021-06-22 17:00:00-07:00,31.820847,35.808417,34.252036,27.638758
2021-06-22 18:00:00-07:00,28.332710,31.185440,33.013936,29.480512
2021-06-22 19:00:00-07:00,27.317495,27.243216,30.455774,31.191621
2021-06-22 20:00:00-07:00,27.505599,24.824947,27.433145,31.392932


***
> ## __NO2 Data__
***

In [None]:
historical_clarity_no2 = pd.read_csv(fpath_no2)
historical_clarity_no2.info()
historical_clarity_no2

In [None]:
historical_clarity_no2 = ic.historical_no2(historical_clarity_no2)
clarity_no2_data = historical_clarity_no2.get_data()
clarity_no2_data

***
>> ### __Interpolating NO2 Data__
Method: Resampling to 15 Minute intervals with linear interpolation
***

In [None]:
no2_pivot = pd.pivot_table(clarity_no2_data, values='Nitrogen Dioxide [ppb]', index='datetime-America/Los_Angeles',
                           columns='Sensor Name')
no2_pivot = no2_pivot.resample('15T').interpolate('cubic').dropna()
no2_pivot.rename(columns=lambda x: x + ' NO2 [ppb]', inplace=True)
if SAVE_TABLES:
    no2_pivot.to_csv('../date/clarity_data/no2_HighRes.csv')
no2_pivot

In [None]:
no2_hourly = no2_pivot.resample('H').sum()
if SAVE_TABLES:
    no2_hourly.to_csv('../data/clarity_data/no2_Hourly.csv')
no2_hourly

***
# __Meteorological Data__
***
> ## __Weather Data__
***

In [None]:
openWeather = pd.read_csv(fPath_weatherData)

openWeather = ic.OpenWeather(openWeather)
openWeather_hourly = openWeather.get_data()
if SAVE_TABLES:
    openWeather_hourly.to_csv('../data/weather_data/OpenWeather_Hourly.csv')
openWeather_hourly.info()
openWeather_hourly

In [None]:
openWeather_data_HighRes = openWeather_hourly[~openWeather_hourly.index.duplicated()].resample('15T').ffill()
openWeather_data_HighRes = openWeather_data_HighRes[~openWeather_data_HighRes.index.duplicated()]
openWeather_data_HighRes.rename(columns={'temp'      : 'Temperature [degC]',
                                         'pressure'  : 'Pressure [mbar]',
                                         'humidity'  : 'Humidity [%]',
                                         'wind_deg'  : 'Wind Direction [degrees]',
                                         'wind_speed': 'Wind Speed [mph]'}, inplace=True)
if SAVE_TABLES:
    openWeather_data_HighRes.to_csv('../data/weather_data/OpenWeather_HighRes.csv')

openWeather_data_HighRes.info()
openWeather_data_HighRes

***
# __Traffic Density__
***
$$v:\text{average speed }(\frac{miles}{hour})$$
$$k:\text{density }(\frac{\text{# of vehicles}}{miles})$$
$$q=kv:\text{Traffic Flow }(\frac{\text{# of vehicles}}{hour})$$
Rearranging for k gives us :
$$k = \frac{q}{v}$$


In [None]:
miles_per_meter = 0.0006213712
path_length_m = {
    'Local 1' : 836,
    'Local 2' : 893,
    'Local 9' : 918,
    'Local 10': 836,
    'Fwy 1'   : 2326,
    'Fwy 2'   : 2508,
    'Fwy 3'   : 2651,
    'Fwy 4'   : 2450
}

***
> ## __GridSmart Data__
***

In [None]:
gs_files = os.listdir(FPath_GridSmart_csv)
for i in nLoop(gs_files):
    gs_files[i] = os.path.join(FPath_GridSmart_csv, gs_files[i])
full_table = pd.read_csv(gs_files[0])
gridsmart = ic.GridSmart_csv(full_table)

for i in range(1, len(gs_files)):
    table = pd.read_csv(gs_files[i])
    gridsmart.add_data(table)

gs_data = gridsmart.get_data()
gs_data.info()
gs_data

In [None]:
gs_data.rename(columns={
    'Northbound': 'GS-NB # of Vehicles/15 min',
    'Eastbound' : 'GS-EB # of Vehicles/15 min',
    'Southbound': 'GS-SB # of Vehicles/15 min',
    'Westbound' : 'GS-WB # of Vehicles/15 min'
}, inplace=True)
if SAVE_TABLES:
    gs_data.to_csv('../data/gridsmart/gridsmart_processed_HighRes.csv')
gs_data.info()
gs_data

In [None]:
gs_data_hourly = gs_data.resample('H').sum()
gs_data_hourly.rename(inplace=True, columns={
    'GS-EB # of Vehicles/15 min': 'GS-WB # of Vehicles/Hr',
    'GS-WB # of Vehicles/15 min': 'GS-WB # of Vehicles/Hr'
})
gs_data_hourly

***
> ## __PEMS Freeway Data__
***
PEMS Data gives Vehicle Miles Traveled (VMT) which is flow times the length traveled.
$$VMT = qL$$

Rearranging gives us,
$$q=\frac{VMY}{L}$$
$$k=\frac{VMT}{L*v}$$

PEMS freeway data is the aggregated sum of VMT for the hour. Upscaling resamples that sum and spaces it over 15 minutes intervals.



In [None]:
pems_length_miles = 5.3  # miles

In [None]:
pems_north = util.excel_folder2table(FPath_PEMS + "/I215_northbound/", 'Report Data', axis=1)
pems_north = pems_north.iloc[:, ~pems_north.columns.duplicated()]
pems_south = util.excel_folder2table(FPath_PEMS + "/I215_southbound/", 'Report Data', axis=1)
pems_south = pems_south.iloc[:, ~pems_south.columns.duplicated()]
pems = ic.PEMS(pems_north, pems_south)
pems_data = pems.get_data()
pems_data.info()
pems_data

In [None]:
pems_data_15min = pems_data/4
pems_data_15min = pems_data_15min.resample('15T').ffill()
if SAVE_TABLES:
    pems_data_15min.to_csv('../data/pems/PEMS_I215_VMT_HighRes.csv')
pems_data_15min.info()
pems_data_15min

***
> ## __Travel Time__
***

In [None]:
tt_file_list = os.listdir(FPath_travelTime_csv)
for i in util.nLoop(tt_file_list):
    tt_file_list[i] = FPath_travelTime_csv + "\\" + tt_file_list[i]
tt_csvData = ic.readTravelTimeCSV(tt_file_list[0])
for fpath in range(1, len(tt_file_list)):
    temp = ic.readTravelTimeCSV(tt_file_list[fpath])
    tt_csvData = pd.concat([tt_csvData, temp], axis=0)
tt_csvData.info()
tt_csvData

In [None]:
travel_time = ic.travel_time(tt_csvData)
tt_data = travel_time.get_data()
tt_data = tt_data[~tt_data.index.duplicated()]
if SAVE_TABLES:
    tt_data.to_csv('../data/travel_time/travelTime_HighRes.csv')

tt_data.info()
tt_data.head(len(tt_data))

***
> ## __Calculating Traffic Density__
***
Similar road conditions exist for local 1 and 2, as well as local 3 and 4 so we assume gridsmart sensor counts can apply to both paths

In [None]:
traffic_density = pd.merge(tt_data, gs_data, left_index=True, right_index=True)
traffic_density = pd.merge(traffic_density, pems_data_15min, left_index=True, right_index=True)
traffic_density = traffic_density[(traffic_density > 0).all(1)]
traffic_density['Density Local 1 (#Vehicles/mile)'] = (
        (traffic_density['GS-WB # of Vehicles/15 min']*4)
        /traffic_density['Local 1 (mph)']
)
traffic_density['Density Local 2 (#Vehicles/mile)'] = (
        (traffic_density['GS-WB # of Vehicles/15 min']*4)
        /traffic_density['Local 2 (mph)']
)
traffic_density['Density Local 9 (#Vehicles/mile)'] = (
        (traffic_density['GS-EB # of Vehicles/15 min']*4)
        /traffic_density['Local 9 (mph)']
)
traffic_density['Density Local 10 (#Vehicles/mile)'] = (
        (traffic_density['GS-EB # of Vehicles/15 min']*4)
        /traffic_density['Local 10 (mph)']
)

traffic_density['Density Fwy 1 (#Vehicles/mile)'] = (
        traffic_density['Northbound (VMT)']
        /pems_length_miles*traffic_density['Fwy 4 (mph)']
)
traffic_density['Density Fwy 4 (#Vehicles/mile)'] = (
        traffic_density['Southbound (VMT)']
        /pems_length_miles*traffic_density['Fwy 4 (mph)']
)
traffic_density.rename(columns={
    'Northbound (VMT)': 'PEMS Northbound (VMT)',
    'Southbound (VMT)': 'PEMS Southbound (VMT)',
}, inplace=True)
traffic_density = traffic_density.iloc[:, -6:]
if SAVE_TABLES:
    traffic_density.to_csv('../data/traffic_density/traffic_density_HighRes.csv')
traffic_density.info()
traffic_density

***
# __Final Dataset__
***
> ## __PM2.5 Datasets__
***

In [None]:
TRAFFIC_DENSITY_FEATS = [
    'Density Local 1 (#Vehicles/mile)',
    'Density Local 2 (#Vehicles/mile)',
    'Density Local 9 (#Vehicles/mile)',
    'Density Local 10 (#Vehicles/mile)',
    'Density Fwy 1 (#Vehicles/mile)',
    'Density Fwy 4 (#Vehicles/mile)'
]
TRAVEL_TIME_FEATS = [
    'Local 1 (mph)',
    'Local 2 (mph)',
    'Local 9 (mph)',
    'Local 10 (mph)',
    'Fwy 1 (mph)',
    'Fwy 4 (mph)'
]
METEOROLOGICAL_FEATS = [
    'Temperature [degC]',
    'Pressure [mbar]',
    'Humidity [%]',
    'Wind Speed [mph]',
    'Wind Direction [degrees]'
]
BACKGROUND_PM25_FEAT = ['Background PM2.5 [ug/m3]']
BACKROUND_NO2_FEAT = ['Background NO2 [ppb]']
DT_FEATS = [
    'year',
    'month',
    'day',
    'dayofweek_Sunday',
    'dayofweek_Monday',
    'dayofweek_Tuesday',
    'dayofweek_Wednesday',
    'dayofweek_Thursday',
    'dayofweek_Friday',
    'dayofweek_Saturday'
]
PM25_FEATURES = [
    'Iowa PM2.5 [ug/m3]',
    'Chicago PM2.5 [ug/m3]',
    'Cranford PM2.5 [ug/m3]',
    'Magnolia PM2.5 [ug/m3]'
]
NO2_FEATURES = [
    'Iowa NO2 [ppb]',
    'Chicago NO2 [ppb]',
    'Cranford NO2 [ppb]',
    'Magnolia NO2 [ppb]'
]

In [34]:
final_pm25_tDensity_HighRes = clarity_HighRes_pm25.merge(openWeather_data_HighRes, left_index=True, right_index=True)
print(
    f'Data Points: {len(clarity_HighRes_pm25)} -> {len(final_pm25_tDensity_HighRes)} -Merged OpenWeather with Clarity')
final_pm25_tDensity_HighRes = final_pm25_tDensity_HighRes.merge(carb_pm25_data_interp, left_index=True,
                                                                right_index=True)
print(f'Data Points: -> {len(final_pm25_tDensity_HighRes)} -Merged Carb')
final_pm25_tDensity_HighRes = final_pm25_tDensity_HighRes.merge(traffic_density, left_index=True,
                                                                right_index=True)
print(f'Data Points: {len(final_pm25_tDensity_HighRes)} -Merged Travel Time')
final_pm25_tDensity_HighRes = dtk.df_indexMerge(
    [clarity_HighRes_pm25, openWeather, carb_pm25_data_interp, traffic_density], verbose=True)
final_pm25_tDensity_HighRes.reset_index('datetime-America/Los_Angeles', inplace=True)
final_pm25_tDensity_HighRes = dtk.df_decomposeDT(final_pm25_tDensity_HighRes, 0)
final_pm25_tDensity_HighRes.set_index('datetime-America/Los_Angeles', inplace=True)
final_pm25_tDensity_HighRes = pd.get_dummies(final_pm25_tDensity_HighRes, columns=['dayofweek'])
pm25_TrafficDensity_col_vals = PM25_FEATURES + DT_FEATS + BACKGROUND_PM25_FEAT + METEOROLOGICAL_FEATS + TRAFFIC_DENSITY_FEATS
final_pm25_tDensity_HighRes = final_pm25_tDensity_HighRes.loc[:, pm25_TrafficDensity_col_vals]
if SAVE_TABLES:
    final_pm25_tDensity_HighRes.to_csv(r'../data/complete_sets/full_pm25_TrafficDensity.csv')
final_pm25_tDensity_HighRes.info()
final_pm25_tDensity_HighRes

Data Points: 63278 -> 0 -Merged OpenWeather with Clarity
Data Points: -> 0 -Merged Carb
Data Points: 0 -Merged Travel Time


AttributeError: module 'aqmd_pylib.aqmd_lib.data_toolkit' has no attribute 'df_indexMerge'

In [None]:
final_pm25_TravelTime = pm25_pivot.merge(openWeather_data_HighRes, left_index=True, right_index=True)
print(f'Data Points: {len(final_pm25_TravelTime)} -Merged OpenWeather with Clarity')
final_pm25_TravelTime = final_pm25_TravelTime.merge(carb_pm25_data_interp, left_index=True, right_index=True)
print(f'Data Points: {len(final_pm25_TravelTime)} -Merged Carb')
final_pm25_TravelTime = final_pm25_TravelTime.merge(tt_data, left_index=True, right_index=True)
print(f'Data Points: {len(final_pm25_TravelTime)} -Merged Travel Time')
final_pm25_TravelTime.reset_index('datetime-America/Los_Angeles', inplace=True)
final_pm25_TravelTime = dtk.df_decomposeDT(final_pm25_TravelTime, 0)
final_pm25_TravelTime.drop(
    columns=[
        'timezone', 'week of year',
    ],
    inplace=True
)
final_pm25_TravelTime.set_index('datetime-America/Los_Angeles', inplace=True)
final_pm25_TravelTime = pd.get_dummies(final_pm25_TravelTime, columns=['dayofweek'])
pm25_TravelTime_col_vals = [
    'Iowa PM2.5 [ug/m3]',
    'Chicago PM2.5 [ug/m3]',
    'Cranford PM2.5 [ug/m3]',
    'Magnolia PM2.5 [ug/m3]',
    'year',
    'month',
    'day',
    'dayofweek_Sunday',
    'dayofweek_Monday',
    'dayofweek_Tuesday',
    'dayofweek_Wednesday',
    'dayofweek_Thursday',
    'dayofweek_Friday',
    'dayofweek_Saturday',
    'Temperature [degC]',
    'Pressure [mbar]',
    'Humidity [%]',
    'Wind Speed [mph]',
    'Wind Direction [degrees]',
    'Background PM2.5 [ug/m3]',

]
final_pm25_TravelTime = final_pm25_TravelTime.loc[:, pm25_TravelTime_col_vals]
if SAVE_TABLES:
    final_pm25_TravelTime.to_csv('../data/complete_sets/full_pm25_TravelTime.csv')

final_pm25_TravelTime.info()
final_pm25_TravelTime

***
> ## __NO2 Datasets__
***

In [None]:
print(f'Starting Data Points: {len(no2_pivot)}')
final_no2_TrafficDensity = no2_pivot.merge(openWeather_data_HighRes,
                                           left_index=True, right_index=True)
print(f'    Data Points: {len(final_no2_TrafficDensity)} - Merged OpenWeather with NO2 Data')
final_no2_TrafficDensity = final_no2_TrafficDensity.merge(carb_no2_data_interp, left_index=True, right_index=True)
print(f'    Data Points: {len(final_no2_TrafficDensity)} - Merged Carb with NO2 Data')
final_no2_TrafficDensity = final_no2_TrafficDensity.merge(traffic_density,
                                                          left_index=True, right_index=True)
print(f'    Data Points: {len(final_no2_TrafficDensity)} - Merged Carb Trraffic Density with NO2 Data')
final_no2_TrafficDensity.reset_index('datetime-America/Los_Angeles', inplace=True)
final_no2_TrafficDensity = dtk.df_decomposeDT(final_no2_TrafficDensity, 0)
final_no2_TrafficDensity.drop(
    columns=['timezone', 'week of year'],
    inplace=True
)
final_no2_TrafficDensity.set_index('datetime-America/Los_Angeles', inplace=True)
final_no2_TrafficDensity = pd.get_dummies(final_no2_TrafficDensity, columns=['dayofweek'])
no2_TrafficDensity_col_vals = [
    'Iowa NO2 [ppb]',
    'Chicago NO2 [ppb]',
    'Cranford NO2 [ppb]',
    'Magnolia NO2 [ppb]',
    'year',
    'month',
    'day',
    'dayofweek_Sunday',
    'dayofweek_Monday',
    'dayofweek_Tuesday',
    'dayofweek_Wednesday',
    'dayofweek_Thursday',
    'dayofweek_Friday',
    'dayofweek_Saturday',
    'Temperature [degC]',
    'Pressure [mbar]',
    'Humidity [%]',
    'Wind Speed [mph]',
    'Wind Direction [degrees]',
    'Background NO2 [ppb]',
    'Density Local 1 (#Vehicles/mile)',
    'Density Local 2 (#Vehicles/mile)',
    'Density Local 9 (#Vehicles/mile)',
    'Density Local 10 (#Vehicles/mile)',
    'Density Fwy 1 (#Vehicles/mile)',
    'Density Fwy 4 (#Vehicles/mile)',
]
final_no2_TrafficDensity = final_no2_TrafficDensity.loc[:, no2_TrafficDensity_col_vals]
if SAVE_TABLES:
    final_no2_TrafficDensity.to_csv(r'../data/complete_sets/full_NO2_TrafficDensity.csv')
final_no2_TrafficDensity.info()
final_no2_TrafficDensity

***
## __Split by Sensor__
***

In [None]:
chicago_drop_pm25 = ['Cranford PM2.5 [ug/m3]', 'Iowa PM2.5 [ug/m3]', 'Magnolia PM2.5 [ug/m3]']
chicago_drop_no2 = ['Cranford NO2 [ppb]', 'Iowa NO2 [ppb]', 'Magnolia NO2 [ppb]']
chicago_pm25_TrafficDensity = final_pm25_tDensity_HighRes.drop(columns=chicago_drop_pm25)
chicago_pm25_TravelTime = final_pm25_TravelTime.drop(columns=chicago_drop_pm25)
chicago_no2_TrafficDensity = final_no2_TrafficDensity.drop(columns=chicago_drop_no2)
if SAVE_TABLES:
    chicago_pm25_TrafficDensity.to_csv('../data/complete_sets/chicago/chicago_full_pm25_TrafficDensity.csv')
    chicago_pm25_TravelTime.to_csv('../data/complete_sets/chicago/chicago_full_pm25_TravelTime.csv')
    chicago_no2_TrafficDensity.to_csv('../data/complete_sets/chicago/chicago_full_NO2_TrafficDensity.csv')

In [None]:
cranford_drop_pm25 = ['Chicago PM2.5 [ug/m3]', 'Iowa PM2.5 [ug/m3]', 'Magnolia PM2.5 [ug/m3]']
cranford_drop_no2 = ['Chicago NO2 [ppb]', 'Iowa NO2 [ppb]', 'Magnolia NO2 [ppb]']
cranford_pm25_TrafficDensity = final_pm25_tDensity_HighRes.drop(columns=cranford_drop_pm25)
cranford_pm25_TravelTime = final_pm25_TravelTime.drop(columns=cranford_drop_pm25)
cranford_NO2_TrafficDensity = final_no2_TrafficDensity.drop(columns=cranford_drop_no2)
if SAVE_TABLES:
    cranford_pm25_TrafficDensity.to_csv('../data/complete_sets/cranford/cranford_full_pm25_TrafficDensity.csv')
    cranford_pm25_TravelTime.to_csv('../data/complete_sets/cranford/cranford_full_pm25_TravelTime.csv')
    cranford_NO2_TrafficDensity.to_csv('../data/complete_sets/cranford/cranford_full_NO2_TrafficDensity.csv')

In [None]:
iowa_drop_pm25 = ['Chicago PM2.5 [ug/m3]', 'Cranford PM2.5 [ug/m3]', 'Magnolia PM2.5 [ug/m3]']
iowa_drop_no2 = ['Chicago NO2 [ppb]', 'Cranford NO2 [ppb]', 'Magnolia NO2 [ppb]']
iowa_pm25_TrafficDensity = final_pm25_tDensity_HighRes.drop(columns=iowa_drop_pm25)
iowa_pm25_TravelTime = final_pm25_TravelTime.drop(columns=iowa_drop_pm25)
iowa_no2_TrafficDensity = final_no2_TrafficDensity.drop(columns=iowa_drop_no2)
if SAVE_TABLES:
    iowa_pm25_TrafficDensity.to_csv('../data/complete_sets/iowa/iowa_full_pm25_TrafficDensity.csv')
    iowa_pm25_TravelTime.to_csv('../data/complete_sets/iowa/iowa_full_pm25_TravelTime.csv')
    iowa_no2_TrafficDensity.to_csv('../data/complete_sets/iowa/iowa_full_NO2_TrafficDensity.csv')

In [None]:
magnolia_drop_pm25 = ['Iowa PM2.5 [ug/m3]', 'Cranford PM2.5 [ug/m3]', 'Chicago PM2.5 [ug/m3]']
magnolia_drop_no2 = ['Iowa NO2 [ppb]', 'Cranford NO2 [ppb]', 'Chicago NO2 [ppb]']
magnolia_pm25_TrafficDensity = final_pm25_tDensity_HighRes.drop(columns=magnolia_drop_pm25)
magnolia_pm25_TravelTime = final_pm25_TravelTime.drop(columns=magnolia_drop_pm25)
magnolia_no2_TrafficDensity = final_no2_TrafficDensity.drop(columns=magnolia_drop_no2)
if SAVE_TABLES:
    magnolia_pm25_TrafficDensity.to_csv('../data/complete_sets/magnolia/magnolia_full_pm25_TrafficDensity.csv')
    magnolia_pm25_TravelTime.to_csv('../data/complete_sets/magnolia/magnolia_full_pm25_TravelTime.csv')
    magnolia_no2_TrafficDensity.to_csv('../data/complete_sets/magnolia/magnolia_full_NO2_TrafficDensity.csv')