# Part 1: Gathering Data


In [65]:
from pathlib import Path
import time

import requests
import pandas as pd

def fetch_and_cache(data_url, file, data_dir="data", force=False):
    """
    Download and cache a url and return the file object.

    data_url: the web address to download
    file: the file in which to save the results.
    data_dir: (default="data") the location to save the data
    force: if true the file is always re-downloaded

    return: The pathlib.Path object representing the file.
    """

    data_dir = Path(data_dir)
    data_dir.mkdir(exist_ok = True)
    file_path = data_dir / Path(file)
    # If the file already exists and we want to force a download then
    # delete the file first so that the creation date is correct.
    if force and file_path.exists():
        file_path.unlink()
    if force or not file_path.exists():
        print('Downloading...', end=' ')
        resp = requests.get(data_url)
        with file_path.open('wb') as f:
            f.write(resp.content)
        print('Done!')
        last_modified_time = time.ctime(file_path.stat().st_mtime)
    else:
        last_modified_time = time.ctime(file_path.stat().st_mtime)
        print("Using cached version that was downloaded (UTC):", last_modified_time)
    return file_path

def fetch_and_cache_gdrive(gdrive_id, file, data_dir="data", force=False):
    """
    Download and cache a url and return the file object.

    data_url: the web address to download
    file: the file in which to save the results.
    data_dir: (default="data") the location to save the data
    force: if true the file is always re-downloaded

    return: The pathlib.Path object representing the file.
    """

    data_dir = Path(data_dir)
    data_dir.mkdir(exist_ok = True)
    file_path = data_dir / Path(file)
    # If the file already exists and we want to force a download then
    # delete the file first so that the creation date is correct.
    if force and file_path.exists():
        file_path.unlink()
    if force or not file_path.exists():
        print('Downloading...', end=' ')
        download_file_from_google_drive(gdrive_id, file_path)
        print('Done!')
        last_modified_time = time.ctime(file_path.stat().st_mtime)
    else:
        last_modified_time = time.ctime(file_path.stat().st_mtime)
        print("Using cached version that was downloaded (UTC):", last_modified_time)
    return file_path



# https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)

    print(response)
    needs_confirm = needs_confirmation(response)

    if needs_confirm:
        params = { 'id' : id, 'confirm' : 't' ,}
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)

def needs_confirmation(response):

    print(response.__dict__)
    print(response.headers["Content-Type"])
    print(response.raw.__dict__)
    print(response.raw._connection.__dict__)
    print(response.raw._original_response.__dict__)
    print(response.raw._original_response.headers.__dict__)
    print(response.raw.connection.__dict__)

    return response.headers["Content-Type"] == 'text/html; charset=utf-8'

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)


In [66]:
# download required datasets

file_dict = {
    "aqs_sites.csv": "1fMfkw-NJ03VrQxYpDjM_4T6VDaWhvegi",
    "greenhouse_gas_emmitter_facilities.csv": "1yjTRv1OrsdWk-xNW4ZbFYB7_8Tt_x_fV",
    "greenhouse_gas_emmiter_gas_types.csv":"1akEokx_wqsgYqGNnNJsgebA6DDGHJLR2",
    "2020_daily_global_weather.csv":"15pjzsCiIE0uL69a4tZYgXoog8TKjIJNJ",
    "daily_wind_2020.csv": "18MqsjkN0EYPuLb0iR0U9sZYmNVCdd94h",
    "daily_temperature_2020.csv": "1Q62JlGtc65L2eU9FfQZcP9UyMCdcem1T",
    "traffic_volumes.csv": "1sZyjZSWz1xEoB26u_OrhKokZYhTIJRMD",
    "county_aqi_2020_daily.csv": "1uNH90XRceOfb16ctuUsYXVIEDeR2yaeC"
    }

for k, v in file_dict.items():
    path = f"./datasets/{k}"
    if not os.path.exists(path):
        download_file_from_google_drive(v,path)



<Response [200]>
{'_content': False, '_content_consumed': False, '_next': None, 'status_code': 200, 'headers': {'Content-Type': 'text/html; charset=utf-8', 'Vary': 'Sec-Fetch-Dest, Sec-Fetch-Mode, Sec-Fetch-Site', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Pragma': 'no-cache', 'Expires': 'Mon, 01 Jan 1990 00:00:00 GMT', 'Date': 'Sun, 24 Apr 2022 22:48:56 GMT', 'Strict-Transport-Security': 'max-age=31536000', 'Cross-Origin-Opener-Policy': 'same-origin; report-to="DriveUntrustedContentHttp"', 'Content-Security-Policy': "script-src 'nonce-vMMXfBbp7OELbTh3ugc3Vw' 'unsafe-inline';object-src 'none';base-uri 'self';report-uri /_/DriveUntrustedContentHttp/cspreport;worker-src 'self', require-trusted-types-for 'script';report-uri /_/DriveUntrustedContentHttp/cspreport", 'Report-To': '{"group":"DriveUntrustedContentHttp","max_age":2592000,"endpoints":[{"url":"https://csp.withgoogle.com/csp/report-to/DriveUntrustedContentHttp/external"}]}', 'Accept-CH': 'Sec-CH-UA-Arch, S

## Creating Dataframes

In [21]:
aqs_site_df = pd.read_csv(f"./datasets/aqs_sites.csv")
gh_gas_facil_df = pd.read_csv(f"./datasets/greenhouse_gas_emmitter_facilities.csv")
gh_gas_type_df = pd.read_csv(f"./datasets/greenhouse_gas_emmiter_gas_types.csv")
weather_2020_df = pd.read_csv(f"./datasets/2020_daily_global_weather.csv")
wind_2020_df = pd.read_csv(f"./datasets/daily_wind_2020.csv")
temp_2020_df = pd.read_csv(f"./datasets/daily_temperature_2020.csv")
traffic_volumes_df = pd.read_csv(f"./datasets/traffic_volumes.csv")
aqi_2020 = pd.read_csv(f"./datasets/county_aqi_2020_daily.csv")

  gh_gas_facil_df = pd.read_csv(f"./datasets/greenhouse_gas_emmitter_facilities.csv")


### aqs site data

In [22]:
print(aqs_site_df.columns)
display(aqs_site_df.sample(10))

Index(['State Code', 'County Code', 'Site Number', 'Latitude', 'Longitude',
       'Datum', 'Elevation', 'Land Use', 'Location Setting',
       'Site Established Date', 'Site Closed Date', 'Met Site State Code',
       'Met Site County Code', 'Met Site Site Number', 'Met Site Type',
       'Met Site Distance', 'Met Site Direction', 'GMT Offset',
       'Owning Agency', 'Local Site Name', 'Address', 'Zip Code', 'State Name',
       'County Name', 'City Name', 'CBSA Name', 'Tribe Name',
       'Extraction Date'],
      dtype='object')


Unnamed: 0,State Code,County Code,Site Number,Latitude,Longitude,Datum,Elevation,Land Use,Location Setting,Site Established Date,...,Owning Agency,Local Site Name,Address,Zip Code,State Name,County Name,City Name,CBSA Name,Tribe Name,Extraction Date
17224,48,61,2002,26.135173,-97.630039,WGS84,10.1,RESIDENTIAL,URBAN AND CENTER CITY,1983-01-01,...,Texas Commission On Environmental Quality,San Benito,195 ADELE STREET,78586.0,Texas,Cameron,San Benito,"Brownsville-Harlingen, TX",,2021-05-18
7711,24,15,1001,39.609001,-75.82966,WGS84,8.0,COMMERCIAL,URBAN AND CENTER CITY,1967-01-01,...,Maryland Department of the Environment,,COUNTY COURT HOUSE,,Maryland,Cecil,Elkton,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",,2021-05-18
11457,34,5,7,39.958901,-74.788766,WGS84,5.0,AGRICULTURAL,RURAL,1979-01-01,...,US EPA National Exposure Research Lab,,NEWBOLD CORNER ROAD,8060.0,New Jersey,Burlington,Not in a City,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",,2021-05-18
9173,27,53,1007,45.03972,-93.29874,WGS84,263.0,RESIDENTIAL,SUBURBAN,1966-01-01,...,"Minnesota Pollution Control Agency, Division O...",Humboldt Avenue,4646 North Humboldt,55412.0,Minnesota,Hennepin,Minneapolis,"Minneapolis-St. Paul-Bloomington, MN-WI",,2021-05-18
11198,32,23,15,36.193469,-116.007584,WGS84,823.0,RESIDENTIAL,RURAL,2009-10-07,...,Nevada Division Of Environmental Protection,Glen Oaks,410 SOUTH GLENOAKS STREET,89048.0,Nevada,Nye,Pahrump,"Pahrump, NV",,2021-05-18
1169,6,19,9,36.808838,-119.792923,WGS84,90.0,COMMERCIAL,URBAN AND CENTER CITY,1993-10-27,...,California Air Resources Board,FORMERLY FRESNO-OLIVE - LOCATED AT OLIVE AND F...,"1145 FISHER STREET, FRESNO",,California,Fresno,Fresno,"Fresno, CA",,2021-05-18
4784,17,157,3,38.21866,-89.795657,WGS84,450.0,INDUSTRIAL,RURAL,1989-01-01,...,Illinois Environmental Protection Agency,,"SITE B, CNTY RDS 00.0 N & 25.0 E",,Illinois,Randolph,Not in a City,,,2021-05-18
16865,47,139,2,35.202888,-84.339358,WGS84,512.0,AGRICULTURAL,RURAL,1973-01-01,...,Tennessee Division Of Air Pollution Control,,57 MILES S OF BULL RUN POWER PLANT,37385.0,Tennessee,Polk,Not in a City,"Cleveland, TN",,2021-05-18
11082,32,3,8,36.057754,-115.002494,WGS84,518.0,COMMERCIAL,SUBURBAN,1983-01-01,...,"Clark County, NV DAQEM",,"1239 N BOULDER HWY, HENDERSON",,Nevada,Clark,Henderson,"Las Vegas-Henderson-Paradise, NV",,2021-05-18
10359,30,29,13,48.389607,-114.137649,WGS84,945.0,INDUSTRIAL,RURAL,1982-01-01,...,"MT Dept Of Environmental Quality, Air Quality ...",,(ANACONDA SOUTH) COLUMBIA FALLS MT,59912.0,Montana,Flathead,Not in a City,"Kalispell, MT",,2021-05-18


### gh gas facility data

In [23]:
print(gh_gas_facil_df.columns)
display(gh_gas_facil_df.sample(10))

Index(['V_GHG_EMITTER_FACILITIES.ADDRESS1',
       'V_GHG_EMITTER_FACILITIES.ADDRESS2',
       'V_GHG_EMITTER_FACILITIES.CEMS_USED', 'V_GHG_EMITTER_FACILITIES.CITY',
       'V_GHG_EMITTER_FACILITIES.COUNTY',
       'V_GHG_EMITTER_FACILITIES.COUNTY_FIPS',
       'V_GHG_EMITTER_FACILITIES.FACILITY_ID',
       'V_GHG_EMITTER_FACILITIES.LATITUDE',
       'V_GHG_EMITTER_FACILITIES.LONGITUDE',
       'V_GHG_EMITTER_FACILITIES.PRIMARY_NAICS_CODE',
       'V_GHG_EMITTER_FACILITIES.STATE', 'V_GHG_EMITTER_FACILITIES.STATE_NAME',
       'V_GHG_EMITTER_FACILITIES.YEAR', 'V_GHG_EMITTER_FACILITIES.ZIP',
       'V_GHG_EMITTER_FACILITIES.FACILITY_NAME',
       'V_GHG_EMITTER_FACILITIES.SECONDARY_NAICS_CODE',
       'V_GHG_EMITTER_FACILITIES.ADDITIONAL_NAICS_CODES',
       'V_GHG_EMITTER_FACILITIES.COGENERATION_UNIT_EMISS_IND',
       'V_GHG_EMITTER_FACILITIES.EPA_VERIFIED',
       'V_GHG_EMITTER_FACILITIES.PARENT_COMPANY',
       'V_GHG_EMITTER_FACILITIES.PLANT_CODE_INDICATOR'],
      dtype='object')


Unnamed: 0,V_GHG_EMITTER_FACILITIES.ADDRESS1,V_GHG_EMITTER_FACILITIES.ADDRESS2,V_GHG_EMITTER_FACILITIES.CEMS_USED,V_GHG_EMITTER_FACILITIES.CITY,V_GHG_EMITTER_FACILITIES.COUNTY,V_GHG_EMITTER_FACILITIES.COUNTY_FIPS,V_GHG_EMITTER_FACILITIES.FACILITY_ID,V_GHG_EMITTER_FACILITIES.LATITUDE,V_GHG_EMITTER_FACILITIES.LONGITUDE,V_GHG_EMITTER_FACILITIES.PRIMARY_NAICS_CODE,...,V_GHG_EMITTER_FACILITIES.STATE_NAME,V_GHG_EMITTER_FACILITIES.YEAR,V_GHG_EMITTER_FACILITIES.ZIP,V_GHG_EMITTER_FACILITIES.FACILITY_NAME,V_GHG_EMITTER_FACILITIES.SECONDARY_NAICS_CODE,V_GHG_EMITTER_FACILITIES.ADDITIONAL_NAICS_CODES,V_GHG_EMITTER_FACILITIES.COGENERATION_UNIT_EMISS_IND,V_GHG_EMITTER_FACILITIES.EPA_VERIFIED,V_GHG_EMITTER_FACILITIES.PARENT_COMPANY,V_GHG_EMITTER_FACILITIES.PLANT_CODE_INDICATOR
23622,216 Oakley Pebble Road,,,Owingsville,BATH COUNTY,21011.0,1002586.0,38.233848,-83.716902,486210.0,...,KENTUCKY,2014.0,40360.0,Owingsville,,,N,,TEXAS EASTERN TRANSMISSION L.P. (100%),N
33979,851 ROBISON ROAD EAST,,,ERIE,ERIE,42049.0,1007649.0,42.059457,-80.014484,562212.0,...,PENNSYLVANIA,2011.0,16509.0,LAKE VIEW LDFL,,,N,,WASTE MANAGEMENT INC. (100%),
13916,1990 TOMOKA FARMS RD,,,PORT ORANGE,VOLUSIA COUNTY,12127.0,1005328.0,29.1314,-81.0984,562212.0,...,FLORIDA,2015.0,32128.0,VOLUSIA SOLID WASTE MANAGEMENT DIVISION,,,N,,VOLUSIA COUNTY BOARD OF COUNTY COMMISSIONERS (...,N
45349,13000 Bay Park Road,,,Pasadena,HARRIS COUNTY,48201.0,1003570.0,29.64174,-95.0654,325188.0,...,TEXAS,2010.0,77507.0,Albemarle Corporation Bayport Plant,,,N,,ALBEMARLE CORPORATION (100%),
1152,11203 South River Road,,,Taylor,TAYLOR COUNTY,55119.0,1012924.0,45.14727,-90.46916,212322.0,...,WISCONSIN,2018.0,54659.0,Hi-Crush Blair LLC,,,N,,HI-CRUSH PARTNERS LP (100%),N
46901,,,,Roaring Springs,DICKENS COUNTY,48125.0,1013208.0,33.7783,-100.8766,221121.0,...,TEXAS,2019.0,79220.0,Cottonwood Substation,,,N,,WETT HOLDINGS LLC (100%),N
48536,,,,Barstow,WARD COUNTY,48475.0,1005215.0,31.5225,-103.465278,211112.0,...,TEXAS,2017.0,79719.0,Mivida Treater Plant,,,N,,ENERGY TRANSFER PARTNERS LP (100%),N
69671,600 A ST,,,DIBOLL,ANGELINA COUNTY,48005.0,1002387.0,31.191713,-94.788946,321219.0,...,TEXAS,2014.0,75941.0,DIBOLL COMPLEX,321113.0,,N,,KOCH INDUSTRIES INC (100%),N
70106,10300 South Kent Drive SW,,,BYRON CENTER,KENT COUNTY,26081.0,1003464.0,42.775556,-85.678889,562212.0,...,MICHIGAN,2015.0,49315.0,KENT COUNTY DPW SOUTH KENT LANDFILL,,,N,,KENT COUNTY MICHIGAN DEPARTMENT OF PUBLIC WORK...,N
54384,54741 TESORO ROAD,,,KENAI,KENAI PENINSULA BOROUGH,2122.0,1007741.0,60.683603,-151.367204,324110.0,...,ALASKA,2013.0,99611.0,TESORO ALASKA PETROLEUM CO,,,Y,Y,TESORO CORP (100%),N


### gh gas type data

In [24]:
print(gh_gas_type_df.columns)
display(gh_gas_type_df.sample(10))

Index(['V_GHG_EMITTER_GAS.ADDRESS1', 'V_GHG_EMITTER_GAS.ADDRESS2',
       'V_GHG_EMITTER_GAS.CITY', 'V_GHG_EMITTER_GAS.CO2E_EMISSION',
       'V_GHG_EMITTER_GAS.COUNTY', 'V_GHG_EMITTER_GAS.FACILITY_ID',
       'V_GHG_EMITTER_GAS.GAS_CODE', 'V_GHG_EMITTER_GAS.GAS_NAME',
       'V_GHG_EMITTER_GAS.LATITUDE', 'V_GHG_EMITTER_GAS.LONGITUDE',
       'V_GHG_EMITTER_GAS.STATE', 'V_GHG_EMITTER_GAS.STATE_NAME',
       'V_GHG_EMITTER_GAS.YEAR', 'V_GHG_EMITTER_GAS.ZIP',
       'V_GHG_EMITTER_GAS.FACILITY_NAME', 'V_GHG_EMITTER_GAS.COUNTY_FIPS'],
      dtype='object')


Unnamed: 0,V_GHG_EMITTER_GAS.ADDRESS1,V_GHG_EMITTER_GAS.ADDRESS2,V_GHG_EMITTER_GAS.CITY,V_GHG_EMITTER_GAS.CO2E_EMISSION,V_GHG_EMITTER_GAS.COUNTY,V_GHG_EMITTER_GAS.FACILITY_ID,V_GHG_EMITTER_GAS.GAS_CODE,V_GHG_EMITTER_GAS.GAS_NAME,V_GHG_EMITTER_GAS.LATITUDE,V_GHG_EMITTER_GAS.LONGITUDE,V_GHG_EMITTER_GAS.STATE,V_GHG_EMITTER_GAS.STATE_NAME,V_GHG_EMITTER_GAS.YEAR,V_GHG_EMITTER_GAS.ZIP,V_GHG_EMITTER_GAS.FACILITY_NAME,V_GHG_EMITTER_GAS.COUNTY_FIPS
89987,10340 68TH STREET NORTHWEST,,TIOGA,3240.5,WILLIAMS COUNTY,1001894,CH4,Methane,48.40152,-102.91418,ND,NORTH DAKOTA,2016,58852,TIOGA GAS PROCESSING PLANT,38105.0
113873,3149 LOUISIANA HWY 10,,WASHINGTON,49919.6,ST. LANDRY PARISH,1007048,CO2,Carbon Dioxide,30.671703,-92.125385,LA,LOUISIANA,2019,70589,TRANSCO STATION 54,22097.0
17935,1795 BURT ST,,BEAUMONT,1845.0,Jefferson,1007959,CH4,Methane,30.0639,-94.0703,TX,TEXAS,2011,77701,Exxonmobil Beaumont Refinery,48245.0
206172,35863 FAIRVIEW RD,,HINKLEY,58875.3,SAN BERNARDINO COUNTY,1004272,CO2,Carbon Dioxide,34.902694,-117.160594,CA,CALIFORNIA,2015,92347,PG&E HINKLEY COMPRESSOR STATION,6071.0
94487,300 INTERNATIONAL BLVD.,,CLARKSVILLE,25.33,MONTGOMERY COUNTY,1005263,N2O,Nitrous Oxide,36.60373,-87.25869,TN,TENNESSEE,2018,37040,FLORIM USA INC,47125.0
48930,,,ROCK SPRINGS,27034.5,SWEETWATER COUNTY,1005989,CH4,Methane,41.5222,-109.3128,WY,WYOMING,2016,82901,Dominion Energy Questar - Rock Springs Station...,56037.0
22376,,,RIDLEY PARK,22933.1,DELAWARE COUNTY,1000638,CO2,Carbon Dioxide,39.862773,-75.321425,PA,PENNSYLVANIA,2015,19078,BOEING HELICOPTER DIV,42045.0
151785,1250 West Maricopa Highway,,Casa Grande,14.0,PINAL COUNTY,1000378,CH4,Methane,32.894,-111.78312,AZ,ARIZONA,2018,85193,Abbott Laboratories,4021.0
203061,1 WAREHOUSE ROAD,,COLSTRIP,37283.0,Rosebud,1001020,CH4,Methane,45.8831,-106.614,MT,MONTANA,2013,59323,Colstrip,30087.0
83598,,,Offshore,12.218,,1011597,N2O,Nitrous Oxide,27.835135,-96.013057,TX,TEXAS,2019,0,BA A 133 B C-AUX E (Complex ID # 10249),


### weather 2020 data 

In [26]:
print(weather_2020_df.columns)
display(weather_2020_df.sample(1))



Unnamed: 0,Unnamed: 1,"<!DOCTYPE html><html><head><title>Google Drive - Virus scan warning</title><meta http-equiv=""content-type"" content=""text/html; charset=utf-8""/><style nonce=""n64sC9Z6ynCaBtoR/JwkSQ"">/* Copyright 2022 Google Inc. All Rights Reserved. */"
.goog-inline-block{position:relative;display:-moz-inline-box;display:inline-block}* html .goog-inline-block{display:inline}*:first-child+html .goog-inline-block{display:inline}.goog-link-button{position:relative;color:#15c;text-decoration:underline;cursor:pointer}.goog-link-button-disabled{color:#ccc;text-decoration:none;cursor:default}body{color:#222;font:normal 13px/1.4 arial,sans-serif;margin:0}.grecaptcha-badge{visibility:hidden}.uc-main{padding-top:50px;text-align:center}#uc-dl-icon{display:inline-block;margin-top:16px;padding-right:1em;vertical-align:top}#uc-text{display:inline-block;max-width:68ex;text-align:left}.uc-error-caption,.uc-warning-caption{color:#222;font-size:16px}...


### wind 2020 data

In [27]:
print(wind_2020_df.columns)
display(wind_2020_df.sample(10))



ValueError: Cannot take a larger sample than population when 'replace=False'

### temp 2020 data

In [28]:
print(temp_2020_df.columns)
display(temp_2020_df.sample(10))

Index(['State Code', 'County Code', 'Site Num', 'Parameter Code', 'POC',
       'Latitude', 'Longitude', 'Datum', 'Parameter Name', 'Sample Duration',
       'Pollutant Standard', 'Date Local', 'Units of Measure', 'Event Type',
       'Observation Count', 'Observation Percent', 'Arithmetic Mean',
       '1st Max Value', '1st Max Hour', 'AQI', 'Method Code', 'Method Name',
       'Local Site Name', 'Address', 'State Name', 'County Name', 'City Name',
       'CBSA Name', 'Date of Last Change'],
      dtype='object')


Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Sample Duration,...,AQI,Method Code,Method Name,Local Site Name,Address,State Name,County Name,City Name,CBSA Name,Date of Last Change
230400,48,439,3009,62101,1,32.98426,-97.063721,WGS84,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,Grapevine Fairway,4100 Fairway Dr,Texas,Tarrant,Grapevine,"Dallas-Fort Worth-Arlington, TX",2021-03-19
103241,21,59,5,62101,1,37.780776,-87.075307,WGS84,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,OWENSBORO PRIMARY,716 PLEASANT VALLEY ROAD,Kentucky,Daviess,Not in a city,"Owensboro, KY",2021-03-31
165535,37,119,41,62101,1,35.2401,-80.785683,WGS84,Outdoor Temperature,1 HOUR,...,,20,INSTRUMENTAL - SPOT READING,Garinger High School,1130 EASTWAY DRIVE,North Carolina,Mecklenburg,Charlotte,"Charlotte-Concord-Gastonia, NC-SC",2021-01-22
31312,6,39,2010,62101,1,36.953256,-120.034203,WGS84,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,Madera-City,28261 Avenue 14 Madera CA 93638,California,Madera,Madera,"Madera, CA",2021-04-08
170211,38,57,124,62101,1,47.400624,-101.929099,WGS84,Outdoor Temperature,1 HOUR,...,,41,INSTRUMENTAL - ELEC. OR MACH. AVG. LEVEL 1,6493 FIRST STREET SW,DGC #17,North Dakota,Mercer,Not in a city,,2021-03-18
175843,40,47,555,62101,1,36.512363,-97.845959,NAD83,Outdoor Temperature,1 HOUR,...,,60,Instrumental - Vaisala 435C RH/AT Sensor,,"11826 N 30th St, Kremlin, OK 73753",Oklahoma,Garfield,Enid,"Enid, OK",2021-02-10
285268,56,37,1002,62101,1,41.621073,-109.835484,WGS84,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,Westvaco #002,Genesis Alkali-Westvaco Upwind (West of facility),Wyoming,Sweetwater,Not in a city,"Rock Springs, WY",2021-04-05
202979,48,135,3,62101,1,31.836577,-102.342066,NAD83,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,Odessa-Hays Elementary School,Barrett & Monahans Streets,Texas,Ector,Odessa,"Odessa, TX",2021-03-19
58723,6,101,3,62101,2,39.138773,-121.618549,WGS84,Outdoor Temperature,1 HOUR,...,,59,Instrumental - Vaisala HMP 155,Yuba City,"773 ALMOND ST, YUBA CITY",California,Sutter,Yuba City,"Yuba City, CA",2021-03-23
96983,19,45,19,62101,1,41.823283,-90.211982,WGS84,Outdoor Temperature,1 HOUR,...,,40,INSTRUMENTAL - ELECTRONIC OR MACHINE AVG.,"CLINTON, CHANCY PARK",23RD & CAMANCHE,Iowa,Clinton,Clinton,"Clinton, IA",2021-01-12


### traffic volumes data

In [29]:
print(traffic_volumes_df.columns)
display(traffic_volumes_df.sample(10))

Index(['OBJECTID_1', 'OBJECTID', 'District', 'Route', 'Rte_SFX', 'County',
       'PM_PFX', 'Postmile', 'PM_SFX', 'Descriptn', 'Back_pk_h', 'Back_pk_m',
       'Back_AADT', 'Ahead_pk_h', 'Ahead_pk_m', 'Ahead_AADT', 'Lon_S_or_W',
       'Lat_S_or_W', 'Lon_N_or_E', 'Lat_N_or_E'],
      dtype='object')


Unnamed: 0,OBJECTID_1,OBJECTID,District,Route,Rte_SFX,County,PM_PFX,Postmile,PM_SFX,Descriptn,Back_pk_h,Back_pk_m,Back_AADT,Ahead_pk_h,Ahead_pk_m,Ahead_AADT,Lon_S_or_W,Lat_S_or_W,Lon_N_or_E,Lat_N_or_E
2627,2628,2628,4,680,,CC,R,11.28,,LIVORNA ROAD,12900.0,171000.0,165000.0,12700,166000,161000,-122.0352282,37.86303388,-122.0350278,37.863146
6828,6829,6829,11,905,,SD,,5.164,,JCT. RTE. 805,6500.0,69000.0,67000.0,8700,105000,100000,-117.040511,32.56840146,-117.0406139,32.56806908
5859,5860,5860,10,59,,MER,,19.0,,BELLEVUE RD,480.0,5500.0,3800.0,420,3750,3700,-120.5033718,37.36095488,-120.5033718,37.36095488
1361,1362,1362,3,99,,SUT,,26.12,,BARRY ROAD,1900.0,22300.0,21200.0,2100,23600,22600,-121.6349389,39.0763065,-121.6348172,39.07628175
5950,5951,5951,10,99,,SJ,,0.0,,STANISLAUS/SAN JOAQUIN COUNTY LINE,,,,9200,119000,114200,-121.1100833,37.73032136,-121.1098951,37.73038018
3634,3635,3635,6,145,,FRE,,13.212,,JCT. RTE. 269,600.0,5500.0,4800.0,680,6700,5300,-120.103086,36.429652,-120.103086,36.429652
867,868,868,3,20,,COL,,30.639,,"COLUSA, FREMONT STREET",680.0,6800.0,6700.0,680,7000,6900,-122.0171672,39.20931187,-122.0171672,39.20931187
1149,1150,1150,3,65,,PLA,R,17.446,,NICOLAUS ROAD,2100.0,25000.0,23600.0,2100,25000,23600,-121.3686296,38.89724937,-121.3682534,38.89724227
5955,5956,5956,10,99,,SJ,,6.654,,"MANTECA, NORTH JCT. RTE. 120",7600.0,91000.0,83000.0,8400,93000,92500,-121.1913311,37.7973453,-121.1911279,37.79741574
1079,1080,1080,3,50,,SAC,,17.008,,FOLSOM BOULEVARD/NATOMA,10300.0,134000.0,125400.0,8800,100000,95000,-121.1972109,38.63996841,-121.1973718,38.63978433


### aqi data 2020

In [31]:
print(aqi_2020.columns)
display(aqi_2020.sample(10))

Index(['State Name', 'county Name', 'State Code', 'County Code', 'Date', 'AQI',
       'Category', 'Defining Parameter', 'Defining Site',
       'Number of Sites Reporting'],
      dtype='object')


Unnamed: 0,State Name,county Name,State Code,County Code,Date,AQI,Category,Defining Parameter,Defining Site,Number of Sites Reporting
317414,Washington,Yakima,53,77,2020-12-05,80,Moderate,PM2.5,53-077-0005,4
310087,Washington,Garfield,53,23,2020-03-12,23,Good,PM2.5,53-023-0001,1
308109,Washington,Chelan,53,7,2020-09-15,295,Very Unhealthy,PM2.5,53-007-0007,4
224391,Ohio,Wood,39,173,2020-06-01,41,Good,Ozone,39-173-0003,1
59583,Georgia,Bibb,13,21,2020-06-24,29,Good,Ozone,13-021-0012,1
319160,West Virginia,Hancock,54,29,2020-07-13,36,Good,Ozone,54-029-0009,3
320984,West Virginia,Monongalia,54,61,2020-09-04,37,Good,Ozone,54-061-0003,1
205786,North Carolina,Rowan,37,159,2020-02-03,42,Good,Ozone,37-159-0021,1
191447,New York,Putnam,36,79,2020-07-22,61,Moderate,Ozone,36-079-0005,1
262486,South Dakota,Brown,46,13,2020-02-13,43,Good,PM2.5,46-013-0004,1
