In [1]:
import pandas as pd
import os
import requests as req
import json
import csv
import math
from datetime import date, datetime, timedelta, timezone
import time

import asyncio
import aiohttp

In [2]:
# The OpenWeather API key is read from the OPENWEATHER_API_KEY environment
# variable so the secret is never stored in the notebook itself.
# NOTE(review): any key that was previously committed here should be revoked.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the remaining cells can still be defined;
    # requests built with an empty key are expected to be rejected by the API.
    print("WARNING: OPENWEATHER_API_KEY is not set; OpenWeather requests will be rejected.")

In [3]:
# Load the store dimension table; the first row of the CSV supplies the column names.
# The "Latitude" and "Longitude" columns of this frame are read later to build request URLs.
trt_data = pd.read_csv("dimTRTstore.csv", header=0)

In [4]:
# Current Unix time in whole seconds.
# An aware UTC datetime is used on purpose: calling .timestamp() on the naive
# value returned by datetime.utcnow() interprets it as local time, which shifts
# the result by the machine's UTC offset.
utc_timestamp_now = math.floor(datetime.now(timezone.utc).timestamp())
utc_timestamp_now

1670498280

In [6]:
# Query window for the AQI history request, as Unix epoch seconds.
# Both bounds are timezone-aware UTC datetimes: a naive datetime's .timestamp()
# is interpreted in the machine's local zone, so naive bounds would move with
# the host's UTC offset instead of always marking the UTC month boundaries.
month_start = int(datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc).timestamp())    # 2021-01-01 00:00:00 UTC
month_end = int(datetime(2021, 1, 31, 23, 59, 59, tzinfo=timezone.utc).timestamp())  # 2021-01-31 23:59:59 UTC

In [5]:
# Display the start of the query window (Unix epoch seconds).
month_start

1609459200

In [7]:
# Display the end of the query window (Unix epoch seconds).
month_end

1612137599

In [14]:
def hour_rounder(t):
    """Return `t` truncated to the start of its hour (always rounds down)."""
    # Zero every field below the hour; the date and hour fields are left untouched.
    return t.replace(minute=0, second=0, microsecond=0)

In [15]:
# Request settings shared by every AQI history call.
# The endpoint uses HTTPS so the API key carried in the query string is not
# sent over the network in clear text.
options = {
    "aqi_url": "https://api.openweathermap.org/data/2.5/air_pollution/history",
    "start": str(month_start),  # window start, Unix epoch seconds as text
    "end": str(month_end),      # window end, Unix epoch seconds as text
    "api_key": API_KEY,
}

In [16]:
def request_aqi_url(lat,lon):
    """Build the request URL used to fetch AQI history from OpenWeather.

    Params:
        lat: latitude of desired location
        lon: longitude of desired location

    Returns:
        The request URL as a string. The endpoint, the start/end of the time
        window and the API key are taken from the module-level `options` dict.
    """
    # Query parameters in the order they appear in the URL.
    pairs = (
        ("lat", str(lat)),
        ("lon", str(lon)),
        ("start", options["start"]),
        ("end", options["end"]),
        ("appid", options["api_key"]),
    )
    query = "&".join(key + "=" + value for key, value in pairs)
    return options["aqi_url"] + "?" + query

In [17]:
# One request URL per row of trt_data, pairing its Latitude and Longitude columns by position.
AQI_URL_LIST = [
    request_aqi_url(latitude, longitude)
    for latitude, longitude in zip(trt_data["Latitude"], trt_data["Longitude"])
]

In [18]:
async def fetch(session, url):
    """GET `url` through `session` and return the decoded JSON body.

    Params:
        session: object whose `get(url)` is an async context manager yielding
            a response (presumably an aiohttp ClientSession - see the caller).
        url: address to request.

    Returns:
        Whatever `response.json(content_type=None)` produces for the body.
    """
    async with session.get(url) as resp:
        # content_type=None is passed so the body is decoded as JSON without
        # insisting on a particular Content-Type header.
        payload = await resp.json(content_type=None)
        # One-second pause before the response context is left; kept
        # to prevent the remote host close error.
        await asyncio.sleep(1)
        return payload

In [19]:
async def main_aqi(urls):
    """Fetch every URL in `urls` concurrently and return the decoded responses.

    Params:
        urls: iterable of request URLs. The function now iterates this
            argument rather than the global AQI_URL_LIST, so it can be reused
            for any list of URLs.

    Returns:
        A list with one `fetch` result per URL, in the same order as `urls`
        (an empty list when `urls` is empty).
    """
    # A single session is shared by all requests and closed when the block exits.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        return await asyncio.gather(*tasks)

In [21]:
# Download the AQI responses for every URL in AQI_URL_LIST.
# NOTE(review): top-level `await` relies on the notebook kernel's event loop; it is not valid in a plain script.
result_aqi = await main_aqi(AQI_URL_LIST)

In [22]:
def aqi_historical_to_parquet(response_list, file_name="AQI_Historical_Nov_2022.parquet"):
    """Flatten AQI history responses and write them to a parquet file.

    Params:
        response_list: list of decoded response dicts. Each one must hold a
            'list' of records (each with a 'dt' in Unix epoch seconds) and a
            'coord' dict with 'lon' and 'lat'. The function now reads this
            argument rather than the global `result_aqi`.
        file_name: output path. The default is the name this notebook has
            always written; pass a different name when exporting another
            time window.

    Returns:
        None. The flattened table is written to `file_name`.

    Raises:
        KeyError: if a response lacks 'list' or 'coord' (for example an error
            payload from the API), or if there are no records at all.
    """
    # One row per entry of each response's 'list', tagged with that response's
    # coordinates as the 'coord.lon' and 'coord.lat' columns.
    dn = pd.json_normalize(response_list, 'list', [['coord', 'lon'], ['coord', 'lat']])
    # Floor the epoch seconds to the start of the hour, then render the result
    # as a UTC 'YYYY-MM-DD HH:MM:SS' string.
    hour_start = dn['dt'] // 3600 * 3600
    dn['dt'] = pd.to_datetime(hour_start, unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')
    dn.to_parquet(file_name, index=False)

In [23]:
# Write the downloaded AQI responses to the parquet file.
aqi_historical_to_parquet(result_aqi)