Work Flow:
0. Call Airnow API for latitude and longitude data
1. Call Weather API and Air Pollution API from OpenWeather
2. Transform the JSON responses, convert them to DataFrames, and save them as CSV files in the local directory

Next Steps:
0. Figure out a way to get latitude and longitude data of all desired locations
1. Schedule hourly calls to both APIs in parallel (virtual environment, multithreading)
2. Set up a database (or use existing one) and establish a pipeline to populate the database on an hourly basis
3. Connect database to Power BI dashboards and visualize the data
4. Use the database to perform machine learning tasks
5. Set up a notification system for alerts

Currently using a student license for OpenWeather (valid for only 6 months) that allows 3000 calls/min; the free version allows 60 calls/min.
Weather data is extracted from the API according to the latitude and longitude of the Airnow AQI data.

In [1]:
import pandas as pd
import os
import requests as req
import json
import csv
from datetime import datetime, timedelta
from datetime import date

In [2]:
# OpenWeather API key.
# Prefer the OPENWEATHER_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so
# existing runs keep working.
# NOTE(review): this key has been committed in plain text; it should be
# rotated and the fallback removed.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', '6dc507f5ca3f2dac7486e5037f6c9f89')

In [3]:
# Load the location table from the local CSV (first row is the header).
# Its "Latitude" and "Longitude" columns are read later to build request URLs.
data_frame = pd.read_csv('dimTRTstore.csv', header=0)

In [4]:
# API parameters shared by the URL builders.
# Both endpoints use HTTPS: the API key travels in the query string, so a
# plain-http URL would send it unencrypted.
options = {
    "aqi_url": "https://api.openweathermap.org/data/2.5/air_pollution",
    "weather_url": "https://api.openweathermap.org/data/2.5/weather",
    "units": "metric",
    "api_key": API_KEY,
}

In [5]:
def hour_rounder(t):
    """Return a copy of ``t`` truncated (rounded down) to the start of its hour."""
    # Only the sub-hour fields are zeroed; the date and hour are left as they are.
    truncated = t.replace(minute=0, second=0, microsecond=0)
    return truncated

In [6]:
def request_weather_url(lat,lon):
    """Build the request URL for the weather endpoint configured in ``options``.

    Params:
        lat: latitude of desired location
        lon: longitude of desired location

    Returns:
        The request url for weather, as a string, carrying the ``lat``,
        ``lon``, ``appid`` and ``units`` query parameters in that order.
    """
    query_parts = (
        "lat=" + str(lat),
        "lon=" + str(lon),
        "appid=" + options["api_key"],
        "units=" + options["units"],
    )
    return options["weather_url"] + "?" + "&".join(query_parts)

In [None]:
# Builds request url to access AQI data from OpenWeather
# Params: 
# lat: latitude of desired location
# lon: longitude of desired location
# return: request url for aqi

# def request_aqi_url(lat,lon):

#     REQUEST_URL = options["aqi_url"] + "?lat=" \
#     + str(lat) \
#     + "&lon=" + str(lon) \
#     + "&appid=" + options["api_key"]

#     return REQUEST_URL

In [8]:
# Build one request URL per location for each endpoint.
WEATHER_URL_LIST = []
# AQI_URL_LIST has to be defined here: aqi_loop() iterates over it and would
# raise NameError otherwise. The URL is assembled inline (lat, lon, appid) so
# this cell does not depend on a separate AQI URL helper.
AQI_URL_LIST = []
for lat, lon in zip(data_frame["Latitude"], data_frame["Longitude"]):
    WEATHER_URL_LIST.append(request_weather_url(lat, lon))
    AQI_URL_LIST.append(
        options["aqi_url"] + "?lat=" + str(lat)
        + "&lon=" + str(lon)
        + "&appid=" + options["api_key"]
    )

In [9]:
# Accumulators for the parsed API responses: weather_loop() appends to
# weather_list and aqi_loop() appends to aqi_list. Two distinct lists.
weather_list, aqi_list = [], []

In [10]:
def api_call(url, timeout=30):
    """GET ``url`` and return the decoded JSON body.

    Params:
        url: full request URL, including the query string.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the calling loop forever.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status, so error payloads are not collected as if they were data.
        requests.exceptions.Timeout: if no response arrives within ``timeout``.
    """
    response = req.get(url, timeout=timeout)
    # Fail fast on HTTP errors instead of returning the error body as data.
    response.raise_for_status()
    return response.json()

In [11]:
def weather_loop():
    """Call the API for every URL in WEATHER_URL_LIST, in order, and append
    each parsed response to the module-level ``weather_list``."""
    weather_list.extend(api_call(link) for link in WEATHER_URL_LIST)

In [12]:
def aqi_loop():
    """Call the API for every URL in AQI_URL_LIST, in order, and append
    each parsed response to the module-level ``aqi_list``."""
    aqi_list.extend(api_call(link) for link in AQI_URL_LIST)

In [13]:
# Fetch weather data: one HTTP request per entry in WEATHER_URL_LIST,
# performed sequentially; results accumulate in weather_list.
weather_loop()

In [15]:
# Manually recorded timing note: the duration is a fixed string literal,
# not something this cell measures.
print("time taken for weather 22m 37.4s")

time taken for weather 22m 37.4s


In [14]:
# Fetch air-quality data: one HTTP request per entry in AQI_URL_LIST,
# performed sequentially; results accumulate in aqi_list.
aqi_loop()

In [16]:
# Manually recorded timing note: the duration is a fixed string literal,
# not something this cell measures.
print("time taken for aqi 10m 23.1s")

time taken for aqi 10m 23.1s


In [17]:
def weather_to_csv():
    """Flatten the collected weather responses and save them as a CSV file.

    Reads the module-level ``weather_list``. Each response is flattened into
    one row of top-level fields, and every entry of its ``weather`` list is
    flattened into ``weather.``-prefixed columns. The ``dt`` field is converted
    to a UTC timestamp string rounded down to the hour; ``sys.sunrise`` and
    ``sys.sunset`` are converted to UTC timestamp strings.

    The result is written to ``Weather_<YYYYmmddHHMMSS>.csv`` (local time of
    the call) in the current working directory, without the index column.
    """
    # Local import keeps this function self-contained; the file-level imports
    # only bring in datetime/timedelta/date.
    from datetime import timezone

    def _utc_string(epoch, round_to_hour=False):
        # Timezone-aware replacement for the deprecated
        # datetime.utcfromtimestamp(); the formatted text is the same.
        moment = datetime.fromtimestamp(epoch, tz=timezone.utc)
        if round_to_hour:
            moment = hour_rounder(moment)
        return moment.strftime('%Y-%m-%d %H:%M:%S')

    dw = pd.json_normalize(weather_list, 'weather', record_prefix='weather.')
    # Label each weather record with the position of the response it came
    # from. A response may carry several (or no) weather entries, so pairing
    # rows purely by position would attach conditions to the wrong location.
    dw.index = [pos for pos, rec in enumerate(weather_list) for _ in rec['weather']]

    dn = pd.json_normalize(weather_list)
    dn = dn.drop('weather', axis=1)
    dn['dt'] = dn['dt'].apply(lambda t: _utc_string(t, round_to_hour=True))
    dn['sys.sunrise'] = dn['sys.sunrise'].apply(_utc_string)
    dn['sys.sunset'] = dn['sys.sunset'].apply(_utc_string)

    # Join on the originating response position; identical to a side-by-side
    # concat when every response has exactly one weather entry.
    df_update = dn.join(dw).reset_index(drop=True)
    weather_file_name = "Weather" + datetime.now().strftime("_%Y%m%d%H%M%S.csv")
    df_update.to_csv(weather_file_name, index=False)

In [18]:
def aqi_to_csv():
    """Flatten the collected air-quality responses and save them as a CSV file.

    Reads the module-level ``aqi_list``. Each response is flattened into one
    row of top-level fields, and every entry of its ``list`` field is flattened
    into ``aqi.``-prefixed columns. ``aqi.dt`` is converted to a UTC timestamp
    string rounded down to the hour.

    The result is written to ``AQI_<YYYYmmddHHMMSS>.csv`` (local time of the
    call) in the current working directory, without the index column.
    """
    # Local import keeps this function self-contained; the file-level imports
    # only bring in datetime/timedelta/date.
    from datetime import timezone

    def _utc_hour_string(epoch):
        # Timezone-aware replacement for the deprecated
        # datetime.utcfromtimestamp(); the formatted text is the same.
        moment = datetime.fromtimestamp(epoch, tz=timezone.utc)
        return hour_rounder(moment).strftime('%Y-%m-%d %H:%M:%S')

    dw = pd.json_normalize(aqi_list, 'list', record_prefix='aqi.')
    # Label each record with the position of the response it came from, so
    # rows are matched by origin rather than by position in the flattened table.
    dw.index = [pos for pos, rec in enumerate(aqi_list) for _ in rec['list']]
    dw['aqi.dt'] = dw['aqi.dt'].apply(_utc_hour_string)

    dn = pd.json_normalize(aqi_list)
    dn = dn.drop('list', axis=1)

    # Join on the originating response position; identical to a side-by-side
    # concat when every response has exactly one entry in ``list``.
    df = dn.join(dw).reset_index(drop=True)
    aqi_file_name = "AQI" + datetime.now().strftime("_%Y%m%d%H%M%S.csv")
    df.to_csv(aqi_file_name, index=False)


In [19]:
# Write the collected responses to timestamped CSV files in the current
# working directory: weather first, then air quality.
weather_to_csv()
aqi_to_csv()