In [1]:
import json
import os
import pickle
import random
import sys
import urllib
import urllib.parse
import urllib.request
from collections import defaultdict
from datetime import datetime
from datetime import datetime, timedelta
from os.path import expanduser

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import spatial

In [2]:
## AirNow API credential.
## NOTE(security): credentials should not live in the notebook. The key is taken
## from the AIRNOW_API_KEY environment variable when it is set; the literal below
## is kept only as a fallback so existing runs keep working. It should be rotated
## and removed from the source.
API_KEY = os.environ.get("AIRNOW_API_KEY", "96602008-3701-466F-982E-7F022DFDBBAD")

## Set the longitude/latitude coordinates of the sensor area
min_lon = -123.948
max_lon = -119.246
min_lat = 35.853
max_lat = 39.724
## Bounding box as (min_lon, max_lon, min_lat, max_lat)
BBox = (min_lon, max_lon, min_lat, max_lat)

## Set dates ("YYYY-MM-DD") and hours (strings) over collected data:
start_date = "2021-02-09"
start_hour = "0"
end_date = "2021-02-13"
end_hour = "23"

## Numeric components of the end date; splitting on "-" avoids relying on
## fixed character offsets.
year, month, day = (int(part) for part in end_date.split("-"))

In [3]:
# API parameters. Everything tunable lives in `options`; "url" and "ext" are
# used locally, the remaining entries feed the query string.
options = {
    "url": "https://airnowapi.org/aq/data/",
    "start_date": start_date,
    "start_hour_utc": start_hour,
    "end_date": end_date,
    "end_hour_utc": end_hour,
    "parameters": "pm25",
    # Order used by the request: min_lon, min_lat, max_lon, max_lat
    "bbox": ",".join(str(bound) for bound in (min_lon, min_lat, max_lon, max_lat)),
    "data_type": "a",
    "format": "json",
    "ext": "json",
    "api_key": API_KEY,
}

# API request URL. urlencode escapes any unexpected characters in the values;
# safe="," keeps the bbox commas literal so the URL is unchanged for the
# current settings.
query = {
    "startdate": options["start_date"] + "t" + options["start_hour_utc"],
    "enddate": options["end_date"] + "t" + options["end_hour_utc"],
    "parameters": options["parameters"],
    "bbox": options["bbox"],
    "datatype": options["data_type"],
    "format": options["format"],
    "api_key": options["api_key"],
}
REQUEST_URL = options["url"] + "?" + urllib.parse.urlencode(query, safe=",")

try:
    # Request AirNowAPI data
    print("Requesting AirNowAPI data...")

    # Timestamped file name in the user's home directory.
    # %m is the month; the previous %M inserted the minutes a second time.
    home_dir = expanduser("~")
    download_file_name = "AirNowAPI" + datetime.now().strftime("_%Y%m%d%H%M%S") + "." + options["ext"]
    download_file = os.path.join(home_dir, download_file_name)

    # Perform the AirNow API data request; the context manager closes the
    # connection even if reading or JSON decoding fails.
    with urllib.request.urlopen(REQUEST_URL) as response:
        data = json.loads(response.read())

    # Download complete
    # NOTE(review): the URL printed below contains the API key, so it ends up in
    # the saved notebook output.
    print("Download URL: %s" % REQUEST_URL)
    # NOTE(review): nothing in this cell writes `download_file`; the response is
    # only held in memory as `data`.
    print("Download File: %s" % download_file)

except Exception as e:
    print("Unable perform AirNowAPI request. %s" % e)
    sys.exit(1)

Requesting AirNowAPI data...
Download URL: https://airnowapi.org/aq/data/?startdate=2021-02-09t0&enddate=2021-02-13t23&parameters=pm25&bbox=-123.948,35.853,-119.246,39.724&datatype=a&format=json&api_key=96602008-3701-466F-982E-7F022DFDBBAD
Download File: /home/javier/AirNowAPI_20230720150734.json


In [4]:
### Filter for unique long/lat coordinates
# Coordinates of every reading, in the order the readings appear in `data`.
data_lat = [reading['Latitude'] for reading in data]
data_lon = [reading['Longitude'] for reading in data]

# Unique (longitude, latitude) pairs in first-seen order. dict.fromkeys
# de-duplicates in a single pass instead of scanning the list for every reading.
airnow_sensors = list(dict.fromkeys(zip(data_lon, data_lat)))

d = len(airnow_sensors)
print("Number of AirNow Sensors: " + str(d))

# Per-sensor coordinates split into parallel lists (same order as airnow_sensors).
airnow_lon = [coords[0] for coords in airnow_sensors]
airnow_lat = [coords[1] for coords in airnow_sensors]

Number of AirNow Sensors: 63


In [5]:
from pyairnow.conv import aqi_to_concentration, concentration_to_aqi

#### AirNow matrix

In [7]:
## Registry of every UTC timestamp present in `data`; only the keys (and their
## insertion order) are used, each value is just an empty list.
## NOTE(review): the order is whatever order the timestamps first appear in
## `data` - confirm that this is chronological.
time_record = defaultdict(list)
## AQI readings keyed by ((longitude, latitude), UTC timestamp).
airnow_data_dict = defaultdict(list)
for reading in data:
    location = (reading['Longitude'], reading['Latitude'])
    timestamp = reading['UTC']
    # Register the timestamp the first time it is seen (no-op afterwards).
    time_record.setdefault(timestamp, [])
    airnow_data_dict[(location, timestamp)] = reading['AQI']

In [8]:
### AQI matrix: one row per timestamp (in time_record order), one column per
### sensor (in airnow_sensors order). Cells start as NaN and are filled only
### where a reading exists.
output_airnow_data = np.full((len(time_record), len(airnow_sensors)), np.nan)
for (i, airnow_key) in enumerate(airnow_sensors):
    for t, time_key in enumerate(time_record):
        # .get() does not insert placeholder entries into the defaultdict, and
        # the numeric check (rather than truthiness) keeps a valid AQI of 0
        # instead of turning it into NaN. Non-numeric values (None, or empty-list
        # placeholders left by earlier lookups) stay NaN.
        aqi = airnow_data_dict.get((airnow_key, time_key))
        if isinstance(aqi, (int, float)):
            # NOTE(review): if the API uses a sentinel such as -999 for missing
            # AQI, it is stored unchanged here - confirm and mask if needed.
            output_airnow_data[t, i] = aqi

In [9]:
# Sanity check: shape is (number of timestamps, number of sensors).
output_airnow_data.shape

(120, 63)

In [10]:
### Drop every sensor column in which more than 10% of the time steps are missing (NaN)
nan_limit = len(time_record) * 0.1

# Column indices (and the matching sensor coordinates) that exceed the limit.
removed_idx = [
    col for col in range(len(airnow_sensors))
    if np.isnan(output_airnow_data[:, col]).sum() > nan_limit
]
removed_sensors = [airnow_sensors[col] for col in removed_idx]

# Remove the columns from the matrix, then the same sensors from the list
# (in place) so that columns and airnow_sensors stay aligned.
# NOTE(review): airnow_lon / airnow_lat are not updated here and still describe
# the unfiltered sensor list.
output_airnow_data = np.delete(output_airnow_data, removed_idx, axis=1)
for dropped in removed_sensors:
    airnow_sensors.remove(dropped)



In [12]:
# Convert every AQI value to a PM2.5 concentration. This cell was commented
# out, which left `airMatrix` undefined on a fresh kernel even though the save
# step below needs it.
(rows, cols) = output_airnow_data.shape
airMatrix = np.zeros([rows, cols])
for i in range(rows):
    for j in range(cols):
        try:
            airMatrix[i, j] = aqi_to_concentration(output_airnow_data[i, j], 'PM2.5')
        except Exception:
            # Values the converter rejects (e.g. NaN gaps) are kept as NaN.
            # `Exception` rather than a bare except so Ctrl-C still interrupts.
            airMatrix[i, j] = np.nan

In [13]:
# Persist the converted matrix to airMatrix.npy in the current working directory.
# NOTE(review): relies on `airMatrix` from the AQI-to-concentration conversion
# cell directly above; that cell must have been run (uncommented) first.
np.save("airMatrix.npy",airMatrix)