# Temporal-Spatial Pollution Distribution in Salt Lake County

In [None]:
%matplotlib inline

In [None]:
import mesowest
import importlib
from datetime import datetime, date
import json
import locale
import numpy as np
import folium
import requests
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pandas as pd
from scipy.interpolate import griddata
from mpl_toolkits.basemap import Basemap
import seaborn as sns
import gzip
import json
from dateutil.parser import *
import pandas as pd
import geopandas as gpd
import urllib
import matplotlib.mlab as mlab
import pickle
import matplotlib.tri as tri

#### First We Need to Identify the Measurement Sites in the County of Interest

In [None]:
# Candidate county names; the cells visible here do not read this list.
counties = [
    "Weber",
    "Davis",
    "Salt Lake",
    "Summit",
    "Utah",
    "Tooele",
]

In [None]:
# Look up the active PM2.5 stations of one county through the Synoptic
# station-metadata endpoint and keep a label plus coordinates per station.
from urllib.parse import quote  # `quote` was called below without being imported

state = 'UT'
county = "Utah"
# The county name is percent-encoded before it is placed in the query string.
fo = mesowest.urlopen("""https://api.synopticlabs.org/v2/stations/metadata?&token=demotoken&state=%s&county=%s&vars=PM_25_concentration&status=active"""%(state, quote(county)))
try:
    stations_ca = json.loads(fo.read())
finally:
    # Close the response even when reading or decoding it fails.
    fo.close()

# One (label, latitude, longitude) tuple per station; the label is "STID;NAME".
# float() replaces locale.atof() so parsing no longer depends on the process
# locale (assumes the API reports '.'-decimal strings -- TODO confirm).
station_data = [(";".join((s["STID"],s['NAME'])),float(s['LATITUDE']),float(s['LONGITUDE'])) for s in stations_ca['STATION']]




In [None]:
# Centre a map on the mean station position and add one marker per station.
# The station list built from the metadata response is `station_data`; the
# name `ca_station_data` used here before is not defined in the visible cells.
lat = np.mean([s[1] for s in station_data])
lon = np.mean([s[2] for s in station_data])
ca_map = folium.Map(location=[lat,lon], tiles="Stamen Terrain", zoom_start=6.5)
for s in station_data:
    # s is (label, latitude, longitude); the label becomes the marker popup.
    folium.Marker([s[1], s[2]],
                  popup=s[0],
                  icon=folium.Icon(icon='cloud')).add_to(ca_map)
ca_map

In [None]:
# Empty base map at fixed centre coordinates. The variable is no longer called
# `map`, which replaced the builtin map() for every later cell of the session.
base_map = folium.Map(location=[40.76623, -111.84755], tiles="Stamen Terrain", zoom_start=7.5)
base_map

In [None]:

# Map at fixed centre coordinates with one cloud-icon marker per station.
# Named `station_map` rather than `map` so the builtin map() stays usable.
station_map = folium.Map(location=[40.76623, -111.84755], tiles="Stamen Terrain", zoom_start=7.5)
for s in station_data:
    # s is (label, latitude, longitude); the label is shown in the popup.
    folium.Marker([s[1], s[2]],
                  popup=s[0],
                  icon=folium.Icon(icon='cloud')).add_to(station_map)
station_map

In [None]:
# Query the Synoptic station-metadata endpoint for active Salt Lake County
# stations that report PM_25_concentration.
payload = {"token":"demotoken",
           "state":"UT",
           "county":"Salt Lake",
           "vars":"PM_25_concentration",
           "status":"active"}
# The timeout stops the cell from blocking indefinitely on a stalled
# connection; raise_for_status() reports an HTTP error here instead of letting
# it surface later as a confusing failure when r.json() is used.
r = requests.get("""https://api.synopticlabs.org/v2/stations/metadata""", params=payload, timeout=30)
r.raise_for_status()


In [None]:
# Show the first entry of the response's STATION list for inspection.
r.json()["STATION"][0]

In [None]:
# Index elevation and coordinates by station id (STID) for later lookups.
station_id = {}
for s in r.json()["STATION"]:
    try:
        station_id[s['STID']] = {
               "ELEVATION":s["ELEVATION"],
               "LONGITUDE":s["LONGITUDE"],
               "LATITUDE":s["LATITUDE"]}
    except KeyError:
        # Best effort: skip records that lack the id or one of the fields.
        # Only KeyError is caught so unrelated errors (and Ctrl-C) still surface.
        continue

## We can get a quick plot of the data

In [None]:
# Quick look at one station: build a frame from the first station's
# OBSERVATIONS, drop rows with missing values and plot with date_time on x.
first_observations = r.json()["STATION"][0]["OBSERVATIONS"]
may24 = pd.DataFrame.from_dict(first_observations).dropna()
may24.plot(x="date_time", rot=45)

### Secondary Axis

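Plotting two series with different scales on one chart takes a little more work: the second series needs its own (secondary) y-axis.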

In [None]:

# Draw ozone and PM2.5 on the same chart; PM2.5 is placed on a secondary y-axis.
common_kw = dict(x="date_time", grid=True, rot=45)
ax1 = may24.plot(y="ozone_concentration_set_1", color='blue', label='ozone', **common_kw)
ax2 = may24.plot(y="PM_25_concentration_set_1", color='red', label='pm25',
                 secondary_y=True, ax=ax1, **common_kw)

# Label each y-axis with the quantity it carries.
ax1.set_ylabel("Ozone")
ax2.set_ylabel("pm25")

plt.show()

### Third-party libraries for dealing with dates

* https://github.com/dateutil/dateutil
* https://github.com/scrapinghub/dateparser

### Collect Time Series Data

In [None]:
# Download the PM2.5/ozone time series of every known station. The constant
# `if False:` guard keeps this cell switched off; set it to True to re-download.
if False:
    start = parse("January 1, 2019 12:00am MDT").strftime("%Y%m%d%H%M")
    end   = parse("March 31, 2019 12:59 pm MDT").strftime("%Y%m%d%H%M")
    data = {}
    # station_id is a dict keyed by STID, so iterating it yields the id strings
    # themselves. The former s["stid"] lookup matched a list-of-dicts layout
    # and fails when applied to a string key.
    for stid in station_id:
        payload = {"token":"demotoken",
                   "stid":stid,
                   "start":start,
                   "end":end,
                   "obtimezone":"LOCAL",
                   "vars":"PM_25_concentration,ozone_concentration",
                   "output":"json"}
        # The timeout prevents one stalled request from hanging the whole loop.
        r = requests.get("""http://api.mesowest.net/v2/stations/timeseries""", params=payload, timeout=60)
        data[stid]= r.json()

In [None]:
# Write the downloaded observations to the JSON cache file (switched off by
# the constant guard; enable it after a fresh download).
if False:
    with open("pm25_03_slc_01012019_03312019.json", "w") as cache_file:
        json.dump(data, cache_file)

In [None]:
# Read the observations back from the JSON cache file (switched on).
if True:
    with open("pm25_03_slc_01012019_03312019.json", "r") as cache_file:
        data = json.loads(cache_file.read())

## We need to parse dates

In [None]:
# Build one observations DataFrame per station with parsed timestamps.
# Stations whose payload cannot be converted are listed in `fails`.
dfs = {}
fails = []
for key, value in data.items():
    try:
        frame = pd.DataFrame.from_dict(value["STATION"][0]["OBSERVATIONS"]).dropna()
        frame["date_time"] = pd.to_datetime(frame["date_time"])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing STATION/OBSERVATIONS/date_time entries, an empty station
        # list, a malformed payload or unparseable timestamps: record the
        # station and continue. Other exceptions are no longer swallowed.
        fails.append(key)
    else:
        # Store only fully converted frames, so a station cannot end up in
        # both `dfs` (with unparsed dates) and `fails`.
        dfs[key] = frame

In [None]:
# Save the per-station frames to a pickle cache (switched off by the guard).
if False:
    with open("dfs.pickle", "wb") as pickle_file:
        pickle.dump(dfs, pickle_file)

In [None]:
# Replace `dfs` with the frames stored in the pickle cache (switched on).
# NOTE: unpickling can execute arbitrary code; only load a dfs.pickle that
# was produced by this notebook.
if True:
    with open("dfs.pickle", "rb") as pickle_file:
        dfs = pickle.load(pickle_file)

In [None]:
# Restrict every station frame to the rows whose date_time falls on 2019-02-01.
target_day = date(2019, 2, 1)


def _is_target_day(row):
    """Return True when the row's date_time lies on target_day."""
    return row["date_time"].date() == target_day


# The comparison is applied row by row, exactly as before, so it works on
# whatever timestamp objects the date_time column holds.
feb01 = {stid: frame[frame.apply(_is_target_day, axis=1)] for stid, frame in dfs.items()}

In [None]:
# Mean PM2.5 per station over the rows selected in `feb01`.
feb01mean_pm25 = {}
for key, value in feb01.items():
    try:
        feb01mean_pm25[key] = np.mean(value["PM_25_concentration_set_1"])
    except (KeyError, TypeError):
        # Best effort: leave out stations without a PM2.5 column or with
        # values that cannot be averaged. Other errors are no longer hidden.
        continue

In [None]:
# Mean ozone concentration per station over the rows selected in `feb01`.
feb01mean_O3 = {}
for key, value in feb01.items():
    try:
        feb01mean_O3[key] = np.mean(value["ozone_concentration_set_1"])
    except (KeyError, TypeError):
        # Best effort: leave out stations without an ozone column or with
        # values that cannot be averaged. The former bare `except` also
        # swallowed KeyboardInterrupt and genuine programming errors.
        continue

```Python
station_id = [{"stid":s["STID"],
               "ELEVATION":s["ELEVATION"],
               "LONGITUDE":s["LONGITUDE"],
               "LATITUDE":s["LATITUDE"]} for s in r.json()["STATION"]]
```

### Create a DataFrame with the mean values and lat/lon

In [None]:
# One row per station that has a PM2.5 mean: longitude, latitude, mean value.
station_rows = []
for stid, mean_value in feb01mean_pm25.items():
    site = station_id[stid]
    station_rows.append([site["LONGITUDE"], site["LATITUDE"], mean_value])

# Everything is cast to float64 and rows containing NaN are dropped.
pm25 = pd.DataFrame(station_rows, columns=["long", "lat", "pm25"], dtype=np.float64).dropna()
pm25.head()

In [None]:
# Shift both coordinates so that their smallest value becomes zero.
pm25["lon_shift"] = pm25["long"] - pm25["long"].min()
pm25["lat_shift"] = pm25["lat"] - pm25["lat"].min()

In [None]:
# Minimum and maximum of the shifted longitude and latitude columns.
lon_offsets = pm25["lon_shift"]
lat_offsets = pm25["lat_shift"]
lllon, urlon = np.min(lon_offsets), np.max(lon_offsets)
lllat, urlat = np.min(lat_offsets), np.max(lat_offsets)
lllon, lllat, urlon, urlat

In [None]:
# Evenly spaced sample positions across the longitude and latitude ranges.
numcols = numrows = 128
lon_values, lat_values = pm25["long"], pm25["lat"]
xi = np.linspace(np.min(lon_values), np.max(lon_values), numcols)
yi = np.linspace(np.min(lat_values), np.max(lat_values), numrows)

In [None]:
# Show the largest value in the pm25 column.
np.max(pm25["pm25"])

In [None]:
# Keep only the rows whose pm25 value is at least 2.0.
tmp = pm25[pm25["pm25"] >= 2.0]

# Evaluation points: the full grid spanned by xi and yi.
Xi, Yi = np.meshgrid(xi, yi)

# Linear interpolation over a triangulation of the remaining station
# coordinates; better methods exist, especially with many data points.
tri_grid = tri.Triangulation(tmp["long"], tmp["lat"])
interpolator = tri.LinearTriInterpolator(tri_grid, tmp["pm25"])
pm25i = interpolator(Xi, Yi)

In [None]:
# Show the smallest and largest interpolated values.
np.min(pm25i), np.max(pm25i)

In [None]:
# Contour view of the interpolated grid: thin black isolines drawn over
# filled colour bands.
fig, (ax1, ax2) = plt.subplots(nrows=2)
# `axes="equal"` is not a contour/contourf option and does not set the aspect
# ratio, so it is dropped and equal scaling is requested on the Axes instead.
ax1.contour(xi, yi, pm25i, levels=10, linewidths=0.5, colors='k')
cntr1 = ax1.contourf(xi, yi, pm25i, levels=10, cmap="RdBu_r")
ax1.set_aspect("equal")
# Colour scale for the filled bands (cntr1 was created before but never used).
fig.colorbar(cntr1, ax=ax1)
# NOTE(review): ax2 is created above but this cell draws nothing on it.


In [None]:
# Show the interpolated grid as an image. Row 0 of pm25i belongs to the
# smallest yi value, so origin="lower" is needed to keep the picture from being
# flipped vertically; extent labels the axes with the xi/yi coordinate ranges.
plt.imshow(pm25i, origin="lower", extent=(xi[0], xi[-1], yi[0], yi[-1]))

In [None]:
# Scatter the long/lat positions of the rows held in `tmp`.
sns.scatterplot(x=tmp["long"], y=tmp["lat"])

In [None]:
# Print the pm25 value of every row, one per line.
for _, record in pm25.iterrows():
    print(record["pm25"])

In [None]:
# Print every row in full, followed by a 22-dash separator line.
separator = "-" * 22
for _, record in pm25.iterrows():
    print(record)
    print(separator)

In [None]:
# Print the pm25 column value by value.
for mean_value in pm25["pm25"]:
    print(mean_value)

In [None]:
?np.min

In [None]:
# Does the first column ("long") contain any NaN?
# A DataFrame cannot be indexed with [:, 0] (positional access needs .iloc),
# and `np.nan in ...` is unreliable because NaN never compares equal to itself;
# isna() performs the intended missing-value test.
pm25.iloc[:, 0].isna().any()