### Overview
---

Generate a combined output CSV for a single air monitoring station.

In [1]:
%run -i '../scripts/py/config.py'

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

import geopandas as gpd
from shapely.geometry import Point, LineString,Polygon

import seaborn as sns

In [3]:
'''
Load every per-parameter CSV and keep only the rows for one station
'''

# EMS identifier of the station to extract. Kept as a named constant so the
# notebook can be pointed at another station by editing a single line.
STATION_ID = 'E231866'

# One input file per measured parameter; entries that are commented out are
# deliberately excluded from the load.
variables = ["CO.csv",
             "H2S.csv",
             "HUMIDITY.csv",
             "NO.csv",
             "NO2.csv",
             "NOx.csv",
             "O3.csv",
#              "PM10.csv",
             "PM25.csv",
             "PRECIP.csv",
             "PRESSURE.csv",
             "SNOW.csv",
             "SO2.csv",
             "TEMP_MEAN.csv",
#              "TRS.csv",
             "WDIR_UVEC.csv",
             "WDIR_VECT.csv",
             "WSPD_SCLR.csv",
             "WSPD_VECT.csv"
            ]

frames = []
failed = []
for var in variables:
    try:
        # ReadCSV is not defined in this notebook; presumably it comes from
        # the config script run in the first cell -- TODO confirm.
        station_rows = ReadCSV(var)
        station_rows = station_rows[station_rows['EMS_ID'] == STATION_ID]
    except Exception as e:
        # Best-effort load: report the file that failed and carry on with
        # the remaining ones.
        print(var, e)
        failed.append(var)
    else:
        frames.append(station_rows)

# pd.concat([]) fails with an unhelpful "No objects to concatenate"; raise
# the same exception type with a message that points at the real cause.
if not frames:
    raise ValueError(
        'None of the input files could be loaded for station {}: {}'.format(
            STATION_ID, failed))

# ignore_index=True renumbers the rows 0..n-1 in the same step.
df = pd.concat(frames, ignore_index=True)

df['EMS_ID'] = df['EMS_ID'].astype(str)

df.head()

  if (await self.run_code(code, result,  async_=asy)):


Unnamed: 0,DATE_PST,DATE,TIME,STATION_NAME,STATION_NAME_FULL,EMS_ID,NAPS_ID,RAW_VALUE,ROUNDED_VALUE,UNIT,INSTRUMENT,PARAMETER,OWNER,REGION
0,2019-01-01 01:00,2019-01-01,01:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,0.903666,0.904,ppm,CO_API300,CO,ENV,01 - Vancouver Island
1,2019-01-01 02:00,2019-01-01,02:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,0.874146,0.874,ppm,CO_API300,CO,ENV,01 - Vancouver Island
2,2019-01-01 03:00,2019-01-01,03:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,0.793668,0.794,ppm,CO_API300,CO,ENV,01 - Vancouver Island
3,2019-01-01 04:00,2019-01-01,04:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,0.680288,0.68,ppm,CO_API300,CO,ENV,01 - Vancouver Island
4,2019-01-01 05:00,2019-01-01,05:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,0.665818,0.666,ppm,CO_API300,CO,ENV,01 - Vancouver Island


In [4]:
# Row count of the combined long-format frame
df.shape[0]

94416

In [5]:
# Show the dtype pandas assigned to each column of the combined frame
df.dtypes

DATE_PST              object
DATE                  object
TIME                  object
STATION_NAME          object
STATION_NAME_FULL     object
EMS_ID                object
NAPS_ID                int64
RAW_VALUE            float64
ROUNDED_VALUE        float64
UNIT                  object
INSTRUMENT            object
PARAMETER             object
OWNER                 object
REGION                object
dtype: object

### Group

In [6]:
'''
Bucket the rows by (timestamp, parameter) so that each pair can be
aggregated independently
'''

group_keys = ['DATE_PST', 'PARAMETER']
combos = df.groupby(group_keys)

In [7]:
'''
Average ROUNDED_VALUE within each (date, parameter) group, then index the
result by date so that PARAMETER and ROUNDED_VALUE remain as columns
(the mean is expected to cover a single sample, so "first" would also work)
'''

tmp = (
    combos['ROUNDED_VALUE']
    .mean()
    .to_frame()
    .reset_index()
    .set_index('DATE_PST')
)

tmp.head()

Unnamed: 0_level_0,PARAMETER,ROUNDED_VALUE
DATE_PST,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01 01:00,CO,0.904
2019-01-01 01:00,HUMIDITY,81.8
2019-01-01 01:00,NO,26.0
2019-01-01 01:00,NO2,16.4
2019-01-01 01:00,NOx,42.4


In [9]:
# Number of (date, parameter) rows after aggregation
tmp.shape[0]

87672

In [8]:
'''
Flatten df - get all features for a date in a single record
'''

# Columns (and their order) to keep in the output table
cols = ['DATE_PST', 'CO', 'HUMIDITY', 'NO', 'NOx', 'O3', 'PM25', 'SO2',
       'TEMP_MEAN', 'WDIR_UVEC', 'WDIR_VECT', 'WSPD_SCLR', 'WSPD_VECT', 'NO2']

# Reshape long -> wide in a single vectorised step: one row per DATE_PST and
# one column per PARAMETER, holding that pair's ROUNDED_VALUE.
#
# This replaces a per-timestamp loop built on DataFrame.append, which
#   * was removed in pandas 2.0,
#   * copied the whole frame on every iteration (quadratic), and
#   * raised AttributeError for a timestamp with exactly one parameter row
#     (tmp.loc[row] is then a Series, so ['PARAMETER'] is a plain str).
#
# pivot requires each (DATE_PST, PARAMETER) pair to be unique; that holds when
# tmp is the groupby-mean result, and pivot raises ValueError otherwise.
# Timestamps with no reading for a parameter get NaN in that column.
data = (
    tmp.reset_index()
       .pivot(index='DATE_PST', columns='PARAMETER', values='ROUNDED_VALUE')
       .rename_axis(columns=None)   # drop the leftover 'PARAMETER' axis label
       .reset_index()               # DATE_PST back to an ordinary column
)

# Limit columns (raises KeyError if an expected parameter is absent)
data = data[cols]

data.head()

Unnamed: 0,DATE_PST,CO,HUMIDITY,NO,NOx,O3,PM25,SO2,TEMP_MEAN,WDIR_UVEC,WDIR_VECT,WSPD_SCLR,WSPD_VECT,NO2
0,2019-01-01 01:00,0.904,81.8,26.0,42.4,1.7,50.0,0.8,2.2,345.5,344.4,0.94,0.87,16.4
1,2019-01-01 02:00,0.874,78.6,22.7,39.9,1.8,50.0,0.7,3.0,155.9,169.9,0.47,0.18,17.2
2,2019-01-01 03:00,0.794,82.3,18.6,33.1,1.8,46.0,0.6,2.4,347.2,345.6,1.42,1.38,14.5
3,2019-01-01 04:00,0.68,81.1,7.1,22.2,1.4,42.0,0.3,2.6,340.6,340.7,1.23,1.22,15.1
4,2019-01-01 05:00,0.666,78.1,6.7,23.2,1.7,36.0,0.3,3.3,99.5,101.3,0.52,0.51,16.6


In [177]:
# Number of rows in the flattened table
data.shape[0]

6744

In [176]:
# Name of the output file for the flattened table
filename = 'AQ_combined.csv'

# WriteCSV is not defined in this notebook; presumably it comes from the
# config script run in the first cell and decides the output directory --
# TODO confirm.
WriteCSV(data,filename)