In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
# import jax.numpy as jnp
import numpy as np
import pandas as pd
import pytz
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
os.environ['PYTHONWARNINGS']='ignore'
from scipy.interpolate import CubicSpline

In [2]:
source = 'combined'
sensor = 'pm25'
res_time = '1H'
filepath_root = '/scratch/ab9738/pollution_with_sensors/'
# spikes_file = filepath_root+'hotspots/spikes_combined_1H.csv'
# time_high_file = filepath_root+'hotspots/hotspots_combined_temporalhigh_1H.pkl'
# time_low_file = filepath_root+'hotspots/hotspots_combined_temporallow_1H.pkl'
# space_high_file = filepath_root+'hotspots/hotspots_combined_spatialhigh_1H.pkl'
# space_low_file = filepath_root+'hotspots/hotspots_combined_spatiallow_1H.pkl'

# Data Loading

In [3]:
filepath_data_kai = filepath_root+'data/kaiterra/kaiterra_fieldeggid_{}_current_panel.csv'.format(res_time)
filepath_data_gov = filepath_root+'data/govdata/govdata_{}_current.csv'.format(res_time)
filepath_locs_kai = filepath_root+'data/kaiterra/kaiterra_locations.csv'
filepath_locs_gov = filepath_root+'data/govdata/govdata_locations.csv'

locs_kai = pd.read_csv(filepath_locs_kai, index_col=[0])
locs_kai['Type'] = 'Kaiterra'
locs_gov = pd.read_csv(filepath_locs_gov, index_col=[0])
locs_gov['Type'] = 'Govt'
locs = pd.merge(locs_kai, locs_gov, how='outer',\
                on=['Monitor ID', 'Latitude', 'Longitude', 'Location', 'Type'], copy=False)
data_kai = pd.read_csv(filepath_data_kai, index_col=[0,1], parse_dates=True)[sensor]
data_gov = pd.read_csv(filepath_data_gov, index_col=[0,1], parse_dates=True)[sensor]
data = pd.concat([data_kai, data_gov], axis=0, copy=False)

start_dt = data.index.levels[1][0]
end_dt = data.index.levels[1][-1]

if start_dt.tzname != 'IST':
        if start_dt.tzinfo is None:
            start_dt = start_dt.tz_localize('UTC')
        start_dt = start_dt.tz_convert(pytz.FixedOffset(330))
    
if end_dt.tzname != 'IST':
    if end_dt.tzinfo is None: 
        end_dt = end_dt.tz_localize('UTC')
    end_dt = end_dt.tz_convert(pytz.FixedOffset(330))

# now, filter through the start and end dates
data.sort_index(inplace=True)
data = data.loc[(slice(None), slice(start_dt, end_dt))]

if(source=='govdata'):
    df = data_gov.unstack(level=0)
elif(source=='kaiterra'):
    df = data_kai.unstack(level=0)
else:
    df = data.unstack(level=0)
distances = pd.read_csv('/scratch/ab9738/pollution_with_sensors/data/combined_distances.csv', index_col=[0])
distances = distances.loc[df.columns, df.columns]
distances[distances == 0] = np.nan

In [4]:
df.replace(0, np.nan, inplace=True)

In [19]:
sens = data.to_frame().reset_index()

sens['hour_of_day'] = sens['timestamp_round'].apply(lambda x: x.hour)

spline = sens.groupby(['field_egg_id', 'hour_of_day']).mean()['pm25'].reset_index()
spline_avg = sens.groupby(['hour_of_day']).mean()['pm25'].reset_index()

fields = []
times = []
pm25 = []
for i in np.unique(spline['field_egg_id']):
    s_i = spline[spline['field_egg_id']==i]
    x = s_i['hour_of_day'].values
    y = [t for t in s_i['pm25'].values]
    c1 = CubicSpline(x[:8],y[:8])
    c2 = CubicSpline(x[8:16],y[8:16])
    c3 = CubicSpline(x[16:24],y[16:24])
    ix = [k/100.0 for k in range(2400)]
    iy = list(np.concatenate((c1(ix[:800]),c2(ix[800:1600]),c3(ix[1600:2400]))))
    fields += [i]*2400
    times += ix
    pm25 += iy

spline_df = pd.DataFrame((fields, times, pm25)).transpose()

spline_df.columns = ['field_egg_id', 'time', 'pm25']

hours_in_day = np.arange(24).astype(float)

spline_df = spline_df[spline_df['time'].isin(hours_in_day)]

spline_mat = np.transpose(spline_df['pm25'].to_numpy().reshape((60,24)))

In [20]:
spline_df[:24]

Unnamed: 0,field_egg_id,time,pm25
0,113E,0.0,127.644077
100,113E,1.0,129.808797
200,113E,2.0,132.313521
300,113E,3.0,133.695388
400,113E,4.0,135.629878
500,113E,5.0,136.734299
600,113E,6.0,146.032807
700,113E,7.0,152.246089
800,113E,8.0,148.532569
900,113E,9.0,132.440802


In [27]:
spline_mat[:,0]

array([127.64407671030794, 129.80879742431605, 132.31352099437206,
       133.69538793530688, 135.62987755164224, 136.73429915266408,
       146.03280681567804, 152.24608880713072, 148.53256941694264,
       132.44080227874056, 115.20611518196814, 101.59137623828518,
       91.39558520743995, 80.45773523306805, 74.39722785424179,
       70.84487156729303, 68.34181498091851, 71.73643753567357,
       83.3931703218267, 99.25064383119913, 117.68973780993471,
       126.78097611247689, 127.57563232542105, 127.94772370766489],
      dtype=object)