In [1]:
import os
import sys
import pytz
import argparse
# import jax.numpy as jnp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from geopy import distance
import datetime
import tilemapbase
from copy import deepcopy
import pickle as pkl
from PIL import Image
import skimage.measure
import math
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
os.environ['PYTHONWARNINGS']='ignore'
import hyperopt
from joblib import Parallel, delayed
import random
random.seed(42)
import scipy
import torch
from pykrige.ok import OrdinaryKriging
from pykrige.ok3d import OrdinaryKriging3D
from pykrige.uk import UniversalKriging
from sklearn.model_selection import train_test_split
from scipy.interpolate import CubicSpline

In [2]:
# --- Configuration -----------------------------------------------------------
# Pollutant column to analyse, the time-resolution tag used in the data file
# names, and the root directory that holds every input file.
sensor = 'pm25'
res_time = '1H'
filepath_root = '/scratch/ab9738/pollution_with_sensors/'

filepath_data_kai = filepath_root+'data/kaiterra/kaiterra_fieldeggid_{}_current_panel.csv'.format(res_time)
filepath_data_gov = filepath_root+'data/govdata/govdata_{}_current.csv'.format(res_time)
filepath_locs_kai = filepath_root+'data/kaiterra/kaiterra_locations.csv'
filepath_locs_gov = filepath_root+'data/govdata/govdata_locations.csv'
# Built from filepath_root like every other input instead of a second
# hard-coded absolute path.
filepath_distances = filepath_root+'data/combined_distances.csv'

# --- Monitor locations -------------------------------------------------------
# Tag each location table with its network, then combine both into one table.
locs_kai = pd.read_csv(filepath_locs_kai, index_col=[0])
locs_kai['Type'] = 'Kaiterra'
locs_gov = pd.read_csv(filepath_locs_gov, index_col=[0])
locs_gov['Type'] = 'Govt'
locs = pd.merge(locs_kai, locs_gov, how='outer',
                on=['Monitor ID', 'Latitude', 'Longitude', 'Location', 'Type'], copy=False)

# --- Readings ----------------------------------------------------------------
# Each file is indexed by its first two columns; only the `sensor` column is kept.
data_kai = pd.read_csv(filepath_data_kai, index_col=[0,1], parse_dates=True)[sensor]
data_gov = pd.read_csv(filepath_data_gov, index_col=[0,1], parse_dates=True)[sensor]
data = pd.concat([data_kai, data_gov], axis=0, copy=False)
# Zero readings are treated as missing in the combined series.
# NOTE(review): data_kai / data_gov (and therefore df_kai / df_gov below) keep
# their zeros -- confirm whether the per-network frames should be cleaned too.
data = data.replace(0, np.nan)


def _to_ist(timestamp):
    """Return `timestamp` in IST (UTC+05:30); naive timestamps are taken as UTC."""
    if timestamp.tzinfo is None:
        timestamp = timestamp.tz_localize('UTC')
    elif timestamp.tzname() == 'IST':
        # tzname is a method: the previous `timestamp.tzname != 'IST'` compared
        # the bound method itself with a string and was therefore always True.
        return timestamp
    return timestamp.tz_convert(pytz.FixedOffset(330))


# First and last timestamp present in the second index level.
start_dt = _to_ist(data.index.levels[1][0])
end_dt = _to_ist(data.index.levels[1][-1])

# now, filter through the start and end dates
data = data.sort_index()
data = data.loc[(slice(None), slice(start_dt, end_dt))]

# Wide frames: one column per value of index level 0, one row per timestamp.
df_gov = data_gov.unstack(level=0)
df_kai = data_kai.unstack(level=0)
df = data.unstack(level=0)

# Pairwise distance table restricted (and ordered) to the columns of `df`;
# zero entries are blanked out as NaN.
distances = pd.read_csv(filepath_distances, index_col=[0])
distances = distances.loc[df.columns, df.columns]
distances = distances.mask(distances == 0)

In [3]:
# Inclusive boundaries of the two July-to-June analysis years, written with an
# explicit +05:30 offset.
start_date_year_1, end_date_year_1 = map(
    pd.to_datetime, ('2018-07-01 00:00:00+0530', '2019-06-30 23:00:00+0530'))
start_date_year_2, end_date_year_2 = map(
    pd.to_datetime, ('2019-07-01 00:00:00+0530', '2020-06-30 23:00:00+0530'))

In [4]:
# Rows of the combined frame whose timestamp lies between the year-1 boundaries.
df_year_1 = df.loc[start_date_year_1:end_date_year_1]

In [5]:
# Rows of the combined frame whose timestamp lies between the year-2 boundaries.
df_year_2 = df.loc[start_date_year_2:end_date_year_2]

In [6]:
# Column-wise mean of each yearly frame: one average per monitor.
df_year1_avg, df_year2_avg = (
    yearly_frame.mean() for yearly_frame in (df_year_1, df_year_2)
)

In [7]:
# Monitors whose yearly mean is above 100, relabelled from monitor id to the
# human-readable name in the `Location` column of `locs`.
year1_chart = df_year1_avg.loc[df_year1_avg.gt(100)]
year1_chart.index = locs.loc[year1_chart.index, 'Location']

year2_chart = df_year2_avg.loc[df_year2_avg.gt(100)]
year2_chart.index = locs.loc[year2_chart.index, 'Location']

In [8]:
# Display the year-1 locations with a mean above 100, highest mean first.
year1_chart.sort_values(ascending=False)

Location
Anand Vihar, Delhi - DPCC                 150.073282
Safdarjung Enclave Block B4               150.031129
Wazirpur, Delhi - DPCC                    143.832025
Lado Sarai                                143.165332
Jahangirpuri, Delhi - DPCC                141.826173
Rohini, Delhi - DPCC                      137.250796
Mundaka, Delhi - DPCC                     135.358039
Chhatapur                                 133.879540
Burari Crossing, New Delhi - IMD          128.221020
Nehru Nagar, Delhi - DPCC                 127.760614
Shadipur, New Delhi - CPCB                127.026639
Faridabad Sector 32                       124.793649
Preet Vihar                               123.670595
Ashok Vihar, Delhi - DPCC                 122.143042
Punjabi Bagh, Delhi - DPCC                121.752016
GK-1 Pamposh Enclave                      121.612071
Aya Nagar Extension                       119.735999
Sonia Vihar, Delhi - DPCC                 119.723686
R K Puram, New Delhi - DPCC          

In [9]:
# Display the year-2 locations with a mean above 100, highest mean first.
year2_chart.sort_values(ascending=False)

Location
Fulbright House                     216.197564
Malviya Nagar                       152.404025
Burari Crossing, New Delhi - IMD    123.536235
Faridabad Sector 32                 117.741105
Wazirpur, Delhi - DPCC              117.597498
Rohini, Delhi - DPCC                117.229494
Anand Vihar, Delhi - DPCC           116.515631
Preet Vihar                         114.880152
Jahangirpuri, Delhi - DPCC          112.752082
Mundaka, Delhi - DPCC               112.377219
Nehru Nagar, Delhi - DPCC           112.310200
DTU, New Delhi - CPCB               109.355380
Ashok Vihar, Delhi - DPCC           105.481922
ITO, New Delhi - CPCB               105.031344
Vivek Vihar, Delhi - DPCC           103.834498
Dwarka-Sector 8, Delhi - DPCC       102.383160
Punjabi Bagh, Delhi - DPCC          100.059421
dtype: float64

In [10]:
# Mean of the per-monitor year-1 means: every monitor carries equal weight,
# regardless of how many readings it contributed.
df_year1_avg.mean()

109.22330075190605

In [11]:
# Mean of the per-monitor year-2 means: every monitor carries equal weight,
# regardless of how many readings it contributed.
df_year2_avg.mean()

87.52631114393691

In [40]:
# Collapse the Kaiterra frame to one row per calendar date of its index, each
# cell being the mean of that date's readings. Rebinds `df_kai`.
# NOTE(review): dates come from the index as stored -- confirm it is in the
# intended timezone before the day boundaries are relied on.
df_kai = df_kai.groupby(df_kai.index.date).mean()

In [41]:
# Grouping by `.date` produced plain date labels; convert them back to
# datetimes so `.index.year` / `.index.month` can be used on the frame.
df_kai.index = pd.to_datetime(df_kai.index)

In [42]:
# Split the daily Kaiterra frame into one frame per calendar year.
df_kai_2018, df_kai_2019, df_kai_2020 = (
    df_kai[df_kai.index.year == calendar_year]
    for calendar_year in (2018, 2019, 2020)
)

In [43]:
# Empty result table with one column per Kaiterra sensor; process_month()
# appends one row per month to it.
df_new_hsps = pd.DataFrame(columns=df_kai.columns)

In [44]:
def process_month(df_kai_month, month, year, *, high=60, high_frac=0.6,
                  mean_limit=90, min_streak=3, max_missing=10):
    """Flag each sensor for one month and store the flags in ``df_new_hsps``.

    A row labelled ``'<month>-<year>'`` is written to the module-level
    ``df_new_hsps`` table (created or overwritten). A sensor is flagged True
    when any of the following holds for its column in ``df_kai_month``:

    * more than ``high_frac`` of its non-missing values exceed ``high``;
    * its mean exceeds ``mean_limit``;
    * it has a run of at least ``min_streak`` successive non-missing values
      above ``high`` (missing values are dropped before runs are counted, so
      a gap does not break a run).

    Sensors with more than ``max_missing`` missing values, or with no valid
    value at all, stay False.

    Parameters
    ----------
    df_kai_month : pandas.DataFrame
        One column per sensor, one row per observation of the month.
    month, year : int
        Only used to build the row label.
    high, high_frac, mean_limit, min_streak, max_missing : numeric, keyword-only
        Thresholds described above; the defaults reproduce the previously
        hard-coded values.

    Returns
    -------
    None
    """
    idx = str(month)+'-'+str(year)
    # Every column of the result table starts out unflagged.
    flags = dict.fromkeys(df_new_hsps.columns, False)

    for sensor in df_kai_month.columns:
        readings = df_kai_month[sensor]
        if readings.isna().sum() > max_missing:
            continue
        valid = readings.dropna()
        if valid.empty:
            # No usable data: previously max() of an empty series raised here.
            continue

        above = valid > high
        mostly_high = above.sum() > high_frac * len(valid)
        high_mean = valid.mean() > mean_limit

        # Running length of each streak of above-threshold values.
        streak = above.astype(int)
        streak = streak * (streak.groupby((streak != streak.shift()).cumsum()).cumcount() + 1)
        long_streak = streak.max() >= min_streak

        flags[sensor] = bool(mostly_high or high_mean or long_streak)

    # Write the whole row in one assignment: the former chained
    # `df_new_hsps.loc[idx][sensor] = cell` may update a temporary copy, and the
    # `np.bool` alias it relied on was removed in NumPy 1.24.
    df_new_hsps.loc[idx] = np.array([flags[col] for col in df_new_hsps.columns], dtype=bool)

In [45]:
# Run process_month() over every (year, month) of the study period:
# May-Dec 2018, Jan-Dec 2019 and Jan-Sep 2020.
for year, _months, _yearly in (
    (2018, range(5, 13), df_kai_2018),
    (2019, range(1, 13), df_kai_2019),
    (2020, range(1, 10), df_kai_2020),
):
    for month in _months:
        df_kai_month = _yearly[_yearly.index.month == month]
        process_month(df_kai_month, month, year)
# Drop the loop helpers so the notebook namespace matches the unrolled version.
del _months, _yearly

In [46]:
# Relabel the result columns with the `Location` name of each Kaiterra sensor.
df_new_hsps.columns = locs.loc[df_kai.columns, 'Location']

In [47]:
# Number of flagged months per location (each True counts as 1).
df_new_hsps.sum(axis=0)

Location
GK-1 Pamposh Enclave                  9
Safdarjung Enclave Block B1           4
Anand Niketan                         9
Lado Sarai                            0
Sarvodaya Enclave                     0
GK-1 N Block                          1
Malviya Nagar                        10
Panchsheel Park                       8
Gurugram Sector 24                    1
Malcha Diplomatic Enclave Block C     5
Faridabad Sector 32                  15
Aya Nagar Extension                   4
Chhatapur                             7
Safdarjung Enclave Block B4           6
Fulbright House                       4
Sadiq Nagar                          18
Hauz Khas Village                     5
Preet Vihar                          11
Saket                                 0
Gurugram Sector 49                   10
US Embassy                           12
Yusuf Sarai                           6
Lajpat Nagar                          6
Defence Colony                        6
Sarvapriya Vihar               

In [48]:
# Total number of flagged (location, month) cells in the table.
df_new_hsps.sum(axis=0).sum()

189

In [31]:
# Collapse the government frame to one row per calendar date of its index,
# each cell being the mean of that date's readings. Rebinds `df_gov`.
# NOTE(review): dates come from the index as stored -- confirm it is in the
# intended timezone before the day boundaries are relied on.
df_gov = df_gov.groupby(df_gov.index.date).mean()

In [32]:
# Grouping by `.date` produced plain date labels; convert them back to
# datetimes so `.index.year` / `.index.month` can be used on the frame.
df_gov.index = pd.to_datetime(df_gov.index)

In [33]:
# Split the daily government frame into one frame per calendar year.
df_gov_2018, df_gov_2019, df_gov_2020 = (
    df_gov[df_gov.index.year == calendar_year]
    for calendar_year in (2018, 2019, 2020)
)

In [34]:
# Empty result table with one column per government monitor; process_month()
# appends one row per month to it.
# NOTE(review): this reuses the name `df_new_hsps` that the Kaiterra section
# also assigns, so whichever cell ran last decides what the name holds.
df_new_hsps = pd.DataFrame(columns=df_gov.columns)

In [36]:
def process_month(df_kai_month, month, year, *, high=60, high_frac=0.6,
                  mean_limit=90, min_streak=3, max_missing=10):
    """Flag each monitor for one month and store the flags in ``df_new_hsps``.

    Despite the parameter name, ``df_kai_month`` is whatever monthly frame the
    caller passes (the calls in this section pass government-monitor data).
    A row labelled ``'<month>-<year>'`` is written to the module-level
    ``df_new_hsps`` table (created or overwritten). A monitor is flagged True
    when any of the following holds for its column in ``df_kai_month``:

    * more than ``high_frac`` of its non-missing values exceed ``high``;
    * its mean exceeds ``mean_limit``;
    * it has a run of at least ``min_streak`` successive non-missing values
      above ``high`` (missing values are dropped before runs are counted, so
      a gap does not break a run).

    Monitors with more than ``max_missing`` missing values, or with no valid
    value at all, stay False.

    Parameters
    ----------
    df_kai_month : pandas.DataFrame
        One column per monitor, one row per observation of the month.
    month, year : int
        Only used to build the row label.
    high, high_frac, mean_limit, min_streak, max_missing : numeric, keyword-only
        Thresholds described above; the defaults reproduce the previously
        hard-coded values.

    Returns
    -------
    None
    """
    idx = str(month)+'-'+str(year)
    # Every column of the result table starts out unflagged.
    flags = dict.fromkeys(df_new_hsps.columns, False)

    for sensor in df_kai_month.columns:
        readings = df_kai_month[sensor]
        if readings.isna().sum() > max_missing:
            continue
        valid = readings.dropna()
        if valid.empty:
            # No usable data: previously max() of an empty series raised here.
            continue

        above = valid > high
        mostly_high = above.sum() > high_frac * len(valid)
        high_mean = valid.mean() > mean_limit

        # Running length of each streak of above-threshold values.
        streak = above.astype(int)
        streak = streak * (streak.groupby((streak != streak.shift()).cumsum()).cumcount() + 1)
        long_streak = streak.max() >= min_streak

        flags[sensor] = bool(mostly_high or high_mean or long_streak)

    # Write the whole row in one assignment: the former chained
    # `df_new_hsps.loc[idx][sensor] = cell` may update a temporary copy, and the
    # `np.bool` alias it relied on was removed in NumPy 1.24.
    df_new_hsps.loc[idx] = np.array([flags[col] for col in df_new_hsps.columns], dtype=bool)

In [37]:
# Run process_month() over every (year, month) of the study period for the
# government monitors: May-Dec 2018, Jan-Dec 2019 and Jan-Sep 2020.
for year, _months, _yearly in (
    (2018, range(5, 13), df_gov_2018),
    (2019, range(1, 13), df_gov_2019),
    (2020, range(1, 10), df_gov_2020),
):
    for month in _months:
        df_kai_month = _yearly[_yearly.index.month == month]
        process_month(df_kai_month, month, year)
# Drop the loop helpers so the notebook namespace matches the unrolled version.
del _months, _yearly

In [38]:
# Relabel the result columns with the `Location` name of each government monitor.
df_new_hsps.columns = locs.loc[df_gov.columns, 'Location']

In [39]:
# Number of flagged months per location (each True counts as 1).
df_new_hsps.sum(axis=0)

Location
Anand Vihar, Delhi - DPCC                           23
Ashok Vihar, Delhi - DPCC                           22
Aya Nagar, New Delhi - IMD                          21
Burari Crossing, New Delhi - IMD                    13
CRRI Mathura Road, New Delhi - IMD                  23
Dr. Karni Singh Shooting Range, Delhi - DPCC        16
DTU, New Delhi - CPCB                               24
Dwarka-Sector 8, Delhi - DPCC                       22
IGI Airport (T3), New Delhi - IMD                   20
IHBAS, Dilshad Garden,New Delhi - CPCB              24
ITO, New Delhi - CPCB                               26
Jawaharlal Nehru Stadium, Delhi - DPCC              17
Jahangirpuri, Delhi - DPCC                          24
Lodhi Road, New Delhi - IMD                         21
Major Dhyan Chand National Stadium, Delhi - DPCC    21
Mandir Marg, New Delhi - DPCC                       22
Mundaka, Delhi - DPCC                               21
NSIT Dwarka, New Delhi - CPCB                       26
N