In [1]:
import os
import sys
import pytz
import argparse
# import jax.numpy as jnp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from geopy import distance
import datetime
import tilemapbase
from copy import deepcopy
import pickle as pkl
from PIL import Image
import skimage.measure
import math
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
os.environ['PYTHONWARNINGS']='ignore'
import hyperopt
from joblib import Parallel, delayed
import random
random.seed(42)
import scipy
import torch
from pykrige.ok import OrdinaryKriging
from pykrige.ok3d import OrdinaryKriging3D
from pykrige.uk import UniversalKriging
from sklearn.model_selection import train_test_split
from scipy.interpolate import CubicSpline

In [24]:
# Column to analyse and the resolution tag that appears in the data file names.
sensor = 'pm25'
res_time = '1H'
filepath_root = '/scratch/ab9738/pollution_with_sensors/'

# Reading files and monitor-location files, one of each per source.
filepath_data_kai = filepath_root + 'data/kaiterra/kaiterra_fieldeggid_{}_current_panel.csv'.format(res_time)
filepath_data_gov = filepath_root + 'data/govdata/govdata_{}_current.csv'.format(res_time)
filepath_locs_kai = filepath_root + 'data/kaiterra/kaiterra_locations.csv'
filepath_locs_gov = filepath_root + 'data/govdata/govdata_locations.csv'

# Tag every location row with its source, then combine both location tables.
locs_kai = pd.read_csv(filepath_locs_kai, index_col=[0]).assign(Type='Kaiterra')
locs_gov = pd.read_csv(filepath_locs_gov, index_col=[0]).assign(Type='Govt')
location_keys = ['Monitor ID', 'Latitude', 'Longitude', 'Location', 'Type']
locs = locs_kai.merge(locs_gov, how='outer', on=location_keys, copy=False)

# Keep only the `sensor` column of each source; the first two CSV columns
# form the (two-level) index.
data_kai = pd.read_csv(filepath_data_kai, index_col=[0, 1], parse_dates=True)[sensor]
data_gov = pd.read_csv(filepath_data_gov, index_col=[0, 1], parse_dates=True)[sensor]

# Stack both sources into one series; zero readings are treated as missing.
data = pd.concat([data_kai, data_gov], axis=0, copy=False).replace(0, np.nan)

def _to_ist(ts):
    """Return the timestamp `ts` expressed at a fixed UTC+05:30 offset.

    A timestamp whose zone name is already 'IST' is returned unchanged.
    A timezone-naive timestamp is localized to UTC before conversion.
    """
    # `tzname` is a method and has to be called; comparing the bound method
    # itself with a string is always unequal.
    if ts.tzname() == 'IST':
        return ts
    if ts.tzinfo is None:
        ts = ts.tz_localize('UTC')
    return ts.tz_convert(pytz.FixedOffset(330))


# First and last entries of the second index level
# (presumably the timestamps -- confirm against the CSV layout).
time_level = data.index.levels[1]
start_dt = _to_ist(time_level[0])
end_dt = _to_ist(time_level[-1])

# now, filter through the start and end dates
# (the index is sorted first so the second level can be sliced)
data = data.sort_index()
data = data.loc[(slice(None), slice(start_dt, end_dt))]

# Wide tables: the first index level (the monitor id) becomes the columns.
# NOTE(review): df_gov and df_kai are built from the per-source series, so the
# zero -> NaN replacement and the date-range filter applied to `data` are not
# reflected in them -- confirm this is intended.
df_gov = data_gov.unstack(level=0)
df_kai = data_kai.unstack(level=0)
df = data.unstack(level=0)

# Pairwise distance table, restricted and ordered to the monitors in `df`.
# The path is built from `filepath_root` like every other input file.
distances = pd.read_csv(filepath_root + 'data/combined_distances.csv', index_col=[0])
distances = distances.loc[df.columns, df.columns]
# Zero distances are treated as missing. `mask` returns a new frame instead of
# assigning into the `.loc` selection above.
distances = distances.mask(distances == 0)

## Annual Hotspots

In [25]:
# Annual mean per monitor. Each frame is grouped by the years of its OWN index;
# grouping df_gov by df_kai's index only works if both indexes happen to be
# identical row for row.
df_kai_an = df_kai.groupby(df_kai.index.year).mean()
df_gov_an = df_gov.groupby(df_gov.index.year).mean()

In [30]:
# Number of Kaiterra monitors whose 2019 annual mean exceeds 100.
kai_mean_2019 = df_kai_an.loc[2019]
len(kai_mean_2019[kai_mean_2019 > 100])

6

In [31]:
# Number of government monitors whose 2019 annual mean exceeds 100.
gov_mean_2019 = df_gov_an.loc[2019]
len(gov_mean_2019[gov_mean_2019 > 100])

24

In [36]:
# Kaiterra monitors whose 2019 annual mean exceeds 100, with their mean values.
kai_mean_2019 = df_kai_an.loc[2019]
df_new_hotspots = kai_mean_2019[kai_mean_2019 > 100]

In [37]:
# Relabel the hotspots with the `Location` value of their row in `locs`.
# NOTE: this replaces the index in place, so re-running this cell alone would
# look up the location names (not the monitor ids) in `locs`.
hotspot_rows = locs.loc[df_new_hotspots.index]
df_new_hotspots.index = hotspot_rows['Location']

In [38]:
# Display the Kaiterra hotspots (2019 annual mean > 100), labelled by location.
df_new_hotspots

Location
Lado Sarai                     167.638217
Malviya Nagar                  120.039290
Faridabad Sector 32            103.765461
Chhatapur                      181.889563
Fulbright House                140.229133
Safdarjung Enclave Block B4    218.814171
Name: 2019, dtype: float64

In [39]:
# Government monitors whose 2019 annual mean exceeds 100, with their mean values.
gov_mean_2019 = df_gov_an.loc[2019]
df_govt_hotspots = gov_mean_2019[gov_mean_2019 > 100]

In [40]:
# Display the government hotspots (2019 annual mean > 100), labelled by monitor id.
df_govt_hotspots

monitor_id
AnandVihar_DPCC        128.203259
AshokVihar_DPCC        120.730632
BurariCrossing_IMD     111.232359
CRRIMathuraRoad_IMD    104.290820
DTU_CPCB               119.182859
DwarkaSector8_DPCC     109.599886
IHBAS_CPCB             115.217622
ITO_CPCB               109.442360
JNS_DPCC               104.148098
Jahangirpuri_DPCC      128.660903
MandirMarg_DPCC        103.736260
Mundaka_DPCC           126.415383
NSIT_CPCB              113.483118
NehruNagar_DPCC        123.174957
OkhlaPhase2_DPCC       107.589131
PunjabiBagh_DPCC       107.467880
Pusa_DPCC              100.869147
RKPuram_DPCC           105.337193
Rohini_DPCC            128.616534
Shadipur_CPCB          118.443249
Sirifort_CPCB          107.629116
SoniaVihar_DPCC        106.452801
VivekVihar_DPCC        114.340096
Wazirpur_DPCC          135.973206
Name: 2019, dtype: float64

## APH Paper Hotspots

In [41]:
# Collapse each frame to one mean row per calendar date of its index.
# NOTE: both names are overwritten, so from here on df_kai / df_gov hold the
# per-date means rather than the original rows.
kai_dates = df_kai.index.date
gov_dates = df_gov.index.date
df_kai = df_kai.groupby(kai_dates).mean()
df_gov = df_gov.groupby(gov_dates).mean()

In [46]:
# Grouping by `.date` leaves an index of plain date objects; convert it back to
# datetimes so `.year` / `.month` can be used on the index below.
df_kai.index = pd.to_datetime(df_kai.index)

In [48]:
# Grouping by `.date` leaves an index of plain date objects; convert it back to
# datetimes so `.year` / `.month` can be used on the index below.
df_gov.index = pd.to_datetime(df_gov.index)

In [50]:
# One frame of Kaiterra daily means per calendar year.
df_kai_2018, df_kai_2019, df_kai_2020 = (
    df_kai.loc[df_kai.index.year == yr] for yr in (2018, 2019, 2020)
)

In [51]:
# One frame of government daily means per calendar year.
df_gov_2018, df_gov_2019, df_gov_2020 = (
    df_gov.loc[df_gov.index.year == yr] for yr in (2018, 2019, 2020)
)

In [82]:
def _count_hotspots(df_month, level=60, scale_level=90, freq_share=0.6,
                    min_run=3, max_missing=10):
    """Count the columns of `df_month` that meet each of three criteria.

    A column is skipped when it has more than `max_missing` NaN rows.
    For every remaining column:

    * frequency   -- more than `freq_share` of its non-NaN values exceed `level`;
    * scale       -- its mean exceeds `scale_level`;
    * consistency -- at least `min_run` consecutive non-NaN values exceed `level`.

    Returns the tuple ``(n_frequency, n_scale, n_consistency)``.
    """
    n_freq = n_scale = n_cons = 0
    for monitor in df_month.columns:
        series = df_month[monitor]
        if series.isna().sum() > max_missing:
            continue

        valid = series.dropna()
        above = (valid > level).astype(int)

        if above.sum() > freq_share * len(valid):
            n_freq += 1
        if series.mean() > scale_level:
            n_scale += 1

        # Length of the current run of exceedances at each row (0 where the
        # value does not exceed `level`). NaN rows were dropped above, so a
        # run continues across gaps in the data.
        run_id = (above != above.shift()).cumsum()
        streak = above * (above.groupby(run_id).cumcount() + 1)
        # A column with no valid value cannot contain a run; without this
        # guard the maximum of an empty sequence would raise.
        if not streak.empty and streak.max() >= min_run:
            n_cons += 1

    return n_freq, n_scale, n_cons


def _infer_month(*frames):
    """Return the `.month` of the first index entry of the first non-empty frame, else None."""
    for frame in frames:
        if len(frame.index) > 0:
            return getattr(frame.index[0], 'month', None)
    return None


def process_month(df_kai_month, df_gov_month, year, month=None):
    """Print one summary line of hotspot counts for a single month.

    The line has the form
    ``<year>-<month> (gov_freq,gov_scale,gov_cons) (kai_freq,kai_scale,kai_cons)``,
    where each triple holds the number of columns meeting the frequency, scale
    and consistency criteria (see `_count_hotspots`).

    Parameters
    ----------
    df_kai_month, df_gov_month : pandas.DataFrame
        One column per monitor, restricted to the rows of the month.
    year : int
        Year shown in the label.
    month : int, optional
        Month shown in the label. When omitted it is taken from the first
        index entry of the first non-empty frame, so the function no longer
        depends on a global variable named `month`; '?' is printed if it
        cannot be determined.
    """
    kai_counts = _count_hotspots(df_kai_month)
    gov_counts = _count_hotspots(df_gov_month)

    if month is None:
        month = _infer_month(df_kai_month, df_gov_month)
    if month is None:
        month = "?"

    print(str(year) + "-" + str(month),
          "({},{},{})".format(*gov_counts),
          "({},{},{})".format(*kai_counts))

In [83]:
# (year, Kaiterra frame, government frame, months to report) for each year.
yearly_runs = [
    (2018, df_kai_2018, df_gov_2018, range(5, 13)),
    (2019, df_kai_2019, df_gov_2019, range(1, 13)),
    (2020, df_kai_2020, df_gov_2020, range(1, 10)),
]

# Print one summary line per month, in chronological order.
for year, kai_frame, gov_frame, months in yearly_runs:
    for month in months:
        df_kai_month = kai_frame[kai_frame.index.month == month]
        df_gov_month = gov_frame[gov_frame.index.month == month]

        process_month(df_kai_month, df_gov_month, year)

2018-5 (24,11,27) (7,0,10)
2018-6 (9,8,27) (1,0,7)
2018-7 (0,0,14) (1,0,4)
2018-8 (0,0,12) (0,0,1)
2018-9 (0,1,23) (2,0,11)
2018-10 (31,29,32) (13,13,13)
2018-11 (32,32,32) (15,15,15)
2018-12 (32,32,32) (16,16,16)
2019-1 (32,32,32) (17,17,17)
2019-2 (30,29,32) (16,16,16)
2019-3 (29,11,32) (13,0,16)
2019-4 (23,10,32) (0,0,9)
2019-5 (27,12,32) (0,0,12)
2019-6 (9,3,26) (0,0,0)
2019-7 (0,0,23) (0,0,2)
2019-8 (0,0,2) (0,0,1)
2019-9 (0,0,14) (0,0,4)
2019-10 (32,32,32) (7,7,7)
2019-11 (32,32,32) (8,8,8)
2019-12 (31,31,31) (8,8,8)
2020-1 (31,31,31) (5,5,5)
2020-2 (31,28,31) (4,4,4)
2020-3 (5,0,29) (0,0,3)
2020-4 (1,0,9) (0,0,0)
2020-5 (2,1,27) (0,0,0)
2020-6 (0,0,7) (0,0,0)
2020-7 (0,0,5) (0,0,0)
2020-8 (0,0,0) (0,0,0)
2020-9 (0,0,21) (0,0,0)
