In [32]:
%load_ext autoreload
%autoreload 2
import os
import sys
import pytz
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from geopy import distance
import datetime
import tilemapbase
from copy import deepcopy
import pickle as pkl
from PIL import Image
import skimage.measure
import math

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Parameters

In [2]:
# Which monitor network(s) to analyse, which pollutant column to read, and the
# resampling period of the input files.
source, sensor, res_time = 'combined', 'pm25', '1H'

# Root of the project data tree. The trailing slash is kept, so joining
# relative paths onto it yields exactly the same strings as concatenation.
filepath_root = '/scratch/ab9738/pollution_with_sensors/'

# Precomputed spike table and the four hotspot pickles (temporal/spatial x high/low).
spikes_file = os.path.join(filepath_root, 'hotspots/spikes_combined_1H.csv')
time_high_file = os.path.join(filepath_root, 'hotspots/hotspots_combined_temporalhigh_1H.pkl')
time_low_file = os.path.join(filepath_root, 'hotspots/hotspots_combined_temporallow_1H.pkl')
space_high_file = os.path.join(filepath_root, 'hotspots/hotspots_combined_spatialhigh_1H.pkl')
space_low_file = os.path.join(filepath_root, 'hotspots/hotspots_combined_spatiallow_1H.pkl')

# Data Loading

In [3]:
# Input files: readings resampled to `res_time`, plus the monitor coordinates
# for each of the two networks.
filepath_data_kai = filepath_root+'data/kaiterra/kaiterra_fieldeggid_{}_current_panel.csv'.format(res_time)
filepath_data_gov = filepath_root+'data/govdata/govdata_{}_current.csv'.format(res_time)
filepath_locs_kai = filepath_root+'data/kaiterra/kaiterra_locations.csv'
filepath_locs_gov = filepath_root+'data/govdata/govdata_locations.csv'

# Monitor metadata: tag every row with its network, then combine both tables.
locs_kai = pd.read_csv(filepath_locs_kai, index_col=[0])
locs_kai['Type'] = 'Kaiterra'
locs_gov = pd.read_csv(filepath_locs_gov, index_col=[0])
locs_gov['Type'] = 'Govt'
locs = pd.merge(locs_kai, locs_gov, how='outer',
                on=['Monitor ID', 'Latitude', 'Longitude', 'Location', 'Type'], copy=False)

# Readings: the first two CSV columns form a two-level index and only the
# `sensor` column is kept, so each object below is a Series.
data_kai = pd.read_csv(filepath_data_kai, index_col=[0,1], parse_dates=True)[sensor]
data_gov = pd.read_csv(filepath_data_gov, index_col=[0,1], parse_dates=True)[sensor]
data = pd.concat([data_kai, data_gov], axis=0, copy=False)


def _to_ist(ts):
    """Return ``ts`` at UTC+05:30; a timezone-naive ``ts`` is treated as UTC."""
    # `tzname` is a method. The previous code compared the bound method itself
    # with 'IST', which is never equal, so the check could not short-circuit.
    if ts.tzname() == 'IST':
        return ts
    if ts.tzinfo is None:
        ts = ts.tz_localize('UTC')
    return ts.tz_convert(pytz.FixedOffset(330))


# First and last values of the second index level, expressed in IST.
start_dt = _to_ist(data.index.levels[1][0])
end_dt = _to_ist(data.index.levels[1][-1])

# now, filter through the start and end dates
data = data.sort_index()
data = data.loc[(slice(None), slice(start_dt, end_dt))]

# Pivot the selected network(s) so that index level 0 becomes the columns.
if source == 'govdata':
    df = data_gov.unstack(level=0)
elif source == 'kaiterra':
    df = data_kai.unstack(level=0)
else:
    df = data.unstack(level=0)

# Pairwise distance table restricted to the columns of `df`; zero entries are
# replaced by NaN.
distances = pd.read_csv(filepath_root+'data/combined_distances.csv', index_col=[0])
distances = distances.loc[df.columns, df.columns]
distances[distances == 0] = np.nan

In [7]:
# Display the merged Kaiterra + government monitor table built in the data-loading cell.
locs

Unnamed: 0_level_0,UDID,Latitude,Longitude,Address,Location,Type
Monitor ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BB4A,492457f434f94afc865bb4701990bb4a,28.554980,77.194430,Jamun,Hauz Khas Village,Kaiterra
91B8,ea3ceeefd9654dfd9aab41422f7391b8,28.503050,77.185660,Vihara,Chhatapur,Kaiterra
BC46,29b8262425cf4135899cd65b2458bc46,28.632950,77.288700,Segel Design,Preet Vihar,Kaiterra
BFDC,11047d2ddc514f63a12ad4f1ad3bbfdc,28.521083,77.214237,Arundhati,Saket,Kaiterra
D804,f083e8afd43e4727a5eb7f3a1529d804,28.558230,77.208620,EPoD,Yusuf Sarai,Kaiterra
...,...,...,...,...,...,...
Sirifort_CPCB,,28.550425,77.215938,,"Sirifort, New Delhi - CPCB",Govt
SoniaVihar_DPCC,,28.710508,77.249485,,"Sonia Vihar, Delhi - DPCC",Govt
SriAurobindoMarg_DPCC,,28.531346,77.190156,,"Sri Aurobindo Marg, Delhi - DPCC",Govt
VivekVihar_DPCC,,28.672342,77.315260,,"Vivek Vihar, Delhi - DPCC",Govt


# Load Hotspots

In [4]:
# Unpickle the object stored at `time_low_file` (the "temporallow" hotspot file).
# NOTE: pickle can run arbitrary code while loading; only open files you trust.
with open(time_low_file, mode='rb') as low_fh:
    thsp_low = pkl.load(low_fh)

In [5]:
# Unpickle the object stored at `time_high_file` (the "temporalhigh" hotspot file).
# NOTE: pickle can run arbitrary code while loading; only open files you trust.
with open(time_high_file, mode='rb') as high_fh:
    thsp_high = pkl.load(high_fh)

In [6]:
# Unpickle the two spatial hotspot files ("spatialhigh" first, then "spatiallow").
# NOTE: pickle can run arbitrary code while loading; only open files you trust.
with open(space_high_file, mode='rb') as high_fh:
    shsp_high = pkl.load(high_fh)
with open(space_low_file, mode='rb') as low_fh:
    shsp_low = pkl.load(low_fh)

# Load Wind Speeds

In [52]:
# Read the wind records from the project data tree (`filepath_root` comes from
# the parameters cell). `parse_dates=True` only parses the index, and no index
# column is requested here, so it left 'Timestamp' as strings; name the column
# explicitly so later sorting and grouping operate on real datetimes.
df_ws = pd.read_csv(filepath_root+'hotspots/source_apportionment/wind_speeds.csv', parse_dates=['Timestamp'])

In [55]:
# Put the wind records in 'Timestamp' order and renumber the rows from zero.
df_ws = (
    df_ws
    .sort_values(by=['Timestamp'])
    .reset_index(drop=True)
)

In [61]:
# Index the wind table by its 'Timestamp' column converted to a DatetimeIndex.
df_ws = df_ws.set_index(keys=pd.DatetimeIndex(data=df_ws['Timestamp']))

In [62]:
# Keep only the two wind components and average the rows that share a timestamp.
df_ws = (
    df_ws.loc[:, ['u-component', 'v-component']]
    .groupby('Timestamp')
    .mean()
)

In [63]:
# Display the wind table after averaging the u/v components per timestamp.
df_ws

Unnamed: 0_level_0,u-component,v-component
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-01 00:00:00,-5.912000,3.205333
2018-05-01 06:00:00,-3.213333,5.312000
2018-05-01 12:00:00,-3.637333,-0.202667
2018-05-01 18:00:00,-6.544000,2.458667
2018-05-02 00:00:00,-4.949333,2.280000
...,...,...
2020-10-30 18:00:00,2.400000,-1.920000
2020-10-31 00:00:00,2.560000,-1.093333
2020-10-31 06:00:00,4.853333,-4.106667
2020-10-31 12:00:00,2.613333,-2.293333


# Load Intensity Maps

In [31]:
# Emission-intensity grids for the four non-traffic source categories.
# The paths are relative, so the files must sit in the notebook's working directory.
brick_kilns, industries, power_plants, population_density = map(np.load, (
    'brick_kilns_intensity_80x80.npy',
    'industries_intensity_80x80.npy',
    'power_plants_intensity_80x80.npy',
    'population_density_intensity_80x80.npy',
))

# Traffic-intensity grids, one per file suffix (06, 12, 18, 00).
traffic_06, traffic_12, traffic_18, traffic_00 = map(np.load, (
    'traffic_06_intensity_80x80.npy',
    'traffic_12_intensity_80x80.npy',
    'traffic_18_intensity_80x80.npy',
    'traffic_00_intensity_80x80.npy',
))

# Gaussian-Plume Formula for Point Source

In [75]:
def gaussian_plume(src, dest, intensity, H, wind_speed, Dy, Dz, alpha, z=6.5):
    """Gaussian-plume concentration at ``dest`` caused by one point source at ``src``.

    Parameters
    ----------
    src, dest : (latitude, longitude) pairs for the source and the receptor.
    intensity : source strength; it is scaled by ``alpha``.
    H : release height of the source (same length unit as ``z``).
    wind_speed : two-component wind vector, ordered (x, y) like the
        (longitude, latitude) displacement it is projected onto.
        Presumably (u, v) in m/s -- confirm against the wind file.
    Dy, Dz : horizontal and vertical dispersion coefficients; must be positive.
    alpha : multiplicative calibration factor applied to ``intensity``.
    z : receptor height.

    Returns
    -------
    float
        The concentration, or 0.0 when the receptor is not downwind of the
        source, the source has zero intensity, the wind is calm, or source and
        receptor coincide (the plume formula is undefined in the last two cases).
    """
    wind = np.asarray(wind_speed, dtype=float)
    wind_magnitude = np.linalg.norm(wind)

    # reversing as lat=y-axis and long=x-axis
    d_lon = dest[1] - src[1]
    d_lat = dest[0] - src[0]
    offset_norm = math.hypot(d_lon, d_lat)

    # Cheap exits first: they avoid divisions by zero below and skip the
    # comparatively expensive geodesic distance for empty grid cells.
    if intensity == 0 or wind_magnitude == 0 or offset_norm == 0:
        return 0.0

    # NOTE(review): the direction is taken from raw degree differences, so one
    # degree of longitude is treated like one degree of latitude.
    distance_direction = np.array([d_lon, d_lat]) / offset_norm
    distance_magnitude = distance.distance(src, dest).meters
    distance_vector = distance_magnitude * distance_direction

    unit_wind_vector = wind / wind_magnitude
    distance_wind = np.dot(distance_vector, unit_wind_vector)
    if distance_wind <= 0:
        # Receptor is upwind of (or exactly abeam) the source.
        return 0.0

    # Crosswind offset = displacement minus its projection onto the wind axis.
    # (The previous code subtracted the scalar projection from both components.)
    distance_perpendicular = np.linalg.norm(distance_vector - distance_wind * unit_wind_vector)

    sigma_y = math.sqrt(2*distance_wind*Dy/wind_magnitude)
    sigma_z = math.sqrt(2*distance_wind*Dz/wind_magnitude)

    # Standard plume normalisation uses sigma_y*sigma_z; the previous code
    # referenced an undefined `sigma_x` here.
    normalisation = (alpha*intensity)/(2*math.pi*wind_magnitude*sigma_y*sigma_z)
    crosswind_term = math.exp(-distance_perpendicular**2/(2*sigma_y**2))
    # Direct plume plus its reflection from the ground.
    vertical_term = math.exp(-(z-H)**2/(2*sigma_z**2)) + math.exp(-(z+H)**2/(2*sigma_z**2))
    concentration = normalisation * crosswind_term * vertical_term
    return concentration

In [76]:
def compute_concentration(sensor, ts, Dy, Dz, alpha):
    """Sum the Gaussian-plume contributions of all gridded sources near one monitor.

    Parameters
    ----------
    sensor : index label of the monitor in ``locs``.
    ts : timestamp used to look up the wind in ``df_ws`` and to choose the
        traffic grid from its ``hour``.
    Dy, Dz, alpha : forwarded unchanged to ``gaussian_plume``.

    Returns
    -------
    float
        Total concentration from every source cell within ``src_radius`` cells
        of the monitor's cell; 0.0 if that window lies entirely off the grid.

    Raises
    ------
    KeyError
        If ``ts`` has no row in ``df_ws`` or ``sensor`` has no row in ``locs``.
    """
    # assumed that the timestamp will have a wind entry
    stack_height = {'traffic':0, 'brick_kilns':25, 'population_density':10, 'industry':30, 'power_plant':200}

    # Grid geometry used by the index arithmetic: cell (0, 0) starts at
    # (lat_origin, lon_origin) and each cell spans `cell_size` degrees.
    lon_origin, lat_origin, cell_size = 76.85, 28.2, 0.01
    src_radius = 7

    wind_speed = df_ws.loc[ts].to_numpy()
    # The previous code read the longitude via an undefined name (`hsp[1]`).
    dest = (locs.loc[sensor, 'Latitude'], locs.loc[sensor, 'Longitude'])
    # First array axis follows longitude, second follows latitude.
    idx_i = int((dest[1]-lon_origin)/cell_size)
    idx_j = int((dest[0]-lat_origin)/cell_size)

    # Choose the traffic grid from the hour of day.
    if 3 < ts.hour < 9:
        traffic = traffic_06
    elif 9 <= ts.hour < 15:
        traffic = traffic_12
    elif 15 <= ts.hour < 21:
        traffic = traffic_18
    else:
        traffic = traffic_00

    sources = (
        (brick_kilns, stack_height['brick_kilns']),
        (industries, stack_height['industry']),
        (power_plants, stack_height['power_plant']),
        (population_density, stack_height['population_density']),
        (traffic, stack_height['traffic']),
    )

    # Clamp the search window to the grid so that monitors near an edge neither
    # wrap around through negative indices nor run off the end.
    # Assumes every intensity grid has the same shape as `brick_kilns`.
    n_i, n_j = brick_kilns.shape
    i_lo, i_hi = max(idx_i-src_radius, 0), min(idx_i+src_radius, n_i-1)
    j_lo, j_hi = max(idx_j-src_radius, 0), min(idx_j+src_radius, n_j-1)

    total_concentration = 0.0

    # Index the full grids with the global (i, j). The previous code indexed
    # 15x15 sub-arrays with these global indices, which is out of range or the
    # wrong cell.
    for i in range(i_lo, i_hi+1):
        for j in range(j_lo, j_hi+1):
            # Centre of cell (i, j) as (latitude, longitude).
            src = (lat_origin+(j*cell_size)+cell_size/2, lon_origin+(i*cell_size)+cell_size/2)
            for grid, height in sources:
                intensity = grid[i, j]
                if intensity == 0:
                    # Empty cells contribute nothing; skip the plume evaluation.
                    continue
                total_concentration += gaussian_plume(src, dest, intensity, height, wind_speed, Dy, Dz, alpha)

    return total_concentration