In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import sys
sys.path.append("../")
import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
from PIL import Image
from src.train_pm import Dataset, double_conv, LeUNet, StandardNet, EnsembleNet, EPAPLN, ResNetUNet, EnsembleLeUNet
import cv2
%matplotlib inline
import matplotlib.pyplot as plt
from src.preprocessing.trans_func import *
import torchvision.models as models

In [None]:
# Load the test-set manifest; its 'filename' and 'ppm' columns feed the Dataset below.
data = pd.read_csv('../test_data.csv')
files = list(data['filename'])
ppm = list(data['ppm'])
# One integer id per manifest row: 0 .. len(files) - 1.
ids = list(range(len(files)))

In [None]:
# model = LeUNet()
# model = torch.nn.DataParallel(model).cuda()
# model.load_state_dict(torch.load("../src/model_pm_train.pth"),strict=False) # on GPU

In [None]:
# model = ResNetUNet()
# model = torch.nn.DataParallel(model).cuda()
# model.load_state_dict(torch.load("../src/resnetunet_pm_train.pth"),strict=False) # on GPU

In [None]:
# model = StandardNet('resnet50').cuda()
# model.load_state_dict(torch.load("../src/resnet50_pm_train.pth"),strict=False)

In [None]:
# model = StandardNet('vgg16').cuda()
# model.load_state_dict(torch.load("../src/vgg16_pm_train.pth"),strict=False)

In [None]:
# model = StandardNet('inception_v3').cuda()
# model.load_state_dict(torch.load("../src/inception_pm_train.pth"),strict=False)

In [None]:
# Active model for this run: EPAPLN on the GPU with weights from the training checkpoint.
model = EPAPLN().cuda()
# strict=False tolerates key mismatches between checkpoint and model. The call is kept
# as the cell's last expression so its return value is shown in the cell output.
model.load_state_dict(
    torch.load("../src/epapln_pm_train.pth"),
    strict=False,
)

In [None]:
# model = EnsembleNet().cuda()
# model.load_state_dict(torch.load("../src/ensemble_pm_train.pth"),strict=False)

In [None]:
# model = EnsembleLeUNet().cuda()
# model.load_state_dict(torch.load("../src/ensembleleunet_pm_train.pth"),strict=False)

In [None]:
# Preprocessing: resize to 256x256, convert to a tensor, then normalise per channel.
dataset = Dataset(
    ids,
    files,
    ppm,
    transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.5231, 0.5180, 0.5115],
            std=[0.2014, 0.2018, 0.2100],
        ),  # normalize
    ]),
)
# shuffle=False: later cells index the error vector by row position in `data`
# (assumes Dataset yields samples in `ids` order -- TODO confirm).
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=32,
    shuffle=False,
    num_workers=12,
)

In [None]:
# Put the model in evaluation mode (torch.nn.Module.eval) before the inference loop below.
model = model.eval()

In [None]:
# Run the model over the whole loader and collect per-batch ground truth / predictions
# as NumPy arrays (concatenated in the following cells).
actual = []
preds = []
# Inference only: no_grad() stops autograd from recording a graph for every batch,
# which otherwise wastes GPU memory. It also makes the deprecated Variable wrappers
# and .detach() calls unnecessary.
with torch.no_grad():
    for x, y in loader:
        x = x.cuda(non_blocking=True)
        # Targets are never fed to the model, so they stay on the CPU.
        y = y.float()

        yhat = model(x).squeeze()

        actual.append(y.numpy())
        # squeeze() collapses a final batch of size 1 to a 0-d tensor, which
        # np.concatenate cannot handle; atleast_1d keeps every batch at >= 1-D.
        preds.append(np.atleast_1d(yhat.cpu().numpy()))

In [None]:
# Flatten the per-batch arrays into one vector. atleast_1d makes this cell safe to
# re-run: iterating the already-concatenated 1-D array yields 0-d scalars, which
# np.concatenate alone rejects.
actual = np.concatenate([np.atleast_1d(batch) for batch in actual])

In [None]:
# Flatten the per-batch predictions into one vector. atleast_1d guards against a 0-d
# entry (a size-1 batch after .squeeze()) and makes the cell safe to re-run on the
# already-concatenated 1-D array.
preds = np.concatenate([np.atleast_1d(batch) for batch in preds])

In [None]:
# Signed error (prediction - actual) for every test sample.
err_test = np.asarray(preds) - np.asarray(actual)

# Histogram restricted to [-300, 300]; values outside that range are not drawn.
fig, ax = plt.subplots()
ax.hist(err_test, bins=20, range=(-300, 300))
ax.set_title("Error b/w prediction and actual PM-2.5 \nfor test set (without outliers)")
ax.set_xlabel('err')
ax.set_ylabel('# examples')
fig.savefig('err_full.png')
plt.show()

# Mean and standard deviation of the signed error over the full test set.
print(err_test.mean())
print(err_test.std())

In [None]:
# Absolute error over the full test set (no range clipping on the histogram).
abs_err_test = np.abs(err_test)

fig, ax = plt.subplots()
ax.hist(abs_err_test, bins=20)
ax.set_title("Absolute error b/w prediction and actual PM-2.5 \nfor test set (with outliers)")
ax.set_xlabel('err')
ax.set_ylabel('# examples')
fig.savefig('err_val.png')
plt.show()

# Mean absolute error and its standard deviation.
print(abs_err_test.mean())
print(abs_err_test.std())

In [None]:
# plt.hist(np.abs(err_train), bins=20)
# plt.title("Absolute error b/w prediction and actual PM-2.5 \nfor test set (with outliers)")
# plt.xlabel('err')
# plt.ylabel('# examples')
# plt.savefig('err_val.png')
# plt.show()
# print(np.mean(np.abs(err_train)))
# print(np.std(np.abs(err_train)))

In [None]:
# Split row indices by filename prefix: Shanghai1/2/3 and Beijing images go to
# id_china, every other row goes to id_delhi.
china_prefixes = ('Shanghai1', 'Shanghai2', 'Shanghai3', 'Beijing')
id_china = []
id_delhi = []
for index, filename in data['filename'].items():
    # str.startswith accepts a tuple and matches if any prefix matches.
    bucket = id_china if filename.startswith(china_prefixes) else id_delhi
    bucket.append(index)

print(len(id_china))
print(len(id_delhi))

In [None]:
# Absolute test error split by subset; indices come from the prefix split above.
china_idx = np.array(id_china)
delhi_idx = np.array(id_delhi)
err_china = np.abs(err_test[china_idx])
err_delhi = np.abs(err_test[delhi_idx])

fig, ax = plt.subplots()
ax.hist(err_china, bins=20)
ax.set_title("Absolute error b/w prediction and actual PM-2.5 \nfor single-scene test set (with outliers)")
ax.set_xlabel('err')
ax.set_ylabel('# examples')
fig.savefig('err_china.png')
plt.show()

# Mean absolute error and its spread on the China subset.
print(err_china.mean())
print(err_china.std())

In [None]:
# Histogram of the absolute error on the Delhi subset.
plt.hist(err_delhi, bins=20)
plt.title("Absolute error b/w prediction and actual PM-2.5 \nfor multiple-scene delhi test set (with outliers)")
plt.xlabel('err')
plt.ylabel('# examples')
# Saved under its own name: the signed-error cell that follows also writes
# 'err_delhi.png', which previously overwrote this figure on every run.
plt.savefig('err_delhi_abs.png')
plt.show()
# Mean absolute error and its spread on the Delhi subset.
print(np.mean(err_delhi))
print(np.std(err_delhi))

In [None]:
# Signed error (prediction - actual) on the Delhi subset, selected once and reused.
signed_err_delhi = err_test[np.array(id_delhi)]

fig, ax = plt.subplots()
ax.hist(signed_err_delhi, bins=40)
ax.set_title("Error b/w prediction and actual PM-2.5 \nfor multiple-scene delhi test set (with outliers)")
ax.set_xlabel('err')
ax.set_ylabel('# examples')
fig.savefig('err_delhi.png')
plt.show()

# Mean and standard deviation of the signed Delhi error.
print(signed_err_delhi.mean())
print(signed_err_delhi.std())

In [None]:
# Ground-truth values for the Delhi rows only.
actual_delhi = actual[np.asarray(id_delhi)]

In [None]:
# Model predictions for the Delhi rows only (same order as actual_delhi).
preds_delhi = preds[np.asarray(id_delhi)]

In [None]:
# Count Delhi predictions strictly inside (actual - 25, actual + 25), then report
# count, total and fraction.
within_band = (preds_delhi < actual_delhi + 25) & (preds_delhi > actual_delhi - 25)
correct_p = int(np.count_nonzero(within_band))
print(correct_p, len(id_delhi), correct_p*1.0/len(id_delhi))

In [None]:
# Count Delhi predictions strictly inside (actual - 50, actual + 50), then report
# count, total and fraction.
within_band = (preds_delhi < actual_delhi + 50) & (preds_delhi > actual_delhi - 50)
correct_p = int(np.count_nonzero(within_band))
print(correct_p, len(id_delhi), correct_p*1.0/len(id_delhi))

## Pollution Map

In [None]:
# Monitor locations table; the 'source' column is discarded straight away.
location_df = (
    pd.read_csv("/scratch/ab9738/pollution_img/govdata/locations.csv")
    .drop(columns=["source"])
)

In [None]:
# Combine the two coordinates into a single "lat,lon" string column.
lat_text = location_df['lat'].map(str)
lon_text = location_df['lon'].map(str)
location_df['Lat-Lon'] = lat_text + ',' + lon_text

In [None]:
# The separate coordinate columns are redundant once 'Lat-Lon' exists.
location_df = location_df.drop(columns=['lat', 'lon'])

In [None]:
# Adding UP Sensor Locations
# Each entry is (station name, "lat,lon"), matching the two columns left in location_df.
up_sensors = [
    ('SanjayNagar_UPPCP', '28.685382,77.453839'),
    ('Indirapuram_UPPCP', '28.646233,77.358075'),
    ('Vasundhara_UPPCP', '28.6603346,77.3572563'),
    ('Loni_UPPCP', '28.757294,77.278792'),
    ('NoidaSector62_IMD', '28.6245479,77.3577104'),
    ('NoidaSector116_UPPCP', '28.56912141,77.3939069'),
    ('KnowledgeParkV_UPPCP', '28.55856132,77.45445483'),
    ('KnowledgeParkIII_UPPCP', '28.47250249,77.48179193'),
    ('NoidaSector1_UPPCP', '28.58966084,77.30998866'),
    ('NoidaSector125_UPPCP', '28.54492244,77.32281108'),
]
# Build the extra rows in one go instead of growing the frame row by row.
up_sensor_df = pd.DataFrame(up_sensors, columns=location_df.columns)
# Skip stations whose name (first column) is already present, so re-running this
# cell does not append the same sensors a second time.
already_present = up_sensor_df.iloc[:, 0].isin(location_df.iloc[:, 0])
location_df = pd.concat(
    [location_df, up_sensor_df[~already_present]],
    ignore_index=True,
)

In [None]:
# Show the location table, including the UP sensors appended above.
location_df

In [None]:
# Delhi rows of the test manifest. Take an explicit copy: a 'pred' column is written
# into this frame later, and writing into a bare .loc selection of `data` triggers
# pandas' SettingWithCopyWarning.
df_act_preds = data.loc[id_delhi].copy()

In [None]:
# Image annotations; later cells use its 'filename' and 'closest_monitor' columns.
annotations_csv = '/scratch/ab9738/pollution_img/code/Annotations.csv'
df_annot = pd.read_csv(annotations_csv)

In [None]:
# Inspect the raw annotation table before its 'filename' column is trimmed.
df_annot

In [None]:
# Drop the last four characters of every filename (presumably a ".ext" suffix --
# TODO confirm). NOTE(review): this is unconditional, so re-running the cell trims
# another four characters each time.
df_annot['filename'] = df_annot['filename'].str.slice(stop=-4)

In [None]:
# Index the annotations by the trimmed filename so they can be matched against
# predictions by filename. NOTE(review): re-running raises KeyError because
# 'filename' is no longer a column after the first run.
df_annot = df_annot.set_index('filename')

In [None]:
# Attach the Delhi predictions as a 'pred' column. assign() returns a new frame
# instead of writing into one derived from a .loc selection of `data`, which avoids
# pandas' SettingWithCopyWarning.
df_act_preds = df_act_preds.assign(pred=preds_delhi)

In [None]:
# Actual-vs-predicted table keyed by filename, ready to be matched with the annotations.
df_ap = df_act_preds.set_index('filename')

In [None]:
# Inspect the Delhi rows with their predictions, indexed by filename.
df_ap

In [None]:
# Series mapping each annotated filename (the index) to its closest monitor.
df_cm = df_annot.loc[:, 'closest_monitor']

In [None]:
# Inspect the filename -> closest_monitor mapping.
df_cm

In [None]:
# Attach each image's closest_monitor by matching on 'filename', which is the index
# name of both df_ap and df_cm. merge() defaults to an inner join, so rows without a
# matching annotation are dropped here.
# NOTE(review): not idempotent -- re-running merges closest_monitor a second time
# (suffixed columns); rebuild df_ap from df_act_preds first.
df_ap = df_ap.merge(df_cm, on='filename')

In [None]:
# Average every numeric column (including 'ppm' and 'pred') per closest monitor.
# numeric_only=True skips non-numeric columns explicitly: pandas >= 2.0 raises
# TypeError on them, whereas older versions dropped them silently.
df_ap = df_ap.groupby('closest_monitor').mean(numeric_only=True)

In [None]:
# df_ap.to_csv('pollution_map_input.csv')

In [None]:
# Mean absolute difference between the per-monitor averages of 'ppm' and 'pred'.
(df_ap['ppm'] - df_ap['pred']).abs().mean()

In [None]:
# Number of monitors whose averaged prediction differs from the averaged 'ppm' by
# less than 50.
(df_ap['ppm'] - df_ap['pred']).abs().lt(50).sum()