In [1]:
import haversine as hs
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import LabelEncoder
import geopandas as gpd
import xarray as xr
import torch
from torch_geometric.data import Data, Dataset
from Trainer import Trainer
from wind_utils import get_distance, draw_graph, nearest_neighbors_graph, plot_heatmap, dataset_generation
from GCN_Regression import GCN_RegressionModel
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from functools import partial

# Open the NetCDF file from the notebook-relative data/ folder and show its
# summary. If the file sits next to the notebook instead, the path is just
# "delhi_cpcb_2022.nc".
data = xr.open_dataset(
    "data/delhi_cpcb_2022.nc",
)
data

In [2]:
# Latitudes used to split graph nodes: the training loops treat a node as a
# training node when its latitude is among the first 25 entries (rows 1-5
# below) and as a test node otherwise.
# NOTE(review): 28.639645 appears twice in a row (row 7) - confirm whether two
# stations really share this latitude or one entry is a typo.
lat_list = [
    28.815329, 28.647622, 28.695381, 28.4706914, 28.7762,
    28.7256504, 28.5512005, 28.656756, 28.7500499, 28.498571,
    28.5710274, 28.655935, 28.5627763, 28.6811736, 28.628624,
    28.73282, 28.58028, 28.588333, 28.5918245, 28.611281,
    28.636429, 28.684678, 28.60909, 28.570173, 28.822836,
    28.56789, 28.6573814, 28.530785, 28.623763, 28.674045,
    28.639645, 28.639645, 28.563262, 28.732528, 28.6514781,
    28.5504249, 28.710508, 28.531346, 28.672342, 28.699793,
]

In [3]:
# Wind table read from CSV (filtered and averaged in the following cells).
wind_data = pd.read_csv('data/wind_temp2.csv')

# Flatten the xarray dataset into a table; reset_index turns the former index
# levels into ordinary columns.
df = data.to_dataframe().reset_index()

In [4]:
# Keep a single snapshot: rows with a PM2.5 reading at one timestamp, plus the
# wind rows for that same timestamp.
SNAPSHOT_TIME = "2022-03-01 01:30:00"

df = df.dropna(subset=['PM2.5'])
# .copy() makes the snapshot an independent frame, so the column assignments in
# the next cell do not write into a slice (no SettingWithCopyWarning).
df = df[df["time"] == SNAPSHOT_TIME].copy()
wind_data = wind_data[wind_data['time'] == SNAPSHOT_TIME]

In [5]:
# Overwrite the WS and WD columns of every row with one scalar each: the mean
# of the u10 and v10 columns of the wind table.
# NOTE(review): these are plain means of u10/v10, not a derived speed and
# direction - confirm this is what the downstream graph features expect.
df = df.assign(
    WS=wind_data['u10'].mean(),
    WD=wind_data['v10'].mean(),
)

In [6]:
# Preview the first five rows of the snapshot frame.
df.head(n=5)

Unnamed: 0,station,time,From Date,To Date,PM2.5,PM10,NO,NO2,NOx,NH3,...,RH,WS,WD,SR,BP,AT,RF,TOT-RF,latitude,longitude
1417,"Alipur, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,70.0,150.0,3.78,43.38,26.15,3.42,...,78.22,1.766641,-0.220913,4.55,993.4,11.43,0.0,0.0,28.815329,77.15301
10200,"Anand Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,52.5,144.0,11.88,30.62,26.02,15.82,...,68.95,1.766641,-0.220913,8.95,973.65,15.45,,0.0,28.647622,77.315809
18983,"Ashok Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,79.0,134.0,7.47,67.17,41.8,50.77,...,71.3,1.766641,-0.220913,5.43,987.17,13.82,0.0,0.0,28.695381,77.181665
27766,"Aya Nagar, Delhi - IMD",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,73.94,159.3,22.49,26.28,48.8,,...,,1.766641,-0.220913,,,,,0.0,28.470691,77.109936
36549,"Bawana, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,135.0,245.0,4.8,20.65,25.43,41.45,...,81.62,1.766641,-0.220913,5.7,993.12,12.3,0.0,0.0,28.7762,77.051074


# Tuning for 2 Layers

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel(nn.Module):
    """Node-level regressor: two GCNConv layers followed by two linear layers.

    Defining this class in the notebook rebinds the name
    ``GCN_RegressionModel`` that was imported from ``GCN_Regression`` in the
    first cell.

    Args:
        input_dim: Number of input features per node.
        l1: Output width of the first graph convolution.
        l2: Output width of the second graph convolution and of the hidden
            linear layer.
    """

    def __init__(self, input_dim, l1=16, l2=10):
        super().__init__()
        # Attribute names are the state_dict keys stored in the checkpoints,
        # and the layers are created in this fixed order.
        self.conv1 = GCNConv(input_dim, l1)
        self.conv2 = GCNConv(l1, l2)
        self.linear1 = nn.Linear(l2, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        """Return one prediction per node (last dimension of size 1)."""
        hidden = F.relu(self.conv1(x, edge_index))
        # conv2 feeds linear1 directly: there is no activation between them.
        hidden = self.linear1(self.conv2(hidden, edge_index))
        return self.linear2(F.relu(hidden))


In [8]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Default-width model on 4 input features; it is not moved to `device` here.
model = GCN_RegressionModel(4)

In [9]:
# Search space, sampled independently for every trial:
#   n      - value passed to nearest_neighbors_graph, integer in [5, 20)
#   l1, l2 - model layer widths, a power of two from 4 to 512
#   lr     - Adam learning rate, log-uniform between 1e-4 and 1e-1
# Declared with Tune's own domains (same distributions as the previous
# np.random-based sample_from lambdas) so Tune can inspect the space.
HIDDEN_WIDTHS = [2 ** exponent for exponent in range(2, 10)]

config = {
    "n": tune.randint(5, 20),
    "l1": tune.choice(HIDDEN_WIDTHS),
    "l2": tune.choice(HIDDEN_WIDTHS),
    "lr": tune.loguniform(1e-4, 1e-1),
}

In [10]:
import random
# Seed every RNG this notebook process draws from. NumPy is included because
# the hyper-parameter search space is sampled through NumPy's global RNG.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_loop(config, checkpoint_dir=None, data_dir=None):
    """Ray Tune trainable for the two-convolution ``GCN_RegressionModel``.

    Builds a graph from the module-level ``df`` with
    ``nearest_neighbors_graph(df, config['n'])``. Nodes whose latitude is among
    the first 25 entries of ``lat_list`` are training nodes; all others are
    test nodes. One "epoch" here is 2500 full-graph Adam steps on the training
    nodes. After each epoch the test-node RMSE is reported to Tune together
    with a checkpoint.

    Args:
        config: Trial configuration with keys ``"n"``, ``"l1"``, ``"l2"`` and
            ``"lr"``.
        checkpoint_dir: Unused; kept so existing callers keep working.
        data_dir: Unused; kept so existing callers keep working.
    """
    # Resolve the device and seed inside the trial: it runs in a Ray worker
    # process, which may not see the same devices or RNG state as the notebook.
    trial_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.manual_seed(seed)

    G = nearest_neighbors_graph(df, config['n'])
    node_features, edge_index, y = dataset_generation(G)

    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)

    train_latitudes = lat_list[:25]  # sliced once instead of once per node
    for i, node in enumerate(G.nodes):
        if G.nodes[node]['latitude'] in train_latitudes:
            train_mask[i] = True
        else:
            test_mask[i] = True

    # Move features, edges, targets and masks to the device once. Previously
    # only x and edge_index were moved (on every step), so on a GPU the loss
    # compared a CUDA prediction with a CPU target.
    graph = Data(
        x=torch.tensor(node_features, dtype=torch.float),
        edge_index=edge_index,
        y=y,
        train_mask=train_mask,
        test_mask=test_mask,
    ).to(trial_device)
    train_target = graph.y[graph.train_mask].squeeze()
    test_target = graph.y[graph.test_mask].squeeze()

    net = GCN_RegressionModel(4, config["l1"], config["l2"])
    net.to(trial_device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    checkpoint = session.get_checkpoint()
    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        # The stored epoch has already been completed; resume with the next one.
        start_epoch = checkpoint_state["epoch"] + 1
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, 10):
        for _ in range(2500):
            optimizer.zero_grad()
            out = net(graph.x, graph.edge_index)
            loss = criterion(out[graph.train_mask].squeeze(), train_target)
            loss.backward()
            optimizer.step()

        # Evaluation only: no autograd graph is needed for the reported metric.
        with torch.no_grad():
            out = net(graph.x, graph.edge_index)  # Predicted PM values
            test_rmse = torch.sqrt(
                torch.mean((out[graph.test_mask].squeeze() - test_target) ** 2)
            )

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        session.report({"rmse": float(test_rmse)}, checkpoint=checkpoint)

    print('Finished Training')

In [11]:
# 50 sampled configurations; max_num_epochs matches the 10 reporting rounds
# hard-coded in train_loop.
num_samples = 50
max_num_epochs = 10

# ASHA minimises the reported "rmse", may stop a trial after its first report
# (grace_period=1) and uses a reduction factor of 2.
scheduler = ASHAScheduler(
    metric="rmse",
    mode="min",
    max_t=max_num_epochs,
    grace_period=1,
    reduction_factor=2,
)

# train_loop is passed directly: the former partial(train_loop) bound no
# arguments. Each trial requests 8 CPUs and no GPU.
result = tune.run(
    train_loop,
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose=3,
)

best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")


2023-06-05 20:46:36,549	INFO worker.py:1625 -- Started a local Ray instance.
2023-06-05 20:46:39,551	INFO tune.py:218 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/latest/tune/api/trainable.html



0,1
Current time:,2023-06-05 21:06:29
Running for:,00:19:50.01
Memory:,10.4/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop_f8588_00000,TERMINATED,127.0.0.1:10448,0.000257694,10,64.5809,24.7151
train_loop_f8588_00001,TERMINATED,127.0.0.1:10448,0.0101725,1,4.39024,29.5963
train_loop_f8588_00002,TERMINATED,127.0.0.1:10448,0.0250316,1,4.65556,28.3805
train_loop_f8588_00003,TERMINATED,127.0.0.1:10448,0.000129179,2,70.7228,31.3623
train_loop_f8588_00004,TERMINATED,127.0.0.1:10448,0.00947224,2,10.6507,29.8933
train_loop_f8588_00005,TERMINATED,127.0.0.1:10448,0.00835782,1,6.50361,32.9139
train_loop_f8588_00006,TERMINATED,127.0.0.1:10448,0.0279088,1,6.86418,30.6349
train_loop_f8588_00007,TERMINATED,127.0.0.1:10448,0.00859046,1,4.38815,28.6719
train_loop_f8588_00008,TERMINATED,127.0.0.1:10448,0.00216307,1,4.32962,36.8192
train_loop_f8588_00009,TERMINATED,127.0.0.1:10448,0.0173948,4,29.878,28.381


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop_f8588_00000,2023-06-05_20-47-51,True,LAPTOP-ETKK6OVI,10,127.0.0.1,10448,24.7151,True,64.5809,6.30431,64.5809,1685978271,10,f8588_00000
train_loop_f8588_00001,2023-06-05_20-47-55,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,29.5963,True,4.39024,4.39024,4.39024,1685978275,1,f8588_00001
train_loop_f8588_00002,2023-06-05_20-48-00,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,28.3805,True,4.65556,4.65556,4.65556,1685978280,1,f8588_00002
train_loop_f8588_00003,2023-06-05_20-49-11,True,LAPTOP-ETKK6OVI,2,127.0.0.1,10448,31.3623,True,70.7228,36.9323,70.7228,1685978351,2,f8588_00003
train_loop_f8588_00004,2023-06-05_20-49-21,True,LAPTOP-ETKK6OVI,2,127.0.0.1,10448,29.8933,True,10.6507,5.40652,10.6507,1685978361,2,f8588_00004
train_loop_f8588_00005,2023-06-05_20-49-28,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,32.9139,True,6.50361,6.50361,6.50361,1685978368,1,f8588_00005
train_loop_f8588_00006,2023-06-05_20-49-35,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,30.6349,True,6.86418,6.86418,6.86418,1685978375,1,f8588_00006
train_loop_f8588_00007,2023-06-05_20-49-39,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,28.6719,True,4.38815,4.38815,4.38815,1685978379,1,f8588_00007
train_loop_f8588_00008,2023-06-05_20-49-44,True,LAPTOP-ETKK6OVI,1,127.0.0.1,10448,36.8192,True,4.32962,4.32962,4.32962,1685978384,1,f8588_00008
train_loop_f8588_00009,2023-06-05_20-50-14,True,LAPTOP-ETKK6OVI,4,127.0.0.1,10448,28.381,True,29.878,7.54454,29.878,1685978414,4,f8588_00009


2023-06-05 21:06:29,631	INFO tune.py:945 -- Total run time: 1190.08 seconds (1190.00 seconds for the tuning loop).


Best trial config: {'n': 13, 'l1': 4, 'l2': 32, 'lr': 0.00048695683139523275}


In [12]:
# Same best-trial lookup as the previous cell, but with scope "all" instead of
# "last"; display that trial's configuration.
result.get_best_trial(metric="rmse", mode="min", scope="all").config

{'n': 16, 'l1': 8, 'l2': 256, 'lr': 0.0037884634983166134}

# Tuning for 1 Layer

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel2(nn.Module):
    """Node-level regressor: one GCNConv layer followed by two linear layers.

    Single-convolution variant of ``GCN_RegressionModel``; the second graph
    convolution is dropped, so ``linear1`` maps ``l1`` to ``l2`` directly.

    Args:
        input_dim: Number of input features per node.
        l1: Output width of the graph convolution.
        l2: Width of the hidden linear layer.
    """

    def __init__(self, input_dim, l1=16, l2=10):
        super().__init__()
        # Attribute names are the state_dict keys stored in the checkpoints,
        # and the layers are created in this fixed order.
        self.conv1 = GCNConv(input_dim, l1)
        self.linear1 = nn.Linear(l1, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        """Return one prediction per node (last dimension of size 1)."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.linear1(hidden))
        return self.linear2(hidden)


In [14]:
def train_loop2(config, checkpoint_dir=None, data_dir=None):
    """Ray Tune trainable for the single-convolution ``GCN_RegressionModel2``.

    Builds a graph from the module-level ``df`` with
    ``nearest_neighbors_graph(df, config['n'])``. Nodes whose latitude is among
    the first 25 entries of ``lat_list`` are training nodes; all others are
    test nodes. One "epoch" here is 1000 full-graph Adam steps on the training
    nodes. After each epoch the test-node RMSE is reported to Tune together
    with a checkpoint.

    Args:
        config: Trial configuration with keys ``"n"``, ``"l1"``, ``"l2"`` and
            ``"lr"``.
        checkpoint_dir: Unused; kept so existing callers keep working.
        data_dir: Unused; kept so existing callers keep working.
    """
    # Resolve the device and seed inside the trial: it runs in a Ray worker
    # process, which may not see the same devices or RNG state as the notebook.
    trial_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.manual_seed(seed)

    G = nearest_neighbors_graph(df, config['n'])
    node_features, edge_index, y = dataset_generation(G)

    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)

    train_latitudes = lat_list[:25]  # sliced once instead of once per node
    for i, node in enumerate(G.nodes):
        if G.nodes[node]['latitude'] in train_latitudes:
            train_mask[i] = True
        else:
            test_mask[i] = True

    # Move features, edges, targets and masks to the device once. Previously
    # only x and edge_index were moved (on every step), so on a GPU the loss
    # compared a CUDA prediction with a CPU target.
    graph = Data(
        x=torch.tensor(node_features, dtype=torch.float),
        edge_index=edge_index,
        y=y,
        train_mask=train_mask,
        test_mask=test_mask,
    ).to(trial_device)
    train_target = graph.y[graph.train_mask].squeeze()
    test_target = graph.y[graph.test_mask].squeeze()

    net = GCN_RegressionModel2(4, config["l1"], config["l2"])
    net.to(trial_device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    checkpoint = session.get_checkpoint()
    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        # The stored epoch has already been completed; resume with the next one.
        start_epoch = checkpoint_state["epoch"] + 1
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, 10):
        for _ in range(1000):
            optimizer.zero_grad()
            out = net(graph.x, graph.edge_index)
            loss = criterion(out[graph.train_mask].squeeze(), train_target)
            loss.backward()
            optimizer.step()

        # Evaluation only: no autograd graph is needed for the reported metric.
        with torch.no_grad():
            out = net(graph.x, graph.edge_index)  # Predicted PM values
            test_rmse = torch.sqrt(
                torch.mean((out[graph.test_mask].squeeze() - test_target) ** 2)
            )

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        session.report({"rmse": float(test_rmse)}, checkpoint=checkpoint)

    print('Finished Training')

In [15]:
# 50 sampled configurations. train_loop2 stops after 10 reporting rounds, so
# max_num_epochs is set to 10 to match; the previous value of 100 could never
# be reached by any trial.
num_samples = 50
max_num_epochs = 10

# ASHA minimises the reported "rmse", lets every trial report at least twice
# (grace_period=2) and uses a reduction factor of 2.
scheduler = ASHAScheduler(
    metric="rmse",
    mode="min",
    max_t=max_num_epochs,
    grace_period=2,
    reduction_factor=2,
)

# train_loop2 is passed directly: the former partial(train_loop2) bound no
# arguments. Each trial requests 8 CPUs and no GPU.
result = tune.run(
    train_loop2,
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose=3,
)

best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")

0,1
Current time:,2023-06-05 21:14:44
Running for:,00:08:14.01
Memory:,10.4/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop2_bdf62_00000,TERMINATED,127.0.0.1:25208,0.000199717,10,11.782,31.0411
train_loop2_bdf62_00001,TERMINATED,127.0.0.1:25208,0.0526293,2,17.0599,31.6768
train_loop2_bdf62_00002,TERMINATED,127.0.0.1:25208,0.0524774,10,16.4313,29.2863
train_loop2_bdf62_00003,TERMINATED,127.0.0.1:25208,0.0404352,10,14.3765,29.3121
train_loop2_bdf62_00004,TERMINATED,127.0.0.1:25208,0.000183697,2,3.13088,31.8781
train_loop2_bdf62_00005,TERMINATED,127.0.0.1:25208,0.0119657,2,3.24994,60.9741
train_loop2_bdf62_00006,TERMINATED,127.0.0.1:25208,0.000485311,10,40.076,28.0375
train_loop2_bdf62_00007,TERMINATED,127.0.0.1:25208,0.000929457,2,2.4785,31.7237
train_loop2_bdf62_00008,TERMINATED,127.0.0.1:25208,0.00154485,2,2.39147,31.5682
train_loop2_bdf62_00009,TERMINATED,127.0.0.1:25208,0.00947355,4,9.12,29.5985


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop2_bdf62_00000,2023-06-05_21-06-48,True,LAPTOP-ETKK6OVI,10,127.0.0.1,25208,31.0411,True,11.782,1.14175,11.782,1685979408,10,bdf62_00000
train_loop2_bdf62_00001,2023-06-05_21-07-05,True,LAPTOP-ETKK6OVI,2,127.0.0.1,25208,31.6768,True,17.0599,9.94988,17.0599,1685979425,2,bdf62_00001
train_loop2_bdf62_00002,2023-06-05_21-07-21,True,LAPTOP-ETKK6OVI,10,127.0.0.1,25208,29.2863,True,16.4313,1.84051,16.4313,1685979441,10,bdf62_00002
train_loop2_bdf62_00003,2023-06-05_21-07-36,True,LAPTOP-ETKK6OVI,10,127.0.0.1,25208,29.3121,True,14.3765,1.43022,14.3765,1685979456,10,bdf62_00003
train_loop2_bdf62_00004,2023-06-05_21-07-39,True,LAPTOP-ETKK6OVI,2,127.0.0.1,25208,31.8781,True,3.13088,1.56094,3.13088,1685979459,2,bdf62_00004
train_loop2_bdf62_00005,2023-06-05_21-07-42,True,LAPTOP-ETKK6OVI,2,127.0.0.1,25208,60.9741,True,3.24994,1.66395,3.24994,1685979462,2,bdf62_00005
train_loop2_bdf62_00006,2023-06-05_21-08-22,True,LAPTOP-ETKK6OVI,10,127.0.0.1,25208,28.0375,True,40.076,3.38603,40.076,1685979502,10,bdf62_00006
train_loop2_bdf62_00007,2023-06-05_21-08-25,True,LAPTOP-ETKK6OVI,2,127.0.0.1,25208,31.7237,True,2.4785,1.16,2.4785,1685979505,2,bdf62_00007
train_loop2_bdf62_00008,2023-06-05_21-08-27,True,LAPTOP-ETKK6OVI,2,127.0.0.1,25208,31.5682,True,2.39147,1.13009,2.39147,1685979507,2,bdf62_00008
train_loop2_bdf62_00009,2023-06-05_21-08-36,True,LAPTOP-ETKK6OVI,4,127.0.0.1,25208,29.5985,True,9.12,2.53998,9.12,1685979516,4,bdf62_00009


2023-06-05 21:14:44,137	INFO tune.py:945 -- Total run time: 494.05 seconds (493.99 seconds for the tuning loop).


Best trial config: {'n': 16, 'l1': 512, 'l2': 64, 'lr': 0.0030450193677180426}
