In [1]:
import haversine as hs
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import LabelEncoder
import geopandas as gpd
import xarray as xr
import torch
from torch_geometric.data import Data, Dataset
from Trainer import Trainer
from utils import get_distance, draw_graph, nearest_neighbors_graph, plot_heatmap, dataset_generation
from GCN_Regression import GCN_RegressionModel
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from functools import partial

# Open the NetCDF station dataset from the local data directory; the bare
# `data` expression at the end of the cell renders the dataset summary.
data = xr.open_dataset("data/delhi_cpcb_2022.nc")
# Alternative location when the file sits next to the notebook:
# data = xr.open_dataset("delhi_cpcb_2022.nc")
data

In [2]:
# Station latitudes, five per row. The training loops below treat the first
# 25 entries (the first five rows) as the training split and every other
# latitude as the test split.
# NOTE(review): 28.639645 appears twice in a row (entries 31 and 32) -- confirm
# that two stations really share this latitude.
lat_list = [
    28.815329, 28.647622, 28.695381, 28.4706914, 28.7762,
    28.7256504, 28.5512005, 28.656756, 28.7500499, 28.498571,
    28.5710274, 28.655935, 28.5627763, 28.6811736, 28.628624,
    28.73282, 28.58028, 28.588333, 28.5918245, 28.611281,
    28.636429, 28.684678, 28.60909, 28.570173, 28.822836,
    28.56789, 28.6573814, 28.530785, 28.623763, 28.674045,
    28.639645, 28.639645, 28.563262, 28.732528, 28.6514781,
    28.5504249, 28.710508, 28.531346, 28.672342, 28.699793,
]

In [3]:
# Flatten the xarray dataset into a DataFrame and turn its index levels into
# ordinary columns.
df = data.to_dataframe().reset_index()
# Wind/temperature table read from CSV; it is not referenced again in the
# visible part of this notebook.
wind_data = pd.read_csv('data/wind_temp_shifted.csv')

In [4]:
# Keep only rows that have a PM2.5 reading, then narrow the frame down to the
# single 2022-03-01 01:30:00 timestamp used for every experiment below.
df = (
    df
    .dropna(subset=['PM2.5'])
    .loc[lambda frame: frame["time"] == "2022-03-01 01:30:00"]
)

In [5]:
# Preview the first rows of the single-timestamp frame built in the previous cell.
df.head()

Unnamed: 0,station,time,From Date,To Date,PM2.5,PM10,NO,NO2,NOx,NH3,...,RH,WS,WD,SR,BP,AT,RF,TOT-RF,latitude,longitude
1417,"Alipur, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,70.0,150.0,3.78,43.38,26.15,3.42,...,78.22,,139.97,4.55,993.4,11.43,0.0,0.0,28.815329,77.15301
10200,"Anand Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,52.5,144.0,11.88,30.62,26.02,15.82,...,68.95,0.3,246.25,8.95,973.65,15.45,,0.0,28.647622,77.315809
18983,"Ashok Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,79.0,134.0,7.47,67.17,41.8,50.77,...,71.3,0.38,206.9,5.43,987.17,13.82,0.0,0.0,28.695381,77.181665
27766,"Aya Nagar, Delhi - IMD",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,73.94,159.3,22.49,26.28,48.8,,...,,,,,,,,0.0,28.470691,77.109936
36549,"Bawana, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,135.0,245.0,4.8,20.65,25.43,41.45,...,81.62,0.62,22.33,5.7,993.12,12.3,0.0,0.0,28.7762,77.051074


# Tuning for 2 Layers

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel(nn.Module):
    """Graph regressor: two GCN convolutions followed by a two-layer linear head.

    NOTE: defining this class rebinds the ``GCN_RegressionModel`` name that the
    first cell imports from ``GCN_Regression``.

    Args:
        input_dim: number of input features per node.
        l1: output width of the first graph convolution.
        l2: output width of the second graph convolution, also used as the
            width of the hidden linear layer.
    """

    def __init__(self, input_dim, l1 = 16, l2 = 10):
        super().__init__()
        # Attribute names double as state_dict keys, and the creation order
        # fixes the order in which parameters are initialised.
        self.conv1 = GCNConv(input_dim, l1)
        self.conv2 = GCNConv(l1, l2)
        self.linear1 = nn.Linear(l2, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        """Run the network on features ``x`` over the graph ``edge_index``.

        Returns the output of the final linear layer, whose last dimension is 1.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        # No activation is applied between the second convolution and linear1.
        hidden = self.conv2(hidden, edge_index)
        hidden = F.relu(self.linear1(hidden))
        return self.linear2(hidden)


In [7]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Hyperparameter search space shared by both tuning runs.
# Declarative Tune domains replace `tune.sample_from` over the global NumPy
# RNG: the sampled distributions are the same, but Tune can inspect the space
# and the sampled sizes are plain Python ints.
config = {
    # Neighbours per station when building the graph: integers 5..19
    # (the upper bound of tune.randint is exclusive).
    "n": tune.randint(5, 20),
    # Layer widths: powers of two from 4 to 512.
    "l1": tune.choice([2**exponent for exponent in range(2, 10)]),
    "l2": tune.choice([2**exponent for exponent in range(2, 10)]),
    # Adam learning rate, sampled log-uniformly.
    "lr": tune.loguniform(1e-4, 1e-1),
}

In [9]:
import random

# Seed every random number generator this notebook imports so that anything
# drawn in the notebook process is repeatable across Restart & Run All.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_loop(config, checkpoint_dir=None, data_dir=None):
    """Ray Tune trainable: fit the two-layer GCN regressor and report test RMSE.

    Builds a nearest-neighbour graph over the stations in ``df``, trains
    ``GCN_RegressionModel`` on the nodes whose latitude is among the first 25
    entries of ``lat_list``, and after every epoch reports the RMSE on the
    remaining nodes together with a checkpoint.

    Args:
        config: sampled hyperparameters. Required keys: ``n`` (neighbours per
            node), ``l1``/``l2`` (layer widths) and ``lr`` (Adam learning rate).
            Optional keys: ``max_epochs`` (default 10), ``steps_per_epoch``
            (default 2500) and ``seed`` (default 42).
        checkpoint_dir: unused; kept so existing callers keep working.
        data_dir: unused; kept so existing callers keep working.

    NOTE(review): the reported metric is computed on the test split, so the
    hyperparameters are selected on the same nodes that would be used for the
    final evaluation. Consider carving out a separate validation split.
    """
    # Resolve the device inside the trial rather than relying on the notebook
    # global: Tune runs the trainable in a worker whose GPU visibility can
    # differ from the notebook process.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Seeds set in the notebook process do not carry over to the worker, so
    # seed here to make the weight initialisation repeatable.
    torch.manual_seed(config.get("seed", 42))
    max_epochs = config.get("max_epochs", 10)
    steps_per_epoch = config.get("steps_per_epoch", 2500)

    G = nearest_neighbors_graph(df, config['n'])
    node_features, edge_index, y = dataset_generation(G)

    # Split nodes by latitude; build the lookup set once instead of slicing
    # and scanning lat_list for every node.
    train_latitudes = set(lat_list[:25])
    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)
    for i, node in enumerate(G.nodes):
        if G.nodes[node]['latitude'] in train_latitudes:
            train_mask[i] = True
        else:
            test_mask[i] = True

    # Move features, targets and masks to the device once. Previously only x
    # and edge_index were moved (on every step), so on CUDA the loss mixed
    # device predictions with CPU targets.
    graph_data = Data(
        x=torch.tensor(node_features, dtype=torch.float),
        edge_index=edge_index,
        y=y,
        train_mask=train_mask,
        test_mask=test_mask,
    ).to(device)

    net = GCN_RegressionModel(2, config["l1"], config["l2"])
    net.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    checkpoint = session.get_checkpoint()
    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        # The checkpoint stores the epoch that just finished; resume with the
        # next one instead of repeating it.
        start_epoch = checkpoint_state["epoch"] + 1
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, max_epochs):
        net.train()
        for _ in range(steps_per_epoch):
            optimizer.zero_grad()
            out = net(graph_data.x, graph_data.edge_index)
            loss = criterion(
                out[graph_data.train_mask].squeeze(),
                graph_data.y[graph_data.train_mask].squeeze(),
            )
            loss.backward()
            optimizer.step()

        # Evaluate without building an autograd graph.
        net.eval()
        with torch.no_grad():
            out = net(graph_data.x, graph_data.edge_index)  # Predicted PM values
            residual = (
                out[graph_data.test_mask].squeeze()
                - graph_data.y[graph_data.test_mask].squeeze()
            )
            test_rmse = torch.sqrt(torch.mean(residual ** 2))

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        session.report({"rmse": float(test_rmse)}, checkpoint=checkpoint)

    print('Finished Training')

In [10]:
num_samples = 50
# Scheduler horizon; train_loop reports 10 epochs per trial by default.
max_num_epochs = 10

# ASHA stops trials early based on the reported "rmse" (lower is better).
scheduler = ASHAScheduler(
    metric="rmse",
    mode="min",
    max_t=max_num_epochs,
    grace_period=1,
    reduction_factor=2,
)

# Pass the trainable directly: wrapping it in functools.partial without
# binding any argument added nothing.
result = tune.run(
    train_loop,
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose=3,
)

# "last" ranks trials by the RMSE of their final reported epoch.
best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")


2023-06-05 19:32:15,742	INFO worker.py:1625 -- Started a local Ray instance.
2023-06-05 19:32:18,579	INFO tune.py:218 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/latest/tune/api/trainable.html



0,1
Current time:,2023-06-05 20:00:29
Running for:,00:28:11.30
Memory:,10.4/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop_95670_00000,TERMINATED,127.0.0.1:21148,0.000133224,10,46.3815,30.5086
train_loop_95670_00001,TERMINATED,127.0.0.1:21148,0.000679588,1,6.75984,33.7842
train_loop_95670_00002,TERMINATED,127.0.0.1:21148,0.000148426,4,22.77,27.0844
train_loop_95670_00003,TERMINATED,127.0.0.1:21148,0.000430219,1,4.60009,32.4295
train_loop_95670_00004,TERMINATED,127.0.0.1:21148,0.00018802,1,8.53836,41.9172
train_loop_95670_00005,TERMINATED,127.0.0.1:21148,0.00590844,1,5.38513,33.5359
train_loop_95670_00006,TERMINATED,127.0.0.1:21148,0.000817655,2,22.9296,29.5712
train_loop_95670_00007,TERMINATED,127.0.0.1:21148,0.00131133,2,12.2516,32.5877
train_loop_95670_00008,TERMINATED,127.0.0.1:21148,0.000172431,1,6.02686,32.0044
train_loop_95670_00009,TERMINATED,127.0.0.1:21148,0.080912,2,26.8913,28.3805


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop_95670_00000,2023-06-05_19-33-10,True,LAPTOP-ETKK6OVI,10,127.0.0.1,21148,30.5086,True,46.3815,4.9996,46.3815,1685973790,10,95670_00000
train_loop_95670_00001,2023-06-05_19-33-17,True,LAPTOP-ETKK6OVI,1,127.0.0.1,21148,33.7842,True,6.75984,6.75984,6.75984,1685973797,1,95670_00001
train_loop_95670_00002,2023-06-05_19-33-39,True,LAPTOP-ETKK6OVI,4,127.0.0.1,21148,27.0844,True,22.77,5.86566,22.77,1685973819,4,95670_00002
train_loop_95670_00003,2023-06-05_19-33-44,True,LAPTOP-ETKK6OVI,1,127.0.0.1,21148,32.4295,True,4.60009,4.60009,4.60009,1685973824,1,95670_00003
train_loop_95670_00004,2023-06-05_19-33-53,True,LAPTOP-ETKK6OVI,1,127.0.0.1,21148,41.9172,True,8.53836,8.53836,8.53836,1685973833,1,95670_00004
train_loop_95670_00005,2023-06-05_19-33-58,True,LAPTOP-ETKK6OVI,1,127.0.0.1,21148,33.5359,True,5.38513,5.38513,5.38513,1685973838,1,95670_00005
train_loop_95670_00006,2023-06-05_19-34-21,True,LAPTOP-ETKK6OVI,2,127.0.0.1,21148,29.5712,True,22.9296,12.0095,22.9296,1685973861,2,95670_00006
train_loop_95670_00007,2023-06-05_19-34-33,True,LAPTOP-ETKK6OVI,2,127.0.0.1,21148,32.5877,True,12.2516,6.01225,12.2516,1685973873,2,95670_00007
train_loop_95670_00008,2023-06-05_19-34-39,True,LAPTOP-ETKK6OVI,1,127.0.0.1,21148,32.0044,True,6.02686,6.02686,6.02686,1685973879,1,95670_00008
train_loop_95670_00009,2023-06-05_19-35-06,True,LAPTOP-ETKK6OVI,2,127.0.0.1,21148,28.3805,True,26.8913,14.7919,26.8913,1685973906,2,95670_00009


2023-06-05 20:00:29,932	INFO tune.py:945 -- Total run time: 1691.35 seconds (1691.29 seconds for the tuning loop).


Best trial config: {'n': 10, 'l1': 128, 'l2': 512, 'lr': 0.00017975298564176873}


In [11]:
# Same lookup with scope "all" instead of "last"; in this run it selects a
# different configuration from the one printed above.
result.get_best_trial("rmse", "min", "all").config

{'n': 13, 'l1': 512, 'l2': 8, 'lr': 0.0002931834478118096}

# Tuning for 1 Layer

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel2(nn.Module):
    """Graph regressor: one GCN convolution followed by a two-layer linear head.

    Args:
        input_dim: number of input features per node.
        l1: output width of the graph convolution.
        l2: width of the hidden linear layer.
    """

    def __init__(self, input_dim, l1 = 16, l2 = 10):
        super().__init__()
        # Attribute names double as state_dict keys, and the creation order
        # fixes the order in which parameters are initialised.
        self.conv1 = GCNConv(input_dim, l1)
        self.linear1 = nn.Linear(l1, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        """Run the network on features ``x`` over the graph ``edge_index``.

        Returns the output of the final linear layer, whose last dimension is 1.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.linear1(hidden))
        return self.linear2(hidden)


In [15]:
def train_loop2(config, checkpoint_dir=None, data_dir=None):
    """Ray Tune trainable: fit the one-layer GCN regressor and report test RMSE.

    Builds a nearest-neighbour graph over the stations in ``df``, trains
    ``GCN_RegressionModel2`` on the nodes whose latitude is among the first 25
    entries of ``lat_list``, and after every epoch reports the RMSE on the
    remaining nodes together with a checkpoint.

    Args:
        config: sampled hyperparameters. Required keys: ``n`` (neighbours per
            node), ``l1``/``l2`` (layer widths) and ``lr`` (Adam learning rate).
            Optional keys: ``max_epochs`` (default 10), ``steps_per_epoch``
            (default 1000) and ``seed`` (default 42).
        checkpoint_dir: unused; kept so existing callers keep working.
        data_dir: unused; kept so existing callers keep working.

    NOTE(review): the reported metric is computed on the test split, so the
    hyperparameters are selected on the same nodes that would be used for the
    final evaluation. Consider carving out a separate validation split.
    """
    # Resolve the device inside the trial rather than relying on the notebook
    # global: Tune runs the trainable in a worker whose GPU visibility can
    # differ from the notebook process.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Seeds set in the notebook process do not carry over to the worker, so
    # seed here to make the weight initialisation repeatable.
    torch.manual_seed(config.get("seed", 42))
    max_epochs = config.get("max_epochs", 10)
    steps_per_epoch = config.get("steps_per_epoch", 1000)

    G = nearest_neighbors_graph(df, config['n'])
    node_features, edge_index, y = dataset_generation(G)

    # Split nodes by latitude; build the lookup set once instead of slicing
    # and scanning lat_list for every node.
    train_latitudes = set(lat_list[:25])
    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)
    for i, node in enumerate(G.nodes):
        if G.nodes[node]['latitude'] in train_latitudes:
            train_mask[i] = True
        else:
            test_mask[i] = True

    # Move features, targets and masks to the device once. Previously only x
    # and edge_index were moved (on every step), so on CUDA the loss mixed
    # device predictions with CPU targets.
    graph_data = Data(
        x=torch.tensor(node_features, dtype=torch.float),
        edge_index=edge_index,
        y=y,
        train_mask=train_mask,
        test_mask=test_mask,
    ).to(device)

    net = GCN_RegressionModel2(2, config["l1"], config["l2"])
    net.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    checkpoint = session.get_checkpoint()
    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        # The checkpoint stores the epoch that just finished; resume with the
        # next one instead of repeating it.
        start_epoch = checkpoint_state["epoch"] + 1
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, max_epochs):
        net.train()
        for _ in range(steps_per_epoch):
            optimizer.zero_grad()
            out = net(graph_data.x, graph_data.edge_index)
            loss = criterion(
                out[graph_data.train_mask].squeeze(),
                graph_data.y[graph_data.train_mask].squeeze(),
            )
            loss.backward()
            optimizer.step()

        # Evaluate without building an autograd graph.
        net.eval()
        with torch.no_grad():
            out = net(graph_data.x, graph_data.edge_index)  # Predicted PM values
            residual = (
                out[graph_data.test_mask].squeeze()
                - graph_data.y[graph_data.test_mask].squeeze()
            )
            test_rmse = torch.sqrt(torch.mean(residual ** 2))

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        session.report({"rmse": float(test_rmse)}, checkpoint=checkpoint)

    print('Finished Training')

In [16]:
num_samples = 50
# Scheduler horizon. train_loop2 reports 10 epochs per trial by default, so
# the previous value of 100 could never be reached; keep the two in step.
max_num_epochs = 10

# ASHA stops trials early based on the reported "rmse" (lower is better).
scheduler = ASHAScheduler(
    metric="rmse",
    mode="min",
    max_t=max_num_epochs,
    grace_period=2,
    reduction_factor=2,
)

# Pass the trainable directly: wrapping it in functools.partial without
# binding any argument added nothing.
result = tune.run(
    train_loop2,
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose=3,
)

# "last" ranks trials by the RMSE of their final reported epoch.
best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")

0,1
Current time:,2023-06-05 20:19:11
Running for:,00:06:22.99
Memory:,10.0/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop2_3d979_00000,TERMINATED,127.0.0.1:12244,0.0011264,10,33.8142,26.0516
train_loop2_3d979_00001,TERMINATED,127.0.0.1:12244,0.00036204,2,2.80932,26.6832
train_loop2_3d979_00002,TERMINATED,127.0.0.1:12244,0.000335769,2,10.2279,29.0955
train_loop2_3d979_00003,TERMINATED,127.0.0.1:12244,0.000419439,4,5.38984,28.7977
train_loop2_3d979_00004,TERMINATED,127.0.0.1:12244,0.00590596,2,3.29396,32.1902
train_loop2_3d979_00005,TERMINATED,127.0.0.1:12244,0.0361084,2,3.05545,28.3722
train_loop2_3d979_00006,TERMINATED,127.0.0.1:12244,0.000557707,2,2.66027,31.5534
train_loop2_3d979_00007,TERMINATED,127.0.0.1:12244,0.0383769,4,6.07953,27.2232
train_loop2_3d979_00008,TERMINATED,127.0.0.1:12244,0.00108265,4,5.1572,29.2273
train_loop2_3d979_00009,TERMINATED,127.0.0.1:12244,0.0860729,4,6.89985,28.3803


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop2_3d979_00000,2023-06-05_20-13-29,True,LAPTOP-ETKK6OVI,10,127.0.0.1,12244,26.0516,True,33.8142,3.37731,33.8142,1685976209,10,3d979_00000
train_loop2_3d979_00001,2023-06-05_20-13-31,True,LAPTOP-ETKK6OVI,2,127.0.0.1,12244,26.6832,True,2.80932,1.34557,2.80932,1685976211,2,3d979_00001
train_loop2_3d979_00002,2023-06-05_20-13-42,True,LAPTOP-ETKK6OVI,2,127.0.0.1,12244,29.0955,True,10.2279,5.41126,10.2279,1685976222,2,3d979_00002
train_loop2_3d979_00003,2023-06-05_20-13-47,True,LAPTOP-ETKK6OVI,4,127.0.0.1,12244,28.7977,True,5.38984,1.35007,5.38984,1685976227,4,3d979_00003
train_loop2_3d979_00004,2023-06-05_20-13-50,True,LAPTOP-ETKK6OVI,2,127.0.0.1,12244,32.1902,True,3.29396,1.58154,3.29396,1685976230,2,3d979_00004
train_loop2_3d979_00005,2023-06-05_20-13-54,True,LAPTOP-ETKK6OVI,2,127.0.0.1,12244,28.3722,True,3.05545,1.46892,3.05545,1685976234,2,3d979_00005
train_loop2_3d979_00006,2023-06-05_20-13-56,True,LAPTOP-ETKK6OVI,2,127.0.0.1,12244,31.5534,True,2.66027,1.22893,2.66027,1685976236,2,3d979_00006
train_loop2_3d979_00007,2023-06-05_20-14-02,True,LAPTOP-ETKK6OVI,4,127.0.0.1,12244,27.2232,True,6.07953,1.45798,6.07953,1685976242,4,3d979_00007
train_loop2_3d979_00008,2023-06-05_20-14-07,True,LAPTOP-ETKK6OVI,4,127.0.0.1,12244,29.2273,True,5.1572,1.19676,5.1572,1685976247,4,3d979_00008
train_loop2_3d979_00009,2023-06-05_20-14-14,True,LAPTOP-ETKK6OVI,4,127.0.0.1,12244,28.3803,True,6.89985,1.69991,6.89985,1685976254,4,3d979_00009


2023-06-05 20:19:11,274	INFO tune.py:945 -- Total run time: 383.02 seconds (382.97 seconds for the tuning loop).


Best trial config: {'n': 16, 'l1': 512, 'l2': 64, 'lr': 0.00036024527818624805}
