In [1]:
import haversine as hs
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import LabelEncoder
import geopandas as gpd
import xarray as xr
import torch
from torch_geometric.data import Data, Dataset
from Trainer import Trainer
from wind_utils import get_distance, draw_graph, nearest_neighbors_graph, plot_heatmap, dataset_generation
from GCN_Regression import GCN_RegressionModel
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from functools import partial

data = xr.open_dataset("data/delhi_cpcb_2022.nc")
# data = xr.open_dataset("delhi_cpcb_2022.nc")
data

In [2]:
lat_list = [28.815329,28.647622,28.695381,28.4706914,28.7762,28.7256504,28.5512005,28.656756,28.7500499,28.498571,28.5710274,28.655935,28.5627763,28.6811736,28.628624,28.73282,28.58028,28.588333,28.5918245,28.611281,28.636429,28.684678,28.60909,28.570173,28.822836,28.56789,28.6573814,28.530785,28.623763,28.674045,28.639645,28.639645,28.563262,28.732528,28.6514781,28.5504249,28.710508,28.531346,28.672342,28.699793]

In [3]:
df = data.to_dataframe().reset_index()
wind_data = pd.read_csv('data/wind_temp_shifted.csv')

In [4]:
df = df.dropna(subset = ['PM2.5'])
df = df[df["time"]=="2022-03-01 01:30:00"]
wind_data = wind_data[wind_data['time'] == "2022-03-01 01:30:00"]

In [5]:
df['WS'] = wind_data['u'].mean()
df['WD'] = wind_data['v'].mean()

In [6]:
df.head()

Unnamed: 0,station,time,From Date,To Date,PM2.5,PM10,NO,NO2,NOx,NH3,...,RH,WS,WD,SR,BP,AT,RF,TOT-RF,latitude,longitude
1417,"Alipur, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,70.0,150.0,3.78,43.38,26.15,3.42,...,78.22,42.463141,2.620503,4.55,993.4,11.43,0.0,0.0,28.815329,77.15301
10200,"Anand Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,52.5,144.0,11.88,30.62,26.02,15.82,...,68.95,42.463141,2.620503,8.95,973.65,15.45,,0.0,28.647622,77.315809
18983,"Ashok Vihar, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,79.0,134.0,7.47,67.17,41.8,50.77,...,71.3,42.463141,2.620503,5.43,987.17,13.82,0.0,0.0,28.695381,77.181665
27766,"Aya Nagar, Delhi - IMD",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,73.94,159.3,22.49,26.28,48.8,,...,,42.463141,2.620503,,,,,0.0,28.470691,77.109936
36549,"Bawana, Delhi - DPCC",2022-03-01 01:30:00,2022-03-01 01:00:00,2022-03-01 02:00:00,135.0,245.0,4.8,20.65,25.43,41.45,...,81.62,42.463141,2.620503,5.7,993.12,12.3,0.0,0.0,28.7762,77.051074


# Tuning for  2 Layers

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel(nn.Module):

    def __init__(self, input_dim, l1 = 16, l2 = 10):
        super().__init__()
        self.conv1 = GCNConv(input_dim, l1)
        self.conv2 = GCNConv(l1, l2)
        self.linear1 = nn.Linear(l2, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x


In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN_RegressionModel(4)

In [9]:
config = {
    "n": tune.sample_from(lambda _: np.random.randint(5, 20)),
    "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 10)),
    "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 10)),
    "lr": tune.loguniform(1e-4, 1e-1)
}

In [10]:
import random
seed = 42
random.seed(seed)
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_loop(config, checkpoint_dir=None, data_dir=None):
    
    G = nearest_neighbors_graph(df, config['n'])
    
    node_features, edge_index, y = dataset_generation(G)

    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)

    for i, node in enumerate(G.nodes):
        lat = G.nodes[node]['latitude']
        if lat in lat_list[:25]:
            train_mask[i] = True
        else:
            test_mask[i] = True

    data = Data(x=torch.tensor(node_features, dtype=torch.float), edge_index=edge_index, y=y,train_mask=train_mask, test_mask = test_mask)
    
    net = GCN_RegressionModel(4, config["l1"], config["l2"])
    net.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])
    checkpoint = session.get_checkpoint()

    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        start_epoch = checkpoint_state["epoch"]
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, 10):
        for i in range(2500):
            optimizer.zero_grad()
            out = net(data.x.to(device), data.edge_index.to(device))
            loss = criterion(out[data.train_mask].squeeze(), data.y[data.train_mask].squeeze())
            loss.backward()
            optimizer.step()
        
        out = net(data.x.to(device), data.edge_index.to(device))  # Predicted PM values
        test_rmse = torch.sqrt(torch.mean((out[data.test_mask].squeeze() - data.y[data.test_mask].squeeze())**2))
    
        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        print('here')
        session.report(
            {"rmse": float(test_rmse.detach())}, checkpoint=checkpoint)
            
    print('Finished Training')

In [11]:
num_samples = 50
max_num_epochs = 10

scheduler = ASHAScheduler(
            metric="rmse",
            mode="min",
            max_t=max_num_epochs,
            grace_period=1,
            reduction_factor=2,)


result = tune.run(
    partial(train_loop),
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose = 3)
#     checkpoint_at_end=True)

best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")


2023-06-05 15:17:18,847	INFO worker.py:1625 -- Started a local Ray instance.
2023-06-05 15:17:22,237	INFO tune.py:218 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/latest/tune/api/trainable.html



0,1
Current time:,2023-06-05 15:56:19
Running for:,00:38:57.55
Memory:,7.8/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop_f8120_00000,TERMINATED,127.0.0.1:19160,0.000226238,10,295.4,31.1784
train_loop_f8120_00001,TERMINATED,127.0.0.1:19160,0.00257563,1,7.13678,27.5129
train_loop_f8120_00002,TERMINATED,127.0.0.1:19160,0.00397063,10,103.65,27.3058
train_loop_f8120_00003,TERMINATED,127.0.0.1:19160,0.0316603,1,11.0924,30.6615
train_loop_f8120_00004,TERMINATED,127.0.0.1:19160,0.00155208,1,9.08981,30.2938
train_loop_f8120_00005,TERMINATED,127.0.0.1:19160,0.00368066,1,13.903,30.4867
train_loop_f8120_00006,TERMINATED,127.0.0.1:19160,0.000276238,1,12.3329,32.4423
train_loop_f8120_00007,TERMINATED,127.0.0.1:19160,0.0124867,4,44.5459,31.5248
train_loop_f8120_00008,TERMINATED,127.0.0.1:19160,0.000165565,1,14.6845,45.7886
train_loop_f8120_00009,TERMINATED,127.0.0.1:19160,0.00117846,2,17.5479,28.4427


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop_f8120_00000,2023-06-05_15-22-31,True,LAPTOP-ETKK6OVI,10,127.0.0.1,19160,31.1784,True,295.4,28.7672,295.4,1685958751,10,f8120_00000
train_loop_f8120_00001,2023-06-05_15-22-38,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,27.5129,True,7.13678,7.13678,7.13678,1685958758,1,f8120_00001
train_loop_f8120_00002,2023-06-05_15-24-22,True,LAPTOP-ETKK6OVI,10,127.0.0.1,19160,27.3058,True,103.65,9.93188,103.65,1685958862,10,f8120_00002
train_loop_f8120_00003,2023-06-05_15-24-33,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,30.6615,True,11.0924,11.0924,11.0924,1685958873,1,f8120_00003
train_loop_f8120_00004,2023-06-05_15-24-42,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,30.2938,True,9.08981,9.08981,9.08981,1685958882,1,f8120_00004
train_loop_f8120_00005,2023-06-05_15-24-56,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,30.4867,True,13.903,13.903,13.903,1685958896,1,f8120_00005
train_loop_f8120_00006,2023-06-05_15-25-08,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,32.4423,True,12.3329,12.3329,12.3329,1685958908,1,f8120_00006
train_loop_f8120_00007,2023-06-05_15-25-53,True,LAPTOP-ETKK6OVI,4,127.0.0.1,19160,31.5248,True,44.5459,12.0061,44.5459,1685958953,4,f8120_00007
train_loop_f8120_00008,2023-06-05_15-26-08,True,LAPTOP-ETKK6OVI,1,127.0.0.1,19160,45.7886,True,14.6845,14.6845,14.6845,1685958968,1,f8120_00008
train_loop_f8120_00009,2023-06-05_15-26-25,True,LAPTOP-ETKK6OVI,2,127.0.0.1,19160,28.4427,True,17.5479,9.23022,17.5479,1685958985,2,f8120_00009


2023-06-05 15:56:19,851	INFO tune.py:945 -- Total run time: 2337.61 seconds (2337.54 seconds for the tuning loop).


Best trial config: {'n': 10, 'l1': 128, 'l2': 256, 'lr': 0.0016059378180954026}


In [12]:
result.get_best_trial("rmse", "min", "all").config

{'n': 10, 'l1': 128, 'l2': 256, 'lr': 0.0016059378180954026}

# Tuning for 1 Layer

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN_RegressionModel2(nn.Module):

    def __init__(self, input_dim, l1 = 16, l2 = 10):
        super().__init__()
        self.conv1 = GCNConv(input_dim, l1)
#         self.conv2 = GCNConv(l1, l2)
        self.linear1 = nn.Linear(l1, l2)
        self.linear2 = nn.Linear(l2, 1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
#         x = self.conv2(x, edge_index)
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x


In [14]:
def train_loop2(config, checkpoint_dir=None, data_dir=None):
    
    G = nearest_neighbors_graph(df, config['n'])
    
    node_features, edge_index, y = dataset_generation(G)

    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    test_mask = torch.zeros(y.size(0), dtype=torch.bool)

    for i, node in enumerate(G.nodes):
        lat = G.nodes[node]['latitude']
        if lat in lat_list[:25]:
            train_mask[i] = True
        else:
            test_mask[i] = True

    data = Data(x=torch.tensor(node_features, dtype=torch.float), edge_index=edge_index, y=y,train_mask=train_mask, test_mask = test_mask)
    
    net = GCN_RegressionModel2(4, config["l1"], config["l2"])
    net.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])
    checkpoint = session.get_checkpoint()

    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        start_epoch = checkpoint_state["epoch"]
        net.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    for epoch in range(start_epoch, 10):
        for i in range(1000):
            optimizer.zero_grad()
            out = net(data.x.to(device), data.edge_index.to(device))
            loss = criterion(out[data.train_mask].squeeze(), data.y[data.train_mask].squeeze())
            loss.backward()
            optimizer.step()
        
        out = net(data.x.to(device), data.edge_index.to(device))  # Predicted PM values
        test_rmse = torch.sqrt(torch.mean((out[data.test_mask].squeeze() - data.y[data.test_mask].squeeze())**2))
    
        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)
        print('here')
        session.report(
            {"rmse": float(test_rmse.detach())}, checkpoint=checkpoint)
            
    print('Finished Training')

In [15]:
num_samples = 50
max_num_epochs = 100

scheduler = ASHAScheduler(
            metric="rmse",
            mode="min",
            max_t=max_num_epochs,
            grace_period=2,
            reduction_factor=2,)


result = tune.run(
    partial(train_loop2),
    resources_per_trial={"cpu": 8},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    verbose = 3)
#     checkpoint_at_end=True)

best_trial = result.get_best_trial("rmse", "min", "last")
print(f"Best trial config: {best_trial.config}")

0,1
Current time:,2023-06-05 16:07:12
Running for:,00:10:51.41
Memory:,7.8/15.8 GiB

Trial name,status,loc,lr,iter,total time (s),rmse
train_loop_69d36_00000,TERMINATED,127.0.0.1:9996,0.0126527,10,76.0417,19.5704
train_loop_69d36_00001,TERMINATED,127.0.0.1:9996,0.00139231,3,5.1596,24.7056
train_loop_69d36_00002,TERMINATED,127.0.0.1:9996,0.00589106,3,19.39,27.6177
train_loop_69d36_00003,TERMINATED,127.0.0.1:9996,0.00362534,3,17.6049,26.6162
train_loop_69d36_00004,TERMINATED,127.0.0.1:9996,0.00103483,10,19.3899,21.0133
train_loop_69d36_00005,TERMINATED,127.0.0.1:9996,0.00319618,3,5.77664,32.226
train_loop_69d36_00006,TERMINATED,127.0.0.1:9996,0.0170418,3,5.46017,26.908
train_loop_69d36_00007,TERMINATED,127.0.0.1:9996,0.000205203,3,13.2804,28.4744
train_loop_69d36_00008,TERMINATED,127.0.0.1:9996,0.00898244,3,4.95997,28.3747
train_loop_69d36_00009,TERMINATED,127.0.0.1:9996,0.000236966,3,7.08001,29.1087


Trial name,date,done,hostname,iterations_since_restore,node_ip,pid,rmse,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_loop_69d36_00000,2023-06-05_15-57-49,True,LAPTOP-ETKK6OVI,10,127.0.0.1,9996,19.5704,True,76.0417,7.67176,76.0417,1685960869,10,69d36_00000
train_loop_69d36_00001,2023-06-05_15-57-55,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,24.7056,True,5.1596,1.61559,5.1596,1685960875,3,69d36_00001
train_loop_69d36_00002,2023-06-05_15-58-14,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,27.6177,True,19.39,6.82002,19.39,1685960894,3,69d36_00002
train_loop_69d36_00003,2023-06-05_15-58-32,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,26.6162,True,17.6049,6.06976,17.6049,1685960912,3,69d36_00003
train_loop_69d36_00004,2023-06-05_15-58-51,True,LAPTOP-ETKK6OVI,10,127.0.0.1,9996,21.0133,True,19.3899,1.82881,19.3899,1685960931,10,69d36_00004
train_loop_69d36_00005,2023-06-05_15-58-57,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,32.226,True,5.77664,1.83511,5.77664,1685960937,3,69d36_00005
train_loop_69d36_00006,2023-06-05_15-59-02,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,26.908,True,5.46017,1.77516,5.46017,1685960942,3,69d36_00006
train_loop_69d36_00007,2023-06-05_15-59-16,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,28.4744,True,13.2804,4.40002,13.2804,1685960956,3,69d36_00007
train_loop_69d36_00008,2023-06-05_15-59-21,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,28.3747,True,4.95997,1.46016,4.95997,1685960961,3,69d36_00008
train_loop_69d36_00009,2023-06-05_15-59-28,True,LAPTOP-ETKK6OVI,3,127.0.0.1,9996,29.1087,True,7.08001,2.60967,7.08001,1685960968,3,69d36_00009


2023-06-05 16:07:12,016	INFO tune.py:945 -- Total run time: 651.44 seconds (651.39 seconds for the tuning loop).


Best trial config: {'n': 13, 'l1': 256, 'l2': 512, 'lr': 0.01265269523435504}
