In [1]:
import numpy 
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import remove_self_loops, add_self_loops, add_remaining_self_loops, degree
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from math import sqrt
from sklearn.metrics import r2_score

In [2]:
%run preprocessing.ipynb
%run graph.ipynb
data_ns, adj_matrix_ns, conv_layer_ns = create_graph_pyg_ns(master_df, num_neighbors=4, hidden_channels=64)
data_sw, adj_matrix_sw, conv_layer_sw = create_graph_pyg_sw(master_df, hidden_channels=64)

In [3]:
# Graph Convolution Layer
class GraphConvolution(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GraphConvolution, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = nn.Parameter(torch.Tensor(input_dim, output_dim))
        self.bias = nn.Parameter(torch.Tensor(output_dim))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x, adj):
        support = torch.matmul(x, self.weight)
        output = torch.sparse.mm(adj, support)
        output = output + self.bias
        return F.relu(output)

# GCN Model
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(input_dim, hidden_dim)
        self.gc2 = GraphConvolution(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x, adj):
        x = self.gc1(x, adj)
        x = self.dropout(x)
        x = self.gc2(x, adj)
        return x

In [4]:
def rmse(predictions, targets):
    return sqrt(((predictions - targets) ** 2).mean().item())

def train_gcn_model(model, data, adj_matrix, epochs=150, lr=0.01, accumulation_steps=5, clip_value=2,stop_loss=1.5):
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    mean = data.x.mean(dim=0)
    std = data.x.std(dim=0)
    data.x = (data.x - mean) / std

    losses = []
    rmses =[]
    r2_scores = []

    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        accumulated_loss = 0.0

        for i in range(accumulation_steps):
            output = model(data.x, adj_matrix)
            loss = criterion(output.view(-1), data.y)
            loss.backward()
            accumulated_loss += loss.item() / accumulation_steps

        nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()

        losses.append(accumulated_loss)

        if accumulated_loss <= stop_loss:
            print(f"Early stopping as accumulated loss went below stop loss")
            print(f"Epoch {epoch + 1},Loss : {accumulated_loss}")
            break

        rmse_val = rmse(output.view(-1), data.y)
        rmses.append(rmse_val)
        r2_val = r2_score(data.y.cpu().numpy(), output.view(-1).detach().cpu().numpy())
        r2_scores.append(r2_val)
        print(f'Epoch {epoch + 1}, Loss: {accumulated_loss}, RMSE: {rmse_val}, R^2: {r2_val}')
    
    return rmses, losses, r2_scores

In [5]:
gcn_model = GCN(input_dim=data_ns.x.size(1), hidden_dim=32, output_dim=1)
rmses, losses, r2_scores = train_gcn_model(gcn_model, data_ns, adj_matrix_ns, epochs=200, lr=0.001, accumulation_steps=10, clip_value=1, stop_loss=1)

plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss')
plt.title('Training Loss Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(rmses, label='RMSE')
plt.title('RMSE Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(r2_scores, label='R^2')
plt.title('R^2 Score Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('R2')
plt.legend()
plt.grid(True)
plt.show()

Epoch 1, Loss: 126031433.60000001, RMSE: 11129.39926500977, R^2: -123863532.73925523
Epoch 2, Loss: 103810966.4, RMSE: 9868.806614783776, R^2: -97393342.47058243
Epoch 3, Loss: 89458717.19999999, RMSE: 10436.17247845205, R^2: -108913692.7894746
Epoch 4, Loss: 63259080.39999999, RMSE: 9718.425386861803, R^2: -94447789.10219142
Epoch 5, Loss: 54885030.800000004, RMSE: 7527.532929187358, R^2: -56663756.63666134
Epoch 6, Loss: 37835334.800000004, RMSE: 5597.287021405996, R^2: -31329624.523138784
Epoch 7, Loss: 34560534.8, RMSE: 5658.482305353618, R^2: -32018424.443160415
Epoch 8, Loss: 28036164.1, RMSE: 6476.86652633818, R^2: -41949804.532508664


In [None]:
gcn_model = GCN(input_dim=data_sw.x.size(1), hidden_dim=32, output_dim=1)
rmses, losses, r2_scores = train_gcn_model(gcn_model, data_sw, adj_matrix_sw, epochs=200, lr=0.001, accumulation_steps=10, clip_value=1, stop_loss=1)

plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss')
plt.title('Training Loss Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(rmses, label='RMSE')
plt.title('RMSE Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(r2_scores, label='R^2')
plt.title('R^2 Score Per Epoch')
plt.xlabel('Epoch')
plt.ylabel('R2')
plt.legend()
plt.grid(True)
plt.show()

Epoch 1, Loss: 24435163.200000003, RMSE: 4801.390007070869, R^2: -23053343.490012545
Epoch 2, Loss: 18501757.499999996, RMSE: 4202.342679982203, R^2: -17659682.833144587
Epoch 3, Loss: 12964521.900000002, RMSE: 3380.2252883498754, R^2: -11425921.274605813
Epoch 4, Loss: 9233358.600000001, RMSE: 3185.977087174357, R^2: -10150449.137523195
Epoch 5, Loss: 6885460.149999999, RMSE: 2679.938432128619, R^2: -7182069.018405197
Epoch 6, Loss: 4679807.600000001, RMSE: 2299.239439466886, R^2: -5286501.423481141
Epoch 7, Loss: 3370878.25, RMSE: 1910.1898465859356, R^2: -3648824.4735843358
Epoch 8, Loss: 2199411.5749999997, RMSE: 1602.7562977570858, R^2: -2568826.682216374
Epoch 9, Loss: 1484235.1187500001, RMSE: 1188.7692690341553, R^2: -1413171.3790570656
