This file was created to run PyTorch training on a cluster without a GPU.

In [None]:
# SECTION: Define-GNN
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
import torch.optim as optim
import time

class simple_gnn_gcn(nn.Module):
    """Two-layer graph network built from ``GraphConv`` layers.

    One input feature per node is expanded to ``hidden_channels`` features,
    passed through a ReLU, reduced back to a single feature and finally
    squashed with a sigmoid.

    Args:
        hidden_channels: Number of features produced by the first layer.
    """

    def __init__(self, hidden_channels=64):
        super().__init__()
        self.conv1 = GraphConv(1, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, 1)

    def forward(self, data):
        """Apply both convolutions to ``data`` and return the sigmoid output.

        Args:
            data: Object exposing ``x``, ``edge_index`` and ``edge_weights``;
                all three are handed to the convolution layers unchanged.

        Returns:
            The sigmoid of the second convolution's output (values in 0-1).
        """
        node_feats = data.x
        edges = data.edge_index
        weights = data.edge_weights

        hidden = F.relu(self.conv1(node_feats, edges, weights))
        # NOTE(review): conv2 has a single output channel, so the result is
        # presumably shaped [num_nodes, 1] rather than [num_nodes] — confirm
        # against the shape of the training targets.
        return torch.sigmoid(self.conv2(hidden, edges, weights))
    


# Pick the compute device once (CUDA when available, otherwise CPU) and build
# the model, loss and optimizer on top of it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = simple_gnn_gcn(hidden_channels=72).to(device)
loss_fn = nn.MSELoss()  # mean-squared error: regression objective
optimizer = optim.Adam(model.parameters(), lr=0.01)


In [None]:
import os
import glob
from tqdm import tqdm
from torch_geometric.loader import DataLoader
import time

# Declare data paths
directory = '/mnt/data/sur/users/mrivera/Train-sims/4379fd40-9f0a/batching'
# Sort the matches: glob returns files in arbitrary order, and a fixed order
# keeps runs comparable with each other.
train_files = sorted(glob.glob(f'{directory}/TrainBatch_*.pt'))
if not train_files:
    raise FileNotFoundError(f'No TrainBatch_*.pt files found in {directory}')

epochs = 10
model.train()
# Predictions and targets of the final epoch, and the summed loss of every epoch
x_train, y_train, loss_epochs = [], [], []
total_elapsed = 0
for epoch in range(1, epochs + 1):
    start = time.time()
    total_loss = 0
    for path in train_files:
        # NOTE(review): weights_only=False unpickles arbitrary Python objects;
        # only load batch files that come from a trusted source.
        data = torch.load(path, weights_only=False)
        loader = DataLoader(data, batch_size=200, shuffle=True)
        for batch in loader:
            # The model lives on `device`, so its inputs must be moved there too.
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch)
            target = batch.y
            # If the target holds the same number of values as the output but
            # in a different shape (e.g. [N] vs [N, 1]), align it explicitly;
            # otherwise MSELoss would broadcast the pair to [N, N] silently.
            if target.shape != out.shape and target.numel() == out.numel():
                target = target.reshape(out.shape)
            loss = loss_fn(out, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()   # Accumulate loss
            if epoch == epochs:
                # Keep the last epoch's predictions/targets for later plotting
                x_train.append(out.detach().cpu().numpy())
                y_train.append(batch.y.detach().cpu().numpy())
    # Store the unrounded loss so that small values are not collapsed to 0
    loss_epochs.append(total_loss)
    elapsed = time.time() - start
    total_elapsed += elapsed
    print(f"Epoch {epoch}: Loss = {total_loss:.4f},  Elapsed time: {elapsed:.2f}")


print(f'>> the total elapsed time with {epochs} epochs is {total_elapsed/60:.2f} minutes')


  



## Plotting


In [None]:
# Section: Plotting
import matplotlib.pyplot as plt
import numpy as np

def loss_plotter(loss_epochs = None, epochs = None):
    """Plot the loss recorded for each epoch as a line chart.

    Args:
        loss_epochs: Non-empty sequence holding one loss value per epoch.
        epochs: Number of epochs to plot. When omitted it is taken from
            ``len(loss_epochs)``; when given it must equal that length.

    Returns:
        The matplotlib figure containing the plot.

    Raises:
        TypeError: If ``loss_epochs`` is not provided.
        ValueError: If ``loss_epochs`` is empty or its length differs from
            ``epochs``.
    """
    # Validate up front so that bad input fails with a clear message instead
    # of an error raised somewhere inside matplotlib.
    if loss_epochs is None:
        raise TypeError('loss_epochs is required')
    n_values = len(loss_epochs)
    if n_values == 0:
        raise ValueError('loss_epochs must contain at least one value')
    if epochs is None:
        epochs = n_values
    if epochs != n_values:
        raise ValueError(
            f'epochs ({epochs}) does not match the number of loss values ({n_values})'
        )

    y = np.round(np.asarray(loss_epochs, dtype=float), 3)
    x = np.arange(epochs)  # epochs are plotted on a 0-based axis

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.plot(x, y, alpha=0.5)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Loss over epochs')
    ax.grid(True, alpha=0.3)

    # Clamp the axes to the data range, but only when that range is valid:
    # identical limits (single epoch, flat loss) or NaN limits are left to
    # matplotlib's autoscaling.
    y_low, y_high = float(y.min()), float(y.max())
    if y_low < y_high:
        ax.set_ylim(y_low, y_high)
    if epochs > 1:
        ax.set_xlim(0, epochs - 1)

    fig.tight_layout()
    return fig

# Render the loss curve of the finished run and store it as a PNG file.
loss_plot_file = '/mnt/data/sur/users/mrivera/Plots/4379fd40-9f0a-loss.png'
fig = loss_plotter(loss_epochs=loss_epochs, epochs=epochs)
fig.savefig(loss_plot_file, dpi=150, bbox_inches='tight')

In [None]:
def preds_plotter(preds = None, tgts = None, path = None ):
    """Scatter predictions against true values with a y = x reference line.

    Args:
        preds: Non-empty sequence of arrays holding the predictions; the
            arrays are flattened and joined into one series.
        tgts: Non-empty sequence of arrays holding the matching true values,
            processed the same way as ``preds``.
        path: Currently unused; kept so existing calls keep working.

    Returns:
        The matplotlib figure containing the plot.

    Raises:
        TypeError: If ``preds`` or ``tgts`` is not provided.
        ValueError: If either sequence is empty or the two do not hold the
            same number of values.
    """
    if preds is None or tgts is None:
        raise TypeError('preds and tgts are required')
    if len(preds) == 0 or len(tgts) == 0:
        raise ValueError('preds and tgts must each contain at least one array')

    # Flatten every chunk so column-shaped ([N, 1]) and flat ([N]) arrays can
    # be mixed and paired element by element.
    preds = np.concatenate([np.ravel(chunk) for chunk in preds])
    tgts = np.concatenate([np.ravel(chunk) for chunk in tgts])
    if preds.size != tgts.size:
        raise ValueError(
            f'preds ({preds.size} values) and tgts ({tgts.size} values) differ in size'
        )

    # Size the axes from BOTH series; using only the targets would push any
    # prediction above the largest target outside the visible area.
    upper = float(max(preds.max(), tgts.max()))

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(preds, tgts, alpha=0.5)
    # Perfect prediction line (y = x)
    ax.plot([0, upper], [0, upper], 'r--', label='Perfect prediction')
    ax.set_xlabel('Predictions')
    ax.set_ylabel('True Values')
    ax.set_title('Predictions vs True Values')
    ax.legend()
    ax.grid(True, alpha=0.3)
    # Skip explicit limits when they would be degenerate (all values <= 0 or NaN)
    if upper > 0:
        ax.set_ylim(0, upper)
        ax.set_xlim(0, upper)
    fig.tight_layout()
    return fig