In [1]:
import sys
# Add the sibling ../scripts directory to the import search path. The later
# `from models import *` / `from tg_functions import *` presumably resolve
# from there -- TODO confirm. The path is relative, so it depends on the
# notebook's working directory.
sys.path.append("../scripts")


In [None]:
import os
from models import *
from tg_functions import *
import pickle
import torch
import wandb

# --- Configurations ---
# Every setting can be overridden through an environment variable; the second
# argument of os.getenv is the default used when the variable is unset.
epochs = int(os.getenv("EPOCHS", 50000))
learning_rate = float(os.getenv("LEARNING_RATE", 0.0001))
hidden_c = int(os.getenv("HIDDEN_C", 300))
random_seed = int(os.getenv("RANDOM_SEED", 100))

# BINS is either a whitespace-separated list of bin edges (classification) or
# the single word "regression" in any letter case (regression). The value is
# no longer overwritten after parsing, so the environment variable is honoured;
# run with BINS=regression to train the regression variant.
# try_int comes from tg_functions; it presumably returns an int when the token
# is numeric and the original string otherwise -- TODO confirm.
bins = [try_int(i) for i in os.getenv("BINS", "400 800 1300 2100 3000 3700 4700 7020 9660").split()]
if not bins:
    # An empty/blank BINS would otherwise fail later with an opaque IndexError.
    raise ValueError("BINS must be a list of bin edges or the word 'regression'")
if isinstance(bins[0], str) and bins[0].lower() == 'regression':
    bins = 'regression'

num_layers = int(os.getenv("NUM_LAYERS", 0))
nh = int(os.getenv("NUM_HEADS", 1))
# GAT may be 0/1 (converted to a bool below) or a string such as 'MLP', which
# is kept as-is and checked when the model is instantiated.
use_gat = try_int(os.getenv("GAT", 1))
if use_gat in (0, 1):
    use_gat = bool(use_gat)

# Read from the environment so the key is never hard-coded in the notebook.
api_key = os.getenv("API_KEY", None)
# Only interpolated into file paths, so it is left as str/int.
graph_num = os.getenv("GRAPH_NUM", 29)
wandb_entity = os.getenv("WANDB_USERNAME", "christian-hugo-rasmussen-it-universitetet-i-k-benhavn")
project_name = os.getenv("PROJECT_NAME", "test")

# --- WandB Initialization ---
# wandb.login(key=api_key)
# run = wandb.init(
#     project=project_name,
#     entity=wandb_entity,
#     config={
#         "epochs": epochs,
#         "learning_rate": learning_rate,
#         "hidden_c": hidden_c,
#         "random_seed": random_seed,
#         "bins": bins,
#         "num_layers": num_layers,
#         "num_heads": nh,
#         "gat": use_gat,
#         "graph_num": graph_num,
#     },
#     settings=wandb.Settings(init_timeout=300)
# )


In [None]:
# --- Device Setup ---
# Prefer the GPU when one is available and report which device will be used.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device}: {torch.cuda.get_device_name(0) if device.type == 'cuda' else 'CPU'}", flush=True)

# In regression mode `bins` is the string 'regression'; otherwise it holds the
# bin edges, which are placed on the training device as a tensor.
# The isinstance check avoids comparing a tensor with a string, and
# torch.as_tensor does not re-copy an existing tensor, so re-running this cell
# is safe.
if not isinstance(bins, str):
    bins = torch.as_tensor(bins, device=device)

# --- Load Graph Data ---
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open graph files that come from a trusted source.
with open(f'../data/graphs/{graph_num}/linegraph_tg.pkl', 'rb') as f:
    data = pickle.load(f)

# Replace the connectivity, feature and target tensors with contiguous
# versions in a single assignment.
data.edge_index, data.x, data.y = (
    tensor.contiguous() for tensor in (data.edge_index, data.x, data.y)
)
# Report the tensor shapes in the order x, edge_index, y.
print(*(tensor.shape for tensor in (data.x, data.edge_index, data.y)), flush=True)
    
# --- Model Instantiation ---
# use_gat selects the architecture: the string 'MLP' -> MLP, any other truthy
# value -> GAT, a falsy value -> GCN. Only the selected model is constructed
# and moved to the device; a throw-away GAT is no longer built first when the
# MLP is requested.
if use_gat == 'MLP':
    model = MLP(hidden_c, num_layers, random_seed, bins, data, nh)
elif use_gat:
    model = GAT(hidden_c, num_layers, random_seed, bins, data, nh)
else:
    model = GCN(hidden_c, num_layers, random_seed, bins, data)
model = model.to(device)

print(model, flush=True)
# torch.save(model, f"../data/graphs/{graph_num}/models/{run.name}.pt")
# Snapshot the freshly initialised (untrained) model. The target directory is
# created first because torch.save does not create missing parent directories.
os.makedirs(f"../data/graphs/{graph_num}/models", exist_ok=True)
torch.save(model, f"../data/graphs/{graph_num}/models/test_run.pt")

# Put the node features and the connectivity on the training device
data.x, data.edge_index = data.x.to(device), data.edge_index.to(device)

# stratified_split comes from tg_functions and returns the data object used
# for training/evaluation below.
data = stratified_split(data=data, random_seed=random_seed)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-3,
)
# Mean-squared error for regression, cross-entropy for the binned targets.
criterion = torch.nn.MSELoss() if bins == 'regression' else torch.nn.CrossEntropyLoss()
# Lower the learning rate once the monitored loss has stopped decreasing for
# epochs // 100 scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=epochs // 100,
)

# Best validation metrics seen so far
best_val_acc, best_val_loss = 0, float('inf')

# --- Training Loop ---
# One optimisation step per epoch via train(); every 5th epoch the model is
# evaluated with test(), the best metrics are tracked, and progress is printed.
# train() and test() come from tg_functions.
for epoch in range(1, epochs + 1):
    loss = train(model, data, optimizer, criterion, device, bins)
    if epoch % 5 == 0:
        # test() returns (accuracy, outputs, loss); val_out is not used further in this cell.
        acc, val_out, val_loss = test(model, data, criterion, device, bins)
        if acc > best_val_acc:
            best_val_acc = acc
            # Checkpoint the weights whenever the validation accuracy improves.
            # torch.save(model.state_dict(), f'../data/graphs/{graph_num}/models/{run.name}_best_accuracy.pt')
            torch.save(model.state_dict(), f'../data/graphs/{graph_num}/models/test_run_best_accuracy.pt')
        if val_loss < best_val_loss:
            # The best validation loss is tracked, but no checkpoint is written
            # for it while the save below stays commented out.
            best_val_loss = val_loss
            # torch.save(model.state_dict(), f'../data/graphs/{graph_num}/models/{run.name}_best_loss.pt')
        # Regression mode prints the training loss only; otherwise the
        # validation loss and accuracy are printed as well.
        if bins == 'regression':
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        else:
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val_loss: {val_loss:.4f}, Validation Accuracy: {acc}', flush = True)
        # torch.save(model.state_dict(), f'../data/graphs/{graph_num}/models/{run.name}_latest.pt')
        # run.log({"training_loss": loss, "val_loss": val_loss, "val_acc": acc, "epoch": epoch})
    # The plateau scheduler is stepped every epoch on the training loss (not the validation loss).
    scheduler.step(loss)
# run.finish()


Using cuda: NVIDIA GeForce RTX 2060
torch.Size([144282, 32]) torch.Size([2, 393834]) torch.Size([144282])
GAT(
  (convs): ModuleList(
    (0): GATConv(32, 300, heads=1)
    (1): GATConv(300, 10, heads=1)
  )
)
Epoch: 005, Loss: 9.5450, Val_loss: 5.4455, Validation Accuracy: 0.1134020618556701
Epoch: 010, Loss: 8.9354, Val_loss: 4.7316, Validation Accuracy: 0.1134020618556701
Epoch: 015, Loss: 7.9833, Val_loss: 4.3028, Validation Accuracy: 0.08247422680412371
Epoch: 020, Loss: 7.6331, Val_loss: 4.0705, Validation Accuracy: 0.12371134020618557
Epoch: 025, Loss: 7.6501, Val_loss: 3.9581, Validation Accuracy: 0.13402061855670103
Epoch: 030, Loss: 6.6050, Val_loss: 3.8774, Validation Accuracy: 0.13402061855670103
Epoch: 035, Loss: 6.6367, Val_loss: 3.8041, Validation Accuracy: 0.13402061855670103
Epoch: 040, Loss: 5.8370, Val_loss: 3.7096, Validation Accuracy: 0.12371134020618557
Epoch: 045, Loss: 5.8214, Val_loss: 3.5798, Validation Accuracy: 0.12371134020618557
Epoch: 050, Loss: 5.6083, V

In [None]:
# Count the entries of data.y that are non-zero: bool() maps every non-zero
# value to True and sum() counts the True entries.
data.y.bool().sum()

tensor(644, device='cuda:0')