In [None]:
    import os, torch
    from sklearn.model_selection import train_test_split
    import pickle
    import torch_geometric.transforms as T
    import numpy as np
    from torch_geometric.nn.models import Node2Vec
    from torch_geometric.data import DataLoader
    from torch_geometric.nn import MessagePassing
    from torch_geometric.data import Data
    from torch.nn import Linear
    import torch.nn.functional as F
    from torch_geometric.nn import GCNConv, GATConv
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import StandardScaler

    # Run configuration. Every value can be overridden through an environment
    # variable; the literal passed to os.getenv is the fallback used otherwise.
    epochs = int(os.getenv("EPOCHS", 1000))  # number of training epochs (default 1000)
    learning_rate = float(os.getenv("LEARNING_RATE", 0.001))  # optimizer learning rate (default 0.001)
    hidden_c = int(os.getenv("HIDDEN_C", 128))  # hidden layer width (default 128)
    random_seed = int(os.getenv("RANDOM_SEED", 20))  # seed for the split and for torch (default 20)
    api_key = os.getenv("API_KEY", None)  # not used below; presumably for the disabled wandb login
    # NOTE: a str when taken from the environment, the int 17 otherwise; it is only
    # interpolated into the graph path, so both forms resolve to the same file.
    graph_num = os.getenv("GRAPH_NUM", 17)
    dropout_p = float(os.getenv("DROPOUT", 0.5))  # dropout probability (default 0.5)

    # Seed torch's global RNG so weight initialisation, dropout masks and
    # DataLoader shuffling are repeatable for a given RANDOM_SEED.
    torch.manual_seed(random_seed)

    # wandb.login()
    # run = wandb.init(
    #     project="graph-embedding",
    #     config={
    #         "epochs": epochs,
    #         "learning_rate": learning_rate,
    #         "hidden_c": hidden_c,
    #         "random_seed": random_seed,
    #         "num_layers": num_layers,
    #         "dropout_p": dropout_p
    #     }
    # )

    # Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
    cuda_ready = torch.cuda.is_available()
    device = torch.device('cuda' if cuda_ready else 'cpu')
    if cuda_ready:
        print(f"Using CUDA device: {torch.cuda.get_device_name(0)}", flush=True)
    else:
        print("Using CPU", flush=True)

    ### load graph data

    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # load graph pickles that come from a trusted source.
    with open(f'../data/graphs/{graph_num}/linegraph_tg.pkl', 'rb') as f:
        data = pickle.load(f)

    # Make the tensors contiguous in memory before they are used downstream.
    data.edge_index = data.edge_index.contiguous()
    data.x = data.x.contiguous()
    data.y = data.y.contiguous()

    # Standardise every feature column to zero mean / unit variance.
    # torch.tensor() inherits the NumPy dtype, and StandardScaler yields float64
    # for float64 or integer input; pin float32 so the features always match the
    # dtype of the model's Linear layers.
    # NOTE(review): the scaler is fitted on all rows before the train/test split,
    # so test-set statistics influence the training features - confirm this is intended.
    sc = StandardScaler()
    data.x = torch.tensor(sc.fit_transform(data.x.cpu().numpy()), dtype=torch.float32)


In [None]:
# Hold out 20% of the rows of the feature matrix for evaluation. The split
# tensors get their own names so the loaders below are not bound to a name that
# previously held raw tensors.
train_x, test_x = train_test_split(data.x, test_size=0.2, random_state=random_seed)

# The samples are plain tensor rows, not graph objects, so the standard PyTorch
# DataLoader is the right tool; it yields (batch, features) tensors and avoids
# the deprecated torch_geometric.data.DataLoader alias.
train_loader = torch.utils.data.DataLoader(train_x, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_x, batch_size=64, shuffle=False)

class my_autoencoder(torch.nn.Module):
    """Fully connected autoencoder with a symmetric encoder and decoder.

    The encoder maps ``in_channels -> hidden_channels -> hidden_channels // 2
    -> latent_channels``; the decoder mirrors those widths back to
    ``in_channels``. Every hidden layer is followed by ReLU and dropout; the
    bottleneck and the final reconstruction layer are purely linear.

    Args:
        in_channels: Number of input features per sample.
        hidden_channels: Width of the widest hidden layer.
        dropout_p: Dropout probability applied after each hidden activation.
        latent_channels: Size of the bottleneck embedding. Defaults to 5,
            the value that was previously hard-coded.
    """

    def __init__(self, in_channels, hidden_channels, dropout_p, latent_channels=5):
        super().__init__()
        half_channels = hidden_channels // 2
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(in_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_p),
            torch.nn.Linear(hidden_channels, half_channels),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_p),
            torch.nn.Linear(half_channels, latent_channels),
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(latent_channels, half_channels),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_p),
            torch.nn.Linear(half_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_p),
            torch.nn.Linear(hidden_channels, in_channels),
        )

    def encode(self, x):
        """Return the bottleneck embedding of ``x`` (encoder output only)."""
        return self.encoder(x)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        return self.decoder(self.encoder(x))

# Reconstruction objective: mean squared error between input and output.
criterion = torch.nn.MSELoss()

# Size the autoencoder from the feature width of data.x and move it to the
# selected device.
model = my_autoencoder(data.x.size(1), hidden_c, dropout_p)
model = model.to(device)
print(f"Model: {model}", flush=True)


In [None]:
# Adam over all model parameters; the plateau scheduler lowers the learning
# rate once the monitored loss has stopped decreasing for more than 10 epochs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=10,
)

def train():
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode, takes one optimizer step per batch on the
    reconstruction loss, and returns the mean of the per-batch losses.
    """
    model.train()
    batch_losses = []
    for features in train_loader:
        features = features.to(device)
        optimizer.zero_grad()
        reconstruction = model(features)
        batch_loss = criterion(reconstruction, features)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(train_loader)

def test(loader):
    """Evaluate the reconstruction loss on ``loader`` without gradient tracking.

    Switches the model to eval mode and returns the mean of the per-batch
    losses over all batches yielded by ``loader``.
    """
    model.eval()
    with torch.no_grad():
        on_device = (raw.to(device) for raw in loader)
        summed = sum(
            criterion(model(features), features).item()
            for features in on_device
        )
    return summed / len(loader)

# Main loop: one training pass and one evaluation pass per epoch.
for epoch in range(1, epochs + 1):
    train_loss = train()
    test_loss = test(test_loader)
    # The plateau scheduler is driven by the training loss.
    # NOTE(review): plateau schedulers usually monitor a held-out loss - confirm
    # that tracking the training loss is intended.
    scheduler.step(train_loss)
    # Read the learning rate(s) straight from the optimizer: older PyTorch
    # releases do not provide ReduceLROnPlateau.get_last_lr(), while
    # param_groups is always available and yields the same list.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print(f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss:.4f} current_lr : {current_lr}', flush=True)
print('Training complete', flush=True)
