In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
!pip install torch-geometric
!pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.13.0%2Bcu116/torch_scatter-2.1.1%2Bpt113cu116-cp310-cp310-linux_x86_64.whl (9.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.1+pt113cu116
Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.13.0%2Bcu116/torch_sparse-0.6.17%2Bpt113cu116-cp310-cp310-linux_x86_64.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.17+pt113cu116
Collecting torch-geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[2K     [90m━━━

In [33]:
import torch
import torch.nn as nn

import torch_geometric
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F

from tqdm.notebook import tqdm

import numpy as np
import pandas as pd

import os

import math

In [65]:
class GraphDataset(Dataset):
  def __init__(self, root, transform=None):

    self.data = []
    for graph_folder in tqdm(os.listdir(root)):
      graph_path = os.path.join(root, graph_folder)
      self.data.append(convert_to_Data(graph_path))

    self.create_idx_split()

    self.task_type = "regression"

    self.eval_metric = "rmse"


  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      # If the input 'idx' is a tensor, return a list of data items corresponding to the indices
      return [self.data[i] for i in idx]
    else:
      return self.data[idx]

  def create_idx_split(self):
    split = {}

    avail = list(range(len(self.data)))

    train_prop = 0.7
    val_prop = 0.15
    test_prop = 0.15

    num_train = int(train_prop * len(self.data))
    num_val = int(val_prop * len(self.data))
    num_test = len(self.data) - num_train - num_val

    train_split = random.sample(avail, num_train)
    avail = list(set(avail) - set(train_split))

    val_split = random.sample(avail, num_val)
    avail = list(set(avail) - set(val_split))

    test_split = random.sample(avail, num_test)
    avail = list(set(avail) - set(test_split))

    split['train'] = torch.tensor(train_split)
    split['valid'] = torch.tensor(val_split)
    split['test'] = torch.tensor(test_split)

    self.split = split

  def get_idx_split(self):
    return self.split


In [68]:
# Load the dataset
dataset = torch.load("/content/drive/MyDrive/Summer_Invitational_2023_Datathon_Datasets/Test-Data-Processed/NK.pt")

In [69]:
x = []
for data in dataset:
  e = data.edge_attr
  asum = 0
  ac = 0
  bsum = 0
  bc = 0

  for row in e:
    if not math.isnan(row[0]):
      asum += row[0]
      ac += 1
    if not math.isnan(row[1]):
      bsum += row[1]
      bc += 1
  x.append([asum / ac, bsum / bc])

x = torch.tensor(x)
x.resize_(x.shape[0], 2)
print(x.shape)

y = []
for data in dataset:
  y.append(data.y)
y = torch.tensor(y)
y.resize_(y.shape[0], 1)
print(y.shape)


torch.Size([365, 2])
torch.Size([365, 1])


In [70]:
from torch.utils.data import TensorDataset, DataLoader

ds = TensorDataset(x, y)

train_loader = DataLoader(ds, batch_size=2, shuffle=True)
test_loader = DataLoader(ds, batch_size=2, shuffle=True)

In [71]:
num_epochs = 25

In [72]:
import torch
import torch.nn as nn
import torch.optim as optim

class MLPRegression(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLPRegression, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Example usage:
input_size = 2  # Replace with the input size of your data
hidden_size = 64
output_size = 1  # Regression output should have a single neuron

# Create an instance of the MLP regression model
model = MLPRegression(input_size, hidden_size, output_size)

# Define loss function and optimizer for regression (Mean Squared Error loss)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop example (assuming you have your data loaded as `train_loader`)
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# After training, you can use the model for predictions on new data
# For example, if you have test data loaded as `test_loader`:
model.eval()
with torch.no_grad():
    total_loss = 0.0
    num_samples = 0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        total_loss += loss.item()
        num_samples += len(targets)

    avg_loss = total_loss / num_samples
    print(f"Average loss on test set: {avg_loss:.2f}")


Epoch 1, Loss: 1.6981837924133883e+18
Epoch 2, Loss: 11308080626794.316
Epoch 3, Loss: 6952363111.136612
Epoch 4, Loss: 4274693.175866633
Epoch 5, Loss: 2638.517643850032
Epoch 6, Loss: 8.625202200512906
Epoch 7, Loss: 6.873945707404386
Epoch 8, Loss: 6.947745039122678
Epoch 9, Loss: 6.887807130923971
Epoch 10, Loss: 7.014142767241214
Epoch 11, Loss: 6.906698195713183
Epoch 12, Loss: 6.921781750970973
Epoch 13, Loss: 6.903396966434568
Epoch 14, Loss: 6.899027215978487
Epoch 15, Loss: 6.8999135268093825
Epoch 16, Loss: 6.913205554250811
Epoch 17, Loss: 6.885802300738507
Epoch 18, Loss: 6.890907915298893
Epoch 19, Loss: 6.917697260509496
Epoch 20, Loss: 6.878475078253235
Epoch 21, Loss: 6.8942594957522685
Epoch 22, Loss: 6.975306137307175
Epoch 23, Loss: 6.887254616601871
Epoch 24, Loss: 6.899647037867619
Epoch 25, Loss: 6.9044062387239515
Average loss on test set: 3.43
