In [9]:
import numpy as np
from scipy import stats

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear, BatchNorm1d, ModuleList

from torch_geometric.loader import DataLoader
from torch_geometric.nn import TransformerConv, TopKPooling , GCNConv, TopKPooling, global_mean_pool
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

from ipynb.fs.full.NovoDataset import NovoDataset

## Dataset

In [10]:
# Dataset read from training.csv; it is split into training and
# validation subsets further down.
train_dataset = NovoDataset(
    root="data/",
    filename="training.csv",
)

# Dataset read from testing.csv; `test=True` is forwarded to NovoDataset
# (its exact effect is defined in the NovoDataset notebook).
test_dataset = NovoDataset(
    root="data/",
    filename="testing.csv",
    test=True,
)

### Split the training data into training and validation sets

In [11]:
# 70 % of the samples go to training; the remainder is held out for validation.
train_size = int(0.7 * len(train_dataset))
val_size = len(train_dataset) - train_size

# Use a dedicated, seeded generator so the same samples land in each subset on
# every "Restart & Run All"; without it the split depends on whatever state the
# global torch RNG happens to be in.
training_data, validation_data = torch.utils.data.random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

## Parameters

In [12]:
# Compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Number of graphs per mini-batch for the training and validation loaders.
BATCHSIZE = 128
# Size of the model output (one regression target).
# NOTE(review): not referenced by the code visible in this notebook section.
OUTPUT = 1
# Width of every hidden GCN layer.
EMBEDDING_SIZE = 64

## Dataloader

In [13]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    dataset=training_data,
    batch_size=BATCHSIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

# Validation batches keep a fixed order.
valid_loader = DataLoader(
    dataset=validation_data,
    batch_size=BATCHSIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Test samples are served one graph at a time, in dataset order.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [14]:
class GCN(torch.nn.Module):
    """Graph-level regressor built from four stacked GCNConv layers.

    Every convolution is followed by ``tanh``. Node embeddings are then
    reduced to one vector per graph by concatenating global max pooling and
    global mean pooling, and a single linear layer maps that vector to the
    prediction.

    Args:
        in_channels: Number of input features per node (default ``10``, the
            value previously hard-coded).
        embedding_size: Width of every hidden GCN layer. ``None`` (default)
            uses the notebook-level ``EMBEDDING_SIZE`` at construction time.
        out_channels: Number of values predicted per graph (default ``1``).
    """

    def __init__(self, in_channels=10, embedding_size=None, out_channels=1):
        super().__init__()

        # Resolve the notebook default lazily, exactly when the model is built.
        if embedding_size is None:
            embedding_size = EMBEDDING_SIZE

        # Re-seed the global torch RNG so the layer weights below are
        # initialised identically on every instantiation.
        torch.manual_seed(42)

        # GCN layers
        self.initial_conv = GCNConv(in_channels, embedding_size)
        self.conv1 = GCNConv(embedding_size, embedding_size)
        self.conv2 = GCNConv(embedding_size, embedding_size)
        self.conv3 = GCNConv(embedding_size, embedding_size)

        # Output layer: input is twice the embedding width because max- and
        # mean-pooled vectors are concatenated in forward().
        self.out = Linear(embedding_size * 2, out_channels)

    def forward(self, x, edge_index, batch_index):
        """Compute per-graph predictions and the pooled graph embedding.

        Args:
            x: Node feature tensor (presumably ``(num_nodes, in_channels)``
                — confirm against NovoDataset).
            edge_index: Graph connectivity passed to each GCNConv layer.
            batch_index: Node-to-graph assignment used by the global pooling
                operators.

        Returns:
            Tuple ``(out, hidden)``: the linear layer's output and the pooled
            graph embedding it was computed from.
        """
        hidden = x
        # Four rounds of message passing, each squashed with tanh.
        for conv in (self.initial_conv, self.conv1, self.conv2, self.conv3):
            hidden = torch.tanh(conv(hidden, edge_index))

        # Global pooling (stack different aggregations): max first, then mean.
        hidden = torch.cat(
            [gmp(hidden, batch_index), gap(hidden, batch_index)],
            dim=1,
        )

        # Final linear regression head.
        out = self.out(hidden)

        return out, hidden

In [16]:
# Training hyper-parameters. They must be defined before the optimizer is
# built: previously LEARNING_RATE was assigned after its first use, which
# raises NameError on a fresh kernel.
LEARNING_RATE = 0.001
EPOCHS = 20

# Build the network and move its parameters to the selected device.
model = GCN()
model.to(DEVICE)

# RMSprop over all model parameters, optimising mean squared error.
optimizer = torch.optim.RMSprop(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

In [17]:
from scipy import stats


In [20]:
def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(spearman, mean_loss)``.

    Args:
        loader: DataLoader yielding batches with ``x``, ``edge_index``,
            ``batch`` and ``y`` attributes.
        training: When True the model is put in train mode and the optimizer
            is stepped after every batch; when False the model is put in eval
            mode and no gradients are tracked.

    Returns:
        Tuple of the Spearman rank correlation between all predictions and
        targets seen in this pass, and the mean of the per-batch losses.
        Both are ``nan`` if the loader yields no batches.
    """
    model.train(training)
    batch_losses, all_preds, all_targets = [], [], []

    with torch.set_grad_enabled(training):
        for batch in loader:
            batch = batch.to(DEVICE)

            # The model returns (prediction, graph embedding); only the
            # prediction is needed here.
            pred, _ = model(batch.x.float(), batch.edge_index, batch.batch)

            # Flatten both sides so MSELoss compares element-wise instead of
            # broadcasting a (B, 1) prediction against a (B,) target.
            # Assumes one regression target per graph — TODO confirm in NovoDataset.
            pred = pred.reshape(-1)
            target = batch.y.float().reshape(-1)
            loss = criterion(pred, target)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_losses.append(loss.item())
            all_preds.append(pred.detach().cpu().numpy())
            all_targets.append(target.detach().cpu().numpy())

    if not batch_losses:
        return float("nan"), float("nan")

    # Rank correlation over the whole pass: per-batch correlations are
    # undefined (nan) for size-1 batches and would poison the epoch mean.
    score = stats.spearmanr(
        np.concatenate(all_preds), np.concatenate(all_targets)
    ).correlation
    return score, float(np.mean(batch_losses))


# Per-epoch histories (one entry per epoch).
training_score_history = []
training_losses_history = []
validation_score_history = []
validation_losses_history = []

for epoch in range(EPOCHS):
    training_scores, training_losses = _run_epoch(train_loader, training=True)
    # Validate on the held-out split of the training data, not on the test set.
    validation_scores, validation_losses = _run_epoch(valid_loader, training=False)

    training_score_history.append(training_scores)
    training_losses_history.append(training_losses)
    validation_score_history.append(validation_scores)
    validation_losses_history.append(validation_losses)
    print(f'{epoch+1:03} EPOCH - Training score : {training_scores:.5f} | Validation score : {validation_scores:.5f} | Training loss : {training_losses:.5f} | Validation loss : {validation_losses:.5f}')

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff70ffc48b0>
Traceback (most recent call last):
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1474, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/multiprocessing/connection.py", line 936, in wait
    ready = selector.select(timeout)
  File "/Users/gopinath/opt/anaconda3/lib/python3.9/selectors.py", line 416, in select
    fd_event_list = self._selector.po

KeyboardInterrupt: 