# model packaging and scaling 
https://pytorchlightning.ai/

In [12]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning import LightningDataModule, LightningModule

from blitz.modules import BayesianLinear
from blitz.utils import variational_estimator

import networkx as nx
from tqdm import tqdm
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter
import math

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

scaler = StandardScaler()

In [6]:
# Hyper-parameters
input_size = 128  # 28x28
hidden_size = 256
num_classes = 2
num_epochs = 100
batch_size = 2
learning_rate = 0.001

In [7]:
def getsavedsample(datasetname):
    with open('data\\'+datasetname+'_classification_train1.npy', 'rb') as f:
        train1 = np.load(f, allow_pickle=True)
    with open('data\\'+datasetname+'_classification_train2.npy', 'rb') as f:
        train2 = np.load(f, allow_pickle=True)
    with open('data\\'+datasetname+'_classification_train3.npy', 'rb') as f:
        train3 = np.load(f, allow_pickle=True)
    with open('data\\'+datasetname+'_classification_test.npy', 'rb') as f:
        test = np.load(f, allow_pickle=True)

    return train1, train2, train3, test


train1, train2, train3, test = getsavedsample('PROTEINS_graph2vec')
#train1, train2, train3, test = getsavedsample('REDDIT-BINARY_graph2vec')
#train1, train2, train3, test = getsavedsample('IMDB-BINARY_graph2vec')


In [8]:
X_train1 = [x for x, y in train1]
scaler_model = scaler.fit(X_train1)
X_train1 = scaler_model.transform(X_train1)
X_train1 = [np.expand_dims(x,0) for x in X_train1]
X_train1 = torch.Tensor(X_train1)

y_train1 = [y for x, y in train1]
y_train1 = torch.Tensor(y_train1)
y_train1 = torch.nn.functional.one_hot(y_train1.long())

X_test = [x for x, y in test]
X_test = scaler_model.fit_transform(X_test)
X_test = [np.expand_dims(x, 0) for x in X_test]
X_test = torch.Tensor(X_test)

y_test = [y for x, y in test]
y_test = torch.Tensor(y_test)
y_test = torch.nn.functional.one_hot(y_test.long())

In [16]:
# Fully connected neural network with one hidden layer

class LitNeuralNet(LightningModule):
    def __init__(self, input_size, hidden_size, num_classes):
        super(LitNeuralNet, self).__init__()
        
        self.seq_len = input_size
        self.embedding_size = 128
        self.hiddendim = hidden_size
        self.out_size = num_classes

        self.stride = 1
        self.kernel_1 = 2
        self.kernel_2 = 3
        self.kernel_3 = 4
        self.kernel_4 = 5

        self.conv_1 = nn.Conv1d(self.seq_len, self.hiddendim,
                              self.kernel_1, self.stride)
        self.conv_2 = nn.Conv1d(self.seq_len, self.hiddendim,
                              self.kernel_2, self.stride)
        self.conv_3 = nn.Conv1d(self.seq_len, self.hiddendim,
                              self.kernel_3, self.stride)
        self.conv_4 = nn.Conv1d(self.seq_len, self.hiddendim,
                              self.kernel_4, self.stride)

        self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
        self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
        self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
        self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)

        self.fc = nn.Linear(self.in_features_fc(), self.out_size)

        self.dropout = nn.Dropout(0.25)

    def in_features_fc(self):
        out_conv_1 = ((self.embedding_size - 1 *
                    (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = (
          (out_conv_1 - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)

        out_conv_2 = ((self.embedding_size - 1 *
                    (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_conv_2 = math.floor(out_conv_2)
        out_pool_2 = (
          (out_conv_2 - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_pool_2 = math.floor(out_pool_2)

        out_conv_3 = ((self.embedding_size - 1 *
                    (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_conv_3 = math.floor(out_conv_3)
        out_pool_3 = (
          (out_conv_3 - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_pool_3 = math.floor(out_pool_3)

        out_conv_4 = ((self.embedding_size - 1 *
                    (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_conv_4 = math.floor(out_conv_4)
        out_pool_4 = (
          (out_conv_4 - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_pool_4 = math.floor(out_pool_4)

        return (out_pool_1 + out_pool_2 + out_pool_3 + out_pool_4) * self.hiddendim

    def forward(self, x):
        x1 = self.conv_1(x)
        x1 = torch.relu(x1)
        x1 = self.pool_1(x1)

        x2 = self.conv_2(x)
        x2 = torch.relu((x2))
        x2 = self.pool_2(x2)

        x3 = self.conv_3(x)
        x3 = torch.relu(x3)
        x3 = self.pool_3(x3)

        x4 = self.conv_4(x)
        x4 = torch.relu(x4)
        x4 = self.pool_4(x4)

        union = torch.cat((x1, x2, x3, x4), 2)
        union = union.reshape(union.size(0), -1)

        out = self.fc(union)	
        out = self.dropout(out)
        out = torch.sigmoid(out)
        out = out.squeeze()
        return out
    
    def train_dataloader(self):
        train_dataset  = torch.utils.data.TensorDataset(X_train1, y_train1)
        train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, num_workers=4, shuffle=True)
        return train_loader

    def training_step(self, batch, batch_idx):
        images, labels = batch
        datapoints = images.float()
        labels = labels.squeeze().float()
        # Forward pass
        outputs = self(images)
        loss = F.binary_cross_entropy(outputs, labels)

        tensorboard_logs = {'train_loss': loss}
        # use key 'log'
        return {"loss": loss, 'log': tensorboard_logs}

    def val_dataloader(self):
        test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
        test_loader  = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, num_workers=4, shuffle=False)
        return test_loader 

    def validation_step(self, batch, batch_idx):
        images, labels = batch
        datapoints = images.float()
        labels = labels.squeeze().float()
        # Forward pass
        outputs = self(images)
                        
        loss = F.binary_cross_entropy(outputs, labels)
        return {"val_loss": loss}
    
    def validation_epoch_end(self, outputs):
        # outputs = list of dictionaries
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'avg_val_loss': avg_loss}
        # use key 'log'
        return {'val_loss': avg_loss, 'log': tensorboard_logs}
    
    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=learning_rate)
    
    def predict_step(self, batch, batch_idx):
        images, labels = batch
        images = images.float()
        outputs = self(images)
        proba = torch.argmax(outputs).item()
        return proba

In [21]:
#if __name__ == '__main__':
model = LitNeuralNet(input_size, hidden_size, num_classes)

# gpus=8
# fast_dev_run=True -> runs single batch through training and validation
# train_percent_check=0.1 -> train only on 10% of data
trainer = Trainer(max_epochs=num_epochs, log_every_n_steps=1, auto_lr_find=True, deterministic=True)
trainer.fit(model)
#trainer.test(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type      | Params
--------------------------------------
0 | conv_1  | Conv1d    | 768   
1 | conv_2  | Conv1d    | 1.0 K 
2 | conv_3  | Conv1d    | 1.3 K 
3 | conv_4  | Conv1d    | 1.5 K 
4 | pool_1  | MaxPool1d | 0     
5 | pool_2  | MaxPool1d | 0     
6 | pool_3  | MaxPool1d | 0     
7 | pool_4  | MaxPool1d | 0     
8 | fc      | Linear    | 251 K 
9 | dropout | Dropout   | 0     
--------------------------------------
256 K     Trainable params
0         Non-trainable params
256 K     Total params
1.026     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [22]:
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
test_loader  = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, num_workers=4, shuffle=False)

In [23]:
trainer.predict(model, test_loader)

Predicting: 2it [00:00, ?it/s]

[0, 0, 0, 3]