In [None]:
!pip install torchmetrics

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset
from torchmetrics.classification import BinaryF1Score
from torchmetrics.classification import BinaryPrecision
from torchmetrics.classification import BinaryRecall
from torchmetrics.classification import BinaryAccuracy
import pandas as pd
from tqdm import tqdm

In [None]:
# Read the previously saved feature (X_*) and label (y_*) splits back from CSV.
X_train, X_dev, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'relabelled_data/X_train.csv',
        'relabelled_data/X_dev.csv',
        'relabelled_data/X_test.csv',
    )
)
y_train, y_dev, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'relabelled_data/y_train.csv',
        'relabelled_data/y_dev.csv',
        'relabelled_data/y_test.csv',
    )
)

# Sanity check: print one shape per line so matching X/y row counts can be
# confirmed by eye.
print(
    *(frame.shape for frame in (X_train, X_dev, X_test, y_train, y_dev, y_test)),
    sep='\n',
)

In [None]:
# make dataset
class MyDataset(Dataset):
    """Map-style dataset pairing features ``x`` with labels ``y``.

    Both containers are indexed along their first axis, and item ``i`` is the
    tuple ``(x[i], y[i])``. The constructor asserts that the two containers
    have the same size along that axis.
    """

    def __init__(self, x, y):
        super().__init__()
        n_features, n_labels = x.shape[0], y.shape[0]
        assert n_features == n_labels  # shape[0] is taken to be the dataset size
        self.x, self.y = x, y

    def __len__(self):
        # The label container defines the number of samples.
        n_samples = self.y.shape[0]
        return n_samples

    def __getitem__(self, index):
        features = self.x[index]
        label = self.y[index]
        return features, label

In [None]:
# Select the GPU when torch can see one, otherwise the CPU.
# NOTE(review): in the cells visible here nothing is moved onto `device`
# (no `.to(device)` calls), so training and evaluation run wherever the
# tensors are created - confirm whether that is intended.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Convert every split from a DataFrame to a float32 tensor.
train_X, dev_X, test_X = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_train, X_dev, X_test)
)
train_y, dev_y, test_y = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (y_train, y_dev, y_test)
)

In [None]:
# Wrap the training tensors in a dataset and serve them in batches of 64,
# reshuffled each time the loader is iterated.
traindata = MyDataset(x=train_X, y=train_y)
trainloader = torch.utils.data.DataLoader(
    dataset=traindata,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Dev split loader. Evaluation only aggregates predictions over the whole
# split, so shuffling buys nothing; keeping the original order makes every
# pass over the loader deterministic.
devdata = MyDataset(dev_X, dev_y)
devloader = torch.utils.data.DataLoader(devdata, batch_size=64, shuffle=False)

In [None]:
# Test split loader. Evaluation only aggregates predictions over the whole
# split, so shuffling buys nothing; keeping the original order makes every
# pass over the loader deterministic.
testdata = MyDataset(test_X, test_y)
testloader = torch.utils.data.DataLoader(testdata, batch_size=64, shuffle=False)

In [None]:
# checkpoint functions
def checkpoint(model, filename):
    """Write ``model``'s ``state_dict`` (its parameters and buffers) to ``filename``."""
    weights = model.state_dict()
    torch.save(weights, filename)

def resume(model, filename, map_location="cpu"):
    """Load a ``state_dict`` saved by ``checkpoint`` into ``model`` in place.

    Args:
        model: Module whose parameters are overwritten; its architecture must
            match the one that produced the checkpoint.
        filename: Path of the file written by ``torch.save``.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            checkpoint written on a CUDA machine can still be read on a
            CPU-only one; ``load_state_dict`` then copies the values into the
            model's existing parameters.
    """
    state = torch.load(filename, map_location=map_location)
    model.load_state_dict(state)

def load_model(ModelClass, filename, map_location="cpu"):
    """Build a new ``ModelClass()`` and fill it with the weights stored in ``filename``.

    Args:
        ModelClass: Zero-argument callable returning the module to populate.
        filename: Path of a ``state_dict`` file written by ``torch.save``.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            checkpoint written on a CUDA machine can still be read on a
            CPU-only one.

    Returns:
        The newly constructed module with the loaded parameters.
    """
    model = ModelClass()
    model.load_state_dict(torch.load(filename, map_location=map_location))
    return model

In [None]:
# define the Network
class MyNetwork(nn.Module):
    """Fully connected binary classifier: 10 -> 100 -> 500 -> 100 -> 1.

    Hidden layers use ReLU and the single output passes through a sigmoid,
    so ``forward`` returns values in (0, 1). ``lr`` is only stored on the
    instance as ``learning_rate``; the module itself never uses it.
    """

    def __init__(self, lr=0.0001):
        super().__init__()
        self.learning_rate = lr

        # Layers are created in forward order so that parameter
        # initialisation order and the ``network.<idx>`` state_dict keys
        # stay fixed.
        widths = (10, 100, 500, 100)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(widths[-1], 1))
        stack.append(nn.Sigmoid())
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        scores = self.network(x)
        return scores

In [None]:
# Instantiate the classifier. lr=0.01 overrides the constructor default of
# 0.0001; the value is only stored on the model and is read back as
# `model.learning_rate` when the optimizer is created.
model = MyNetwork(lr=0.01)

In [None]:
# ---- training configuration -------------------------------------------------
epochs = 1000
# Stop once more than this many epochs have passed since the best dev accuracy.
early_stop_thresh = 5
# Sentinels below any real accuracy / epoch, so the first epoch always checkpoints.
best_accuracy = -1
best_epoch = -1
checkpoint_path = "drive/MyDrive/DS-Project/models/ProfifPropheNet-v1.pt"

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=model.learning_rate)

Acc = BinaryAccuracy()
Precision = BinaryPrecision()
Recall = BinaryRecall()
BF1 = BinaryF1Score()


def _predict_split(net, loader):
    """Run ``net`` over every batch of ``loader`` in eval mode, without gradients.

    Returns ``(predictions, targets)`` concatenated over all batches, where
    predictions are the network outputs after ``round()``.
    """
    net.eval()
    rounded, labels = [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            rounded.append(net(batch_x).round())
            labels.append(batch_y)
    return torch.cat(rounded), torch.cat(labels)


def _report_split(split_name, epoch, predictions, targets):
    """Print accuracy, precision, recall and F1 for one split; return the accuracy."""
    accuracy = Acc(predictions, targets)
    precision = Precision(predictions, targets)
    recall = Recall(predictions, targets)
    f1 = BF1(predictions, targets)

    print(f'{split_name} accuracy for epoch {epoch} is: {accuracy}')
    print(f'{split_name} Precision for epoch {epoch} is: {precision}')
    print(f'{split_name} Recall for epoch {epoch} is: {recall}')
    print(f'{split_name} F1 for epoch {epoch} is: {f1} \n')
    return accuracy


for epoch in range(epochs):
    # One optimisation pass over the training data. The batch variables are
    # named batch_x / batch_y so they do not overwrite the y_train / y_dev
    # DataFrames loaded earlier in the notebook.
    model.train()
    for batch_x, batch_y in tqdm(trainloader):
        optimizer.zero_grad()
        loss = loss_fn(model(batch_x), batch_y)
        loss.backward()
        optimizer.step()

    # Metrics for the current epoch on the train and dev splits.
    train_acc = _report_split('Training', epoch, *_predict_split(model, trainloader))
    val_acc = _report_split('Validation', epoch, *_predict_split(model, devloader))

    if val_acc > best_accuracy:
        # New best dev accuracy: remember it and persist the weights.
        best_accuracy = val_acc
        best_epoch = epoch
        checkpoint(model, checkpoint_path)
    elif epoch - best_epoch > early_stop_thresh:
        print("Early stopped training at epoch %d" % epoch)
        break  # terminate the training loop

# At this point `model` holds the weights of the last epoch that ran, which
# are not necessarily the best ones. Reload the best checkpoint so that later
# cells evaluating "the best model" really use it.
if best_epoch >= 0:
    resume(model, checkpoint_path)




In [None]:
# Evaluate the model currently held in `model` on the test split.
model.eval()
with torch.no_grad():
    outputs = []
    targets = []
    # Batch variables are named batch_x / batch_y so the y_test DataFrame
    # loaded at the top of the notebook is not overwritten by a batch tensor.
    for batch_x, batch_y in testloader:
        outputs.append(model(batch_x).round())
        targets.append(batch_y)

    outputs = torch.cat(outputs)
    targets = torch.cat(targets)
    print(f'test accuracy for best model is: {Acc(outputs, targets)}')
    print(f'test Precision for best model is: {Precision(outputs, targets)}')
    print(f'test Recall for best model is: {Recall(outputs, targets)}')
    print(f'test F1 for best model is: {BF1(outputs, targets)} \n')

In [None]:
# reload model
# Build a fresh MyNetwork (default constructor arguments) and populate it from
# the saved state_dict via the load_model helper defined with the checkpoint
# functions.
# NOTE(review): the training cell checkpoints to
# "drive/MyDrive/DS-Project/models/ProfifPropheNet-v1.pt" while this cell reads
# "models/ProfifPropheNet-v1.pt" - confirm both resolve to the same file.
model = load_model(MyNetwork, "models/ProfifPropheNet-v1.pt")

In [None]:
# Re-create the four binary classification metric objects used by the
# evaluation cells (presumably so a session that skipped training still has
# them defined).
Acc, Precision, Recall, BF1 = (
    BinaryAccuracy(),
    BinaryPrecision(),
    BinaryRecall(),
    BinaryF1Score(),
)

In [None]:
# Evaluate the reloaded model on the test split.
model.eval()
with torch.no_grad():
    outputs = []
    targets = []
    # Batch variables are named batch_x / batch_y so the y_test DataFrame
    # loaded at the top of the notebook is not overwritten by a batch tensor.
    for batch_x, batch_y in testloader:
        outputs.append(model(batch_x).round())
        targets.append(batch_y)

    outputs = torch.cat(outputs)
    targets = torch.cat(targets)
    print(f'test accuracy for best model is: {Acc(outputs, targets)}')
    print(f'test Precision for best model is: {Precision(outputs, targets)}')
    print(f'test Recall for best model is: {Recall(outputs, targets)}')
    print(f'test F1 for best model is: {BF1(outputs, targets)} \n')

In [None]:
from sklearn.metrics import confusion_matrix

# `targets` / `outputs` come from the preceding test-evaluation cell.
# Flatten them to 1-D integer label arrays before handing them to sklearn.
y_true = targets.reshape(-1).to(torch.int64).numpy()
y_hat = outputs.reshape(-1).to(torch.int64).numpy()

# labels=[0, 1] pins the result to a 2x2 matrix. Without it, a split in which
# only one class occurs yields a 1x1 matrix and the four-way unpack below
# raises ValueError.
tn, fp, fn, tp = confusion_matrix(y_true, y_hat, labels=[0, 1]).ravel()

print("True Negatives:", tn)
print("False Positives:", fp)
print("False Negatives:", fn)
print("True Positives:", tp)

In [None]:
# dev confusion matrix
# Collect per-batch predictions and labels in lists and build the DataFrame
# once: DataFrame.append was removed in pandas 2.0, and growing a frame batch
# by batch is quadratic.
dev_pred_batches, dev_true_batches = [], []
with torch.no_grad():
    for X, y in devloader:
        # reshape(-1) keeps a 1-D vector even for a final batch of one sample.
        dev_pred_batches.append(model(X).reshape(-1).round())
        dev_true_batches.append(y.reshape(-1))

dev_predictions = pd.DataFrame({
    'y_pred': torch.cat(dev_pred_batches).numpy(),
    'y_true': torch.cat(dev_true_batches).numpy(),
})

confusion = pd.crosstab(dev_predictions['y_true'], dev_predictions['y_pred'], rownames=['True'],
                        colnames=['Predicted'], margins=True)
print(confusion)


In [None]:
# train confusion matrix
# Collect per-batch predictions and labels in lists and build the DataFrame
# once: DataFrame.append was removed in pandas 2.0, and growing a frame batch
# by batch is quadratic.
train_pred_batches, train_true_batches = [], []
with torch.no_grad():
    for X, y in trainloader:
        # reshape(-1) keeps a 1-D vector even for a final batch of one sample.
        train_pred_batches.append(model(X).reshape(-1).round())
        train_true_batches.append(y.reshape(-1))

train_predictions = pd.DataFrame({
    'y_pred': torch.cat(train_pred_batches).numpy(),
    'y_true': torch.cat(train_true_batches).numpy(),
})

# Tabulate the *train* predictions (the earlier version tabulated
# dev_predictions here by mistake).
confusion = pd.crosstab(train_predictions['y_true'], train_predictions['y_pred'], rownames=['True'],
                        colnames=['Predicted'], margins=True)
print(confusion)

In [None]:
# test confusion matrix
# Collect per-batch predictions and labels in lists and build the DataFrame
# once: DataFrame.append was removed in pandas 2.0, and growing a frame batch
# by batch is quadratic.
test_pred_batches, test_true_batches = [], []
with torch.no_grad():
    for X, y in testloader:
        # reshape(-1) keeps a 1-D vector even for a final batch of one sample.
        test_pred_batches.append(model(X).reshape(-1).round())
        test_true_batches.append(y.reshape(-1))

test_predictions = pd.DataFrame({
    'y_pred': torch.cat(test_pred_batches).numpy(),
    'y_true': torch.cat(test_true_batches).numpy(),
})

# Tabulate the *test* predictions (the earlier version tabulated
# dev_predictions here by mistake).
confusion = pd.crosstab(test_predictions['y_true'], test_predictions['y_pred'], rownames=['True'],
                        colnames=['Predicted'], margins=True)
print(confusion)