In [2]:
import os
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Dataset

In [190]:
progeria = pd.read_csv(r"C:\Users\amulya\Documents\progeria-prediction\230705_progeria_c2\IdentifyPrimaryObjects.csv")
normal = pd.read_csv(r"C:\Users\amulya\Documents\progeria-prediction\230705_normal_c2\IdentifyPrimaryObjects.csv")
cols_to_drop = ["ImageNumber", "ObjectNumber", "Number_Object_Number"]
progeria_features = progeria.drop(columns=cols_to_drop)
progeria_features = progeria_features.assign(label=1) 
normal_features = normal.drop(columns=cols_to_drop)
normal_features = normal_features.assign(label=0) 
full_data = pd.concat([progeria_features, normal_features],axis=0)
full_data = full_data.replace([np.inf, -np.inf], np.nan)
full_data = full_data.dropna()

In [191]:
class CellFeaturesDataset(Dataset):
    def __init__(self, fulldata, transform=None, target_transform=None):
        self.labels = fulldata.loc[:,'label'].values
        self.data = fulldata.loc[:,~fulldata.columns.isin(['label'])].values
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item = self.data[idx,:]
        label = self.labels[idx]
        if self.transform:
            item = self.transform(item)
        if self.target_transform:
            label = self.target_transform(label)
        return item.float(), label.float()

## Transform data - scale and convert to tensor

In [192]:
def tensor_and_scale (x):
    m = x.mean(0)
    s = x.std(0)
    x -= m
    x /= s
    # torch.allclose(x, torch.from_numpy(arr_norm))
    return torch.as_tensor(x)

t = tensor_and_scale

In [193]:
cell_dataset = CellFeaturesDataset(full_data, transform=torch.as_tensor, target_transform=torch.as_tensor)

## Train-test split, create dataloaders

In [194]:
from sklearn.model_selection import train_test_split

train_cells, test_cells = train_test_split(cell_dataset, stratify=cell_dataset.labels, test_size=0.3)

In [208]:
batch_size = 50

#sampler = torchsample.StratifiedSampler(class_vector=cell_dataset.labels, batch_size=batch_size)
#test_cells, train_cells = torch.utils.data.random_split(cell_dataset, [.3, .7])
train_dataloader = DataLoader(train_cells, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_cells, batch_size=batch_size, shuffle=True)

# ANN

In [209]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features=58, out_features=30),
            nn.ReLU(),
            nn.Linear(in_features=30, out_features=30),
            nn.ReLU(),
            nn.Linear(in_features=30, out_features=2),
        )

    def forward(self, x):
        # do stuff here
        logits = self.linear_relu_stack(x)
        prediction = torch.sigmoid(logits)
        return prediction

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [216]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    model.float()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y.long())
        # Backpropagation
        loss.backward()
        #torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        optimizer.zero_grad()

        loss, current = loss.item(), (batch + 1) * len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [217]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.long()).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [219]:
learning_rate = 1e-6
epochs = 5
loss_fn = nn.NLLLoss()
reg_fn = nn.L1Loss()
model = NeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: -0.880000  [   50/ 1570]
loss: -0.940000  [  100/ 1570]
loss: -0.959941  [  150/ 1570]
loss: -0.919999  [  200/ 1570]
loss: -0.919996  [  250/ 1570]
loss: -0.900002  [  300/ 1570]
loss: -0.999998  [  350/ 1570]
loss: -1.000000  [  400/ 1570]
loss: -0.960000  [  450/ 1570]
loss: -0.939999  [  500/ 1570]
loss: -0.960000  [  550/ 1570]
loss: -0.960000  [  600/ 1570]
loss: -0.920000  [  650/ 1570]
loss: -0.920000  [  700/ 1570]
loss: -0.939994  [  750/ 1570]
loss: -0.939730  [  800/ 1570]
loss: -0.900005  [  850/ 1570]
loss: -0.920000  [  900/ 1570]
loss: -0.920000  [  950/ 1570]
loss: -0.919971  [ 1000/ 1570]
loss: -0.939989  [ 1050/ 1570]
loss: -0.939998  [ 1100/ 1570]
loss: -0.919918  [ 1150/ 1570]
loss: -0.900000  [ 1200/ 1570]
loss: -0.959999  [ 1250/ 1570]
loss: -0.960000  [ 1300/ 1570]
loss: -0.939857  [ 1350/ 1570]
loss: -0.939549  [ 1400/ 1570]
loss: -0.960000  [ 1450/ 1570]
loss: -0.960000  [ 1500/ 1570]
loss: -0.940000  [ 1550/ 1570]

# Logistic regression

In [220]:
class LogisticRegression(torch.nn.Module):    
    # build the constructor
    def __init__(self, n_inputs, n_outputs):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(n_inputs, n_outputs)
    # make predictions
    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred

In [221]:
def train (dataloader, model, loss_fn, optimizer):
    losses = []
    size = len(dataloader.dataset)
    # model.train()
    for batch, (X, y) in enumerate(dataloader):
        #print(X)
        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y.long())
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    losses.append(loss.item())

In [222]:
def test(dataloader, model, loss_fn):
    accs = []
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            test_loss += loss_fn(outputs, y.long()).item()
            correct += (predicted == y).sum()
        accuracy = 100 * (correct) / len(dataloader.dataset)
        accs.append(accuracy)
    test_loss /= num_batches
    print(f"Test Error: \n Accuracy: {(100*correct/size):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [223]:
log_regr = LogisticRegression(58, 2)
loss_fn = torch.nn.NLLLoss()
learning_rate = 1e-3
optimizer = torch.optim.SGD(log_regr.parameters(), lr=learning_rate)
epochs = 50

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, log_regr, loss_fn, optimizer)
    test(test_dataloader, log_regr, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: -0.000000  [   50/ 1570]
Test Error: 
 Accuracy: 17.4%, Avg loss: -0.008152 

Epoch 2
-------------------------------
loss: -0.024863  [   50/ 1570]
Test Error: 
 Accuracy: 68.2%, Avg loss: -0.031233 

Epoch 3
-------------------------------
loss: -0.000034  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.939666 

Epoch 4
-------------------------------
loss: -0.960000  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.937989 

Epoch 5
-------------------------------
loss: -0.960000  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.939666 

Epoch 6
-------------------------------
loss: -0.920000  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.939666 

Epoch 7
-------------------------------
loss: -0.980000  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.936312 

Epoch 8
-------------------------------
loss: -0.960000  [   50/ 1570]
Test Error: 
 Accuracy: 93.0%, Avg loss: -0.937989 

Epoch 9
