In [1]:
import fla_parallelized as a
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler
import pickle as pkl
from importlib import reload
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import math
import time
import os
import pickle as pkl
# from torch.utils.tensorboard import SummaryWriter
import utils as u
import matplotlib.pyplot as plt

In [2]:
# Load the pickled feature matrix and label vector from ./data/diabetes
# (resolved against the current working directory), then derive the
# positive-class weight used later by the loss.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at files from a trusted source.
cwd = os.getcwd()

with open(f"{cwd}/data/diabetes/X.pkl", "rb") as x_file:
    X_raw = pkl.load(x_file)

with open(f"{cwd}/data/diabetes/y.pkl", "rb") as y_file:
    y = pkl.load(y_file)

# np.unique sorts the distinct label values; index [1] of its result holds the
# matching occurrence counts.
y_counts = np.unique(y, return_counts=True)[1]

# Ratio of the first (smallest) label's count to the second label's count.
# presumably labels are binary 0/1, making this negatives/positives — TODO confirm
weight = torch.tensor(
    [y_counts[0] / y_counts[1]],
    dtype=torch.float32,
)

In [3]:
# Fill missing feature values with a 5-nearest-neighbour imputer, then rescale
# the imputed features with a min-max scaler.
# NOTE(review): both transformers are fitted on the complete dataset, i.e.
# before the train/validation/test split performed in the training cell, so
# held-out rows influence the imputation and scaling statistics. Consider
# fitting on the training split only.
imputer = KNNImputer(n_neighbors=5)
scaler = MinMaxScaler()

# fit(...).transform(...) on the same array is what fit_transform does.
X_imputed_not_norm = imputer.fit(X_raw).transform(X_raw)
X = scaler.fit(X_imputed_not_norm).transform(X_imputed_not_norm)

In [7]:
# Train and evaluate a small MLP on `seeds` different stratified
# train/validation/test splits, with early stopping on the validation loss.
# Per-seed raw test logits, test labels and mean test loss are collected in
# test_predictions_seed, test_label_list_seed and losses_seed.
reload(a)  # re-import the local module so recent edits to it take effect
seeds = 10
# One independent inner list per seed. `[[]] * seeds` would repeat a reference
# to a single shared list, so every seed's results would pile up in one list.
test_predictions_seed = [[] for _ in range(seeds)]
test_label_list_seed = [[] for _ in range(seeds)]
losses_seed = []
# Not used in this cell; kept because other cells may read them.
activations = [nn.ReLU, nn.Sigmoid, a.GaussianActivation]
forward_times = []

# Hyperparameters shared by every seed (loop-invariant).
batch_size = 64
num_epochs = 500
patience = 10  # epochs without validation improvement before stopping

for seed in range(seeds):
    print(f'seed {seed+1}')
    # Seed torch as well as the split, so weight initialisation and the
    # shuffling of the training DataLoader are reproducible per seed.
    torch.manual_seed(seed)

    #split data: 80/20 train/test, then 10% of the training part for validation
    X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, stratify=y_train, test_size=0.1, random_state=seed)
    train_dataset = a.npDataset(X_train,y_train)
    test_dataset = a.npDataset(X_test,y_test)
    val_dataset = a.npDataset(X_val,y_val)
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Input width is taken from the data instead of being hard-coded.
    n_features = X_train.shape[1]
    model = nn.Sequential(
        a.LinearSoftmax(input_dim=n_features, output_dim=50, activation=nn.ReLU),
        a.LinearSoftmax(input_dim=50, output_dim=25, activation=nn.ReLU),
        a.LinearSoftmax(input_dim=25, output_dim=10, activation=nn.ReLU),
        nn.Linear(in_features=10,out_features=1)
    )
    # The model emits one raw logit per sample; the loss applies the sigmoid.
    criterion = nn.BCEWithLogitsLoss(pos_weight=weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    #train
    best_val_loss = float('inf')
    best_model = None
    early_stop_counter = 0
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            # Add a trailing dimension so labels line up with the
            # single-column model output.
            labels = labels.unsqueeze(1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        model.eval()
        val_losses = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                labels = labels.unsqueeze(1)
                val_loss = criterion(outputs, labels)
                val_losses.append(val_loss.item())

        avg_val_loss = np.mean(val_losses)
        print(f'Epoch {epoch+1}, Validation Loss: {avg_val_loss:.4f}')
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() hands back references to the live parameter
            # tensors; clone them so later optimizer steps cannot overwrite
            # the snapshot of the best weights.
            best_model = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print(f'Early stopping after epoch {epoch+1} with validation loss {best_val_loss:.4f}')
            break

    # best_model stays None if no epoch ever improved (e.g. a NaN validation
    # loss); in that case keep the current weights instead of crashing.
    if best_model is not None:
        model.load_state_dict(best_model)

    #eval
    model.eval()
    test_losses = []
    test_predictions = []
    test_true_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            labels = labels.unsqueeze(1)
            test_loss = criterion(outputs, labels)
            test_losses.append(test_loss.item())
            test_predictions.extend(outputs.cpu().numpy())
            test_true_labels.extend(labels.cpu().numpy())
    avg_test_loss = np.mean(test_losses)

    # Turn the collected logits into hard 0/1 predictions in one vectorised
    # step and hand f1_score flat integer arrays.
    # assumes the labels are 0/1-valued — TODO confirm
    test_logits = np.asarray(test_predictions).reshape(-1)
    test_probs = torch.sigmoid(torch.from_numpy(test_logits))
    test_predictions_f1 = (test_probs > 0.5).numpy().astype(int)
    test_labels_flat = np.asarray(test_true_labels).reshape(-1).astype(int)
    test_score = f1_score(test_labels_flat, test_predictions_f1)
    print(f'Test Loss: {avg_test_loss:.4f}, Test Score: {test_score:.4f} for seed {seed+1}')

    # Stored as [seed][0] -> list of per-sample arrays for that seed.
    test_label_list_seed[seed].append(test_true_labels)
    test_predictions_seed[seed].append(test_predictions)
    losses_seed.append(avg_test_loss)

seed 1
Epoch 1, Validation Loss: 1.2035
Epoch 2, Validation Loss: 1.2033
Epoch 3, Validation Loss: 1.2034
Epoch 4, Validation Loss: 1.2027
Epoch 5, Validation Loss: 1.2023
Epoch 6, Validation Loss: 1.2019
Epoch 7, Validation Loss: 1.2011
Epoch 8, Validation Loss: 1.2010
Epoch 9, Validation Loss: 1.1987
Epoch 10, Validation Loss: 1.1955
Epoch 11, Validation Loss: 1.1910
Epoch 12, Validation Loss: 1.1840
Epoch 13, Validation Loss: 1.1758
Epoch 14, Validation Loss: 1.1706
Epoch 15, Validation Loss: 1.1664
Epoch 16, Validation Loss: 1.1648
Epoch 17, Validation Loss: 1.1624
Epoch 18, Validation Loss: 1.1608
Epoch 19, Validation Loss: 1.1609
Epoch 20, Validation Loss: 1.1585
Epoch 21, Validation Loss: 1.1565
Epoch 22, Validation Loss: 1.1552
Epoch 23, Validation Loss: 1.1544
Epoch 24, Validation Loss: 1.1526
Epoch 25, Validation Loss: 1.1516
Epoch 26, Validation Loss: 1.1512
Epoch 27, Validation Loss: 1.1494
Epoch 28, Validation Loss: 1.1492
Epoch 29, Validation Loss: 1.1476
Epoch 30, Valida