In [2]:
import main as a
from datasets import load_diabetes
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler
import pickle as pkl
from importlib import reload
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

import time

In [3]:
data = load_diabetes()

In [4]:
X_train = pd.concat([data['X_train'][k] for k in data['X_train'].keys()], axis=1)
X_test = pd.concat([data['X_test'][k] for k in data['X_test'].keys()], axis=1)
X_raw = pd.concat([X_train, X_test], axis=0).to_numpy()
y = pd.concat([data['y_train'], data['y_test']], axis=0).to_numpy().flatten()
y_counts = np.unique(y, return_counts=True)[1]
weight = torch.tensor([y_counts[0]/y_counts[1]], dtype=torch.float32)

In [5]:
imputer = KNNImputer(n_neighbors=5)
X_imputed_not_norm = imputer.fit_transform(X_raw)
scaler = MinMaxScaler()
X = scaler.fit_transform(X_imputed_not_norm)

In [6]:
reload(a)

<module 'main' from '/Users/aviadsusman/Documents/Python_Projects/FeatureLevelAttention/FLA/main.py'>

In [7]:
head_counts = [0]#[0,5,10,15]
test_prediction_dict = {h: [] for h in head_counts}
test_label_list = []
losses = {h: [] for h in head_counts}

forward_times = []
loss_times = []
backwards_times = []
optimizer_times = []

for seed in range(10):
    for head in head_counts:
        print(f'seed {seed+1}, with {head} heads')
        #split data
        X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, test_size=0.2, random_state=seed)
        X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, stratify=y_train, test_size=0.1, random_state=seed)
        train_dataset = a.npDataset(X_train,y_train)
        test_dataset = a.npDataset(X_test,y_test)
        val_dataset = a.npDataset(X_val,y_val)
        batch_size = 100
        train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        #make model
        hidden_dims = [50,25,10]
        attn_heads = head
        model = a.FLANN(input_dim=108, hidden_dims=hidden_dims, output_dim=1, attn_heads=attn_heads, activation=nn.ReLU())
        criterion = nn.BCEWithLogitsLoss(pos_weight=weight)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        #train
        num_epochs = 500
        best_val_loss = float('inf')
        best_model = None
        patience = 10
        early_stop_counter = 0
        for epoch in range(num_epochs):
            model.train()
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                start_f = time.time()
                outputs = model(inputs)
                end_f = time.time()
                forward_times.append(end_f-start_f)
                labels = labels.unsqueeze(1)
                start_l = time.time()
                loss = criterion(outputs, labels)
                end_l = time.time()
                loss_times.append(end_l-start_l)
                start_b = time.time()
                loss.backward()
                end_b = time.time()
                backwards_times.append(end_b-start_b)
                start_o = time.time()
                optimizer.step()
                end_o = time.time()
                optimizer_times.append(end_o-start_o)

            model.eval()
            val_losses = []
            for inputs, labels in val_loader:
                with torch.no_grad():
                    outputs = model(inputs)
                    labels = labels.unsqueeze(1)
                    val_loss = criterion(outputs, labels)
                    val_losses.append(val_loss.item())
            
            avg_val_loss = np.mean(val_losses)
            print(f'Epoch {epoch+1}, Validation Loss: {avg_val_loss:.4f}')
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                best_model = model.state_dict()
                early_stop_counter = 0
            else:
                early_stop_counter += 1
            
            if early_stop_counter >= patience:
                print(f'Early stopping after epoch {epoch+1} with validation loss {best_val_loss:.4f}')
                break
            
        model.load_state_dict(best_model)

        #eval
        test_losses = []
        test_predictions = []
        test_true_labels = []

        for inputs, labels in test_loader:
            with torch.no_grad():
                outputs = model(inputs)
                labels = labels.unsqueeze(1)
                test_loss = criterion(outputs, labels)
                test_losses.append(test_loss.item())
                test_predictions.extend(outputs.cpu().numpy())
                test_true_labels.extend(labels.cpu().numpy())
        avg_test_loss = np.mean(test_losses)
        test_predictions_f1 = [y>0.5 for y in test_predictions]
        test_score = f1_score(test_true_labels, test_predictions_f1)
        print(f'Test Loss: {avg_test_loss:.4f}, Test Score: {test_score:.4f} for seed {seed+1} and {head} heads.')
        if head == 0:
            test_label_list.append(test_true_labels)
        test_prediction_dict[head].append(test_predictions)
        losses[head].append(avg_test_loss)
# with open ("/Users/aviadsusman/Documents/Python_Projects/FeatureLevelAttention/FLA/results/diabetes/test_pred_dict_20s.pkl", "wb") as file:
#     pkl.dump(test_prediction_dict, file=file)
# with open ("/Users/aviadsusman/Documents/Python_Projects/FeatureLevelAttention/FLA/results/diabetes/test_losses_dict_20s.pkl", "wb") as file:
#     pkl.dump(losses, file=file)
# with open ("/Users/aviadsusman/Documents/Python_Projects/FeatureLevelAttention/FLA/results/diabetes/test_labels.pkl", "wb") as file:
#     pkl.dump(test_label_list, file=file)

seed 1, with 0 heads
Epoch 1, Validation Loss: 1.1598
Epoch 2, Validation Loss: 1.1284
Epoch 3, Validation Loss: 1.1000
Epoch 4, Validation Loss: 1.0945
Epoch 5, Validation Loss: 1.1170
Epoch 6, Validation Loss: 1.1093
Epoch 7, Validation Loss: 1.1024
Epoch 8, Validation Loss: 1.1186
Epoch 9, Validation Loss: 1.1048
Epoch 10, Validation Loss: 1.1146
Epoch 11, Validation Loss: 1.1135
Epoch 12, Validation Loss: 1.1595
Epoch 13, Validation Loss: 1.1846
Epoch 14, Validation Loss: 1.1771
Early stopping after epoch 14 with validation loss 1.0945
Test Loss: 1.2603, Test Score: 0.2705 for seed 1 and 0 heads.
seed 2, with 0 heads
Epoch 1, Validation Loss: 1.1477
Epoch 2, Validation Loss: 1.1470
Epoch 3, Validation Loss: 1.1448
Epoch 4, Validation Loss: 1.1168
Epoch 5, Validation Loss: 1.1473
Epoch 6, Validation Loss: 1.1302
Epoch 7, Validation Loss: 1.1329
Epoch 8, Validation Loss: 1.1385
Epoch 9, Validation Loss: 1.1450
Epoch 10, Validation Loss: 1.1409
Epoch 11, Validation Loss: 1.1670
Epoch 

In [8]:
print(np.median(forward_times))
print(np.median(loss_times))
print(np.median(backwards_times))
print(np.median(optimizer_times))

8.392333984375e-05
2.4080276489257812e-05
0.00014781951904296875
0.0001919269561767578


In [9]:
f'Average epoch time: {(np.median(forward_times)+np.median(loss_times)+np.median(backwards_times)+np.median(optimizer_times))*110}'

'Average epoch time: 0.04925251007080078'

In [10]:
param_list = []
for j, params in enumerate(model.parameters()):
    param_list.append(np.prod([params.size(i) for i in range(len(params.size()))]))
np.sum(param_list)

np.int64(7166)

In [13]:
for j, params in enumerate(model.parameters()):
    print(j, params.size())

0 torch.Size([50, 108])
1 torch.Size([50])
2 torch.Size([25, 50])
3 torch.Size([25])
4 torch.Size([10, 25])
5 torch.Size([10])
6 torch.Size([50])
7 torch.Size([50])
8 torch.Size([25])
9 torch.Size([25])
10 torch.Size([10])
11 torch.Size([10])
12 torch.Size([1, 10])
13 torch.Size([1])


In [23]:
from sklearn.metrics import f1_score
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
scores = []
param = {
    'eta': [0.1*i for i in range(10)], 
    'max_depth': range(1,10),  
    'n_estimators': [10*(i+1) for i in range(10)]}
xmodel = XGBClassifier()
clf = GridSearchCV(xmodel, param, scoring='f1')
clf.fit(X,y)
# for seed in range(10):
#     X_train, X_test, y_train, y_test = train_test_split(X,y,stratify=y, random_state=seed, test_size=0.2)
#     xmodel = XGBClassifier()
#     xmodel.fit(X_train, y_train)
#     y_pred = xmodel.predict(X_test)
#     scores.append(f1_score(y_pred=y_pred, y_true=y_test))
# np.median(scores)


In [33]:
scores=[]
for seed in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X,y,stratify=y, random_state=seed, test_size=0.2)
    xmodel = XGBClassifier(eta=0.9, n_estimators=60, max_depth=5)
    xmodel.fit(X_train, y_train)
    y_pred = xmodel.predict(X_test)
    scores.append(f1_score(y_pred=y_pred, y_true=y_test))
np.median(scores)

np.float64(0.14653348269853123)