In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn,optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from scipy.stats import mode

from generic_data import GenericDataset
from SimpleNNs import TwoNetBC
from model_helper import train_epoch, eval_model, eval_ensemble

In [2]:
# Read the pre-split feature/label arrays from ./data, in the order
# train features, train labels, test features, test labels.
Xtrain, Ytrain, Xtest, Ytest = (
    np.load(npy_path)
    for npy_path in (
        "./data/Xtrain.npy",
        "./data/Ytrain.npy",
        "./data/Xtest.npy",
        "./data/Ytest.npy",
    )
)

In [3]:
# Wrap each (features, labels) pair in a GenericDataset so it can be handed
# to a DataLoader; the train split is built first, then the test split.
train_dataset, test_dataset = (
    GenericDataset(features, labels)
    for features, labels in ((Xtrain, Ytrain), (Xtest, Ytest))
)

In [4]:
# Define model parameters (passed positionally to TwoNetBC in the training loop)
input_size = 4
output_size = 1
hidden_size = 4

# Define training parameters
epochs = 200       # passes over the training loader per model
lr = 0.01          # Adam learning rate
batch_size = 64    # used for both the train and the test DataLoader
shuffle = True     # DataLoader shuffle flag

# Device parameters
# Fall back to the CPU when CUDA is unavailable so the notebook still runs
# on machines without a GPU instead of failing at `model.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Ensemble parameters
num_models = 20    # number of independently trained models in the ensemble

# Reproducibility
# Seed the global RNGs so weight initialisation and batch shuffling repeat
# when the notebook is restarted and run top to bottom. NumPy is seeded too
# in case the imported helpers draw from it (not visible from this notebook).
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

In [5]:
# Holds the trained ensemble members; the training loop appends one model per iteration.
list_of_models = []

In [6]:
# Train `num_models` independent TwoNetBC models and collect them for ensembling.

# Start from an empty list so that re-running this cell does not stack a second
# batch of models on top of the ones trained by a previous execution.
list_of_models = []

# Loop-invariant objects are built once instead of once per model.
criterion = nn.BCELoss()
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=shuffle)
# Evaluation gains nothing from shuffling; keep dataset order, matching the
# loader used for the ensemble evaluation.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

for i in range(num_models):
    # Initialize new model. Move it to the target device *before* building the
    # optimizer so the optimizer is constructed over the parameters in their
    # final location.
    model = TwoNetBC(input_size, output_size, hidden_size)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    # Per-epoch value returned by train_epoch (reported below as the loss).
    e_losses = []

    # Train model
    for epoch in range(epochs):
        loss = train_epoch(model, optimizer, criterion, train_loader, device, batch_size)
        e_losses.append(loss)

    # Evaluate model. Only the first returned value is needed here; indexing
    # avoids assigning to `_`, which IPython uses for the last cell output.
    acc = eval_model(model, test_loader)[0]

    # Add model to list
    list_of_models.append(model)

    # Guard against epochs == 0, where no loss was ever recorded.
    final_loss = e_losses[-1] if e_losses else float("nan")
    print(f"Model {i}: Final Loss ({final_loss:.6f}), Test Acc ({acc:.4f})")
    

Model 0: Final Loss (0.470831), Test Acc (0.7525)
Model 1: Final Loss (0.568083), Test Acc (0.7075)
Model 2: Final Loss (0.415471), Test Acc (0.7900)
Model 3: Final Loss (0.448561), Test Acc (0.7875)
Model 4: Final Loss (0.452171), Test Acc (0.7675)
Model 5: Final Loss (0.408072), Test Acc (0.8075)
Model 6: Final Loss (0.436430), Test Acc (0.7650)
Model 7: Final Loss (0.543227), Test Acc (0.7175)
Model 8: Final Loss (0.477892), Test Acc (0.7550)
Model 9: Final Loss (0.441634), Test Acc (0.8175)
Model 10: Final Loss (0.482352), Test Acc (0.7300)
Model 11: Final Loss (0.527879), Test Acc (0.6725)
Model 12: Final Loss (0.455738), Test Acc (0.7700)
Model 13: Final Loss (0.487636), Test Acc (0.6725)
Model 14: Final Loss (0.449957), Test Acc (0.7325)
Model 15: Final Loss (0.385079), Test Acc (0.8275)
Model 16: Final Loss (0.437318), Test Acc (0.7700)
Model 17: Final Loss (0.425649), Test Acc (0.8025)
Model 18: Final Loss (0.461457), Test Acc (0.7525)
Model 19: Final Loss (0.495514), Test Acc

In [7]:
# Evaluate all trained models together on the test split. The loader is built
# with shuffle=False, so batches are served in dataset order.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
(
    ensemble_acc,
    ensemble_preds,
    ensemble_labels,
) = eval_ensemble(list_of_models, test_loader)

In [8]:
# Display the first value returned by eval_ensemble on the test loader
# (presumably the ensemble's test accuracy — confirm against model_helper).
ensemble_acc

0.8725

In [9]:
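# # Ablation: ensemble without the best single model.
# # NOTE: index 18 below is hard-coded, presumably from an earlier run; in the
# # output shown above model 18 is not the best individual model, so recompute
# # the index (and use num_models instead of 20) before re-enabling this.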
# list_of_models2 = [list_of_models[i] for i in range(20) if i != 18]

In [10]:
# test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)
# ensemble_acc, ensemble_preds, ensemble_labels = eval_ensemble(list_of_models2, test_loader)

In [1]:
# ensemble_acc