In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn,optim
from torch.utils.data import Dataset, DataLoader

import copy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from scipy.stats import mode

from generic_data import GenericDataset
from SimpleNNs import TwoNetBC
from model_helper import train_epoch, eval_model, eval_ensemble

In [2]:
Xtrain = np.load("./data/Xtrain.npy")
Ytrain = np.load("./data/Ytrain.npy")
Xtest = np.load("./data/Xtest.npy")
Ytest = np.load("./data/Ytest.npy")

In [3]:
train_dataset = GenericDataset(Xtrain, Ytrain)
test_dataset = GenericDataset(Xtest, Ytest)

In [4]:
# Define model parameters
input_size = 4
output_size = 1
hidden_size = 4
shuffle = True

# Define training parameters
epochs = 100
epochs_fine_tune = 100
lr = 0.01
batch_size = 64

# Device parameters
device = "cuda"

# Ensemble parameters
num_models = 20

In [5]:
base_model=TwoNetBC(input_size, output_size, hidden_size)
optimizer = optim.Adam(base_model.parameters(), lr=lr, betas=(0.9, 0.999))
criterion = nn.BCELoss()
train_loader=DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=shuffle)
base_model.to(device)
    
e_losses = []
    
# Train model
for epoch in range(epochs):
    loss = train_epoch(base_model, optimizer, criterion, train_loader, device, batch_size)
    e_losses.append(loss)
        
# Evaluate model
test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=shuffle)
base_acc, _, _ = eval_model(base_model, test_loader)

In [6]:
base_acc

0.8025

In [7]:
e_losses[-1]

0.44323885

In [8]:
list_of_models = []

In [9]:
# Fine tune 20 models starting from base
for i in range(num_models):
    # Initialize new model
    model = copy.deepcopy(base_model)
    # Perturb model first layer weights
    model_state = model.state_dict()
    epsilon = 1e-3
    layer = 'linear_relu_stack.0.weight'
    layer_shape = torch.Tensor.size(model_state[layer])
    perturbation_tensor = 1 + torch.rand(layer_shape).cuda() * epsilon
    perturbation_tensor = perturbation_tensor.to(device)
    model_state[layer] *= perturbation_tensor
    # Load perturbed weights
    model.load_state_dict(model_state)
    
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))
    criterion = nn.BCELoss()
    train_loader=DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=shuffle)
    model.to(device)
    
    e_losses = []
    
    # Train model
    for epoch in range(epochs_fine_tune):
        loss = train_epoch(model, optimizer, criterion, train_loader, device, batch_size)
        e_losses.append(loss)
        
    # Evaluate model
    test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=shuffle)
    acc, _, _ = eval_model(model, test_loader)
    
    # Add model to list
    list_of_models.append(model)
    print(f"Model {i}: Final Loss ({e_losses[-1]:.6f}), Test Acc ({acc:.4f})")
    

Model 0: Final Loss (0.431512), Test Acc (0.7975)
Model 1: Final Loss (0.438742), Test Acc (0.8100)
Model 2: Final Loss (0.435259), Test Acc (0.8225)
Model 3: Final Loss (0.433974), Test Acc (0.8025)
Model 4: Final Loss (0.432003), Test Acc (0.8050)
Model 5: Final Loss (0.436409), Test Acc (0.8100)
Model 6: Final Loss (0.436230), Test Acc (0.8100)
Model 7: Final Loss (0.434158), Test Acc (0.8100)
Model 8: Final Loss (0.433661), Test Acc (0.7975)
Model 9: Final Loss (0.434789), Test Acc (0.8175)
Model 10: Final Loss (0.437999), Test Acc (0.8100)
Model 11: Final Loss (0.445013), Test Acc (0.8025)
Model 12: Final Loss (0.441832), Test Acc (0.7975)
Model 13: Final Loss (0.435737), Test Acc (0.7925)
Model 14: Final Loss (0.432838), Test Acc (0.8000)
Model 15: Final Loss (0.435175), Test Acc (0.8075)
Model 16: Final Loss (0.437639), Test Acc (0.8100)
Model 17: Final Loss (0.434472), Test Acc (0.8100)
Model 18: Final Loss (0.438728), Test Acc (0.8075)
Model 19: Final Loss (0.439798), Test Acc

In [10]:
test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)
ensemble_acc, ensemble_preds, ensemble_labels = eval_ensemble(list_of_models, test_loader)

In [11]:
ensemble_acc

0.8

In [12]:
# without best model
list_of_models2 = [list_of_models[i] for i in range(20) if i != 18]

In [13]:
test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)
ensemble_acc, ensemble_preds, ensemble_labels = eval_ensemble(list_of_models2, test_loader)

In [14]:
ensemble_acc

0.8025