In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from GOLDataset import generateDataset
from GOLCNN import OPNet, train_epoch, test_model
from MinimalSolution import MinNet

# Prefer the GPU, but fall back to the CPU so the notebook still runs (e.g. for a
# quick smoke test) on a machine without CUDA instead of failing at the first
# `.to(device)` call.
# NOTE(review): the helpers imported from GOLDataset / GOLCNN are not passed a device
# in this notebook, so they may assume CUDA internally — confirm before relying on
# the CPU fallback for real training.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# Pin the global NumPy and PyTorch random number generators to one shared seed
# so that a fresh "Restart & Run All" starts from the same random state.
seed = 123
np.random.seed(seed=seed)
torch.manual_seed(seed=seed)

<torch._C.Generator at 0x7f2daff9fc90>

In [3]:
# Sanity check: run test_model() against the minimal-solution CNN and print the
# metrics it returns, before using it to score the trained models.
dataset_size = 1000
# The dataset is generated before the model is built, so random-number
# consumption happens in the same order as before.
dataloader = generateDataset(n_steps=3, size=32, dataSetSize=dataset_size)

criterion = nn.MSELoss()
min_model = MinNet(3)
min_model.to(device)

# The third argument is 1 here; the training cells pass the overparameterization
# factor `m` in this position.
min_results = test_model(min_model, dataloader, 1, criterion)
acc, epoch_test_loss, num_correct, num_wrong = min_results
print(
    f'Accuracy: {acc}, Test Loss: {epoch_test_loss}, '
    f'Correct: {num_correct}/{dataset_size}, Incorrect: {num_wrong}/{dataset_size}'
)

Accuracy: 1.0, Test Loss: 3.1828268017524587e-18, Correct: 1000/1000, Incorrect: 0/1000


In [4]:
# --- Data ---------------------------------------------------------------
dataset_size = 1000      # passed to generateDataset as dataSetSize
datapoint_size = 32      # passed to generateDataset as size

# --- Training schedule --------------------------------------------------
learning_rate = 1e-3     # SGD learning rate for the first era
batch_size_param = 64    # passed to generateDataset as batch_size
epochs = 1500            # length of the first training era
era2epochs = 500         # length of the second era (run at learning_rate * 0.1)
checkpoint_rate = 100    # evaluate and save every this many epochs

# --- Model shape --------------------------------------------------------
m = 16  # Overparameterization Factor
n = 2   # Steps of GOL simulation

# Two independently initialised networks with identical hyperparameters.
# Amber is constructed first, then Brian.
model_amber, model_brian = OPNet(m, n), OPNet(m, n)

# Each network gets its own loss object and its own SGD optimizer.
criterion_amber, criterion_brian = nn.MSELoss(), nn.MSELoss()
optimizer_amber, optimizer_brian = (
    torch.optim.SGD(net.parameters(), lr=learning_rate)
    for net in (model_amber, model_brian)
)

In [5]:
# Move both networks onto the configured device before training starts.
for _net in (model_amber, model_brian):
    _net.to(device)
print('models loaded to device')

models loaded to device


In [6]:
# Per-epoch training losses ([epoch, loss]) and per-checkpoint evaluation rows
# ([epoch, checkpoint_name, accuracy, test_loss, num_correct, num_wrong]) for each model.
full_data_amber = []
full_data_brian = []
checkpoint_data_amber = []
checkpoint_data_brian = []

# torch.save() does not create missing parent directories; make sure the target
# exists up front instead of failing at the first checkpoint.
os.makedirs('./models', exist_ok=True)


def run_era(first_epoch, last_epoch, total_epochs, runs):
    """Train every run for epochs ``first_epoch..last_epoch`` (inclusive).

    A new dataset is generated each epoch and shared by all runs. Every
    ``checkpoint_rate`` epochs each model is evaluated with ``test_model``,
    the metrics are logged and printed, and the model is saved under ``./models``.

    Parameters
    ----------
    first_epoch, last_epoch : int
        Inclusive range of global epoch numbers covered by this era.
    total_epochs : int
        Denominator shown in the progress message.
    runs : list of tuple
        One ``(label, model, optimizer, criterion, full_data, checkpoint_data)``
        tuple per model. ``label`` is printed as-is and lower-cased to form the
        checkpoint file name; the two lists are appended to in place.
    """
    for t in range(first_epoch, last_epoch + 1):
        dataloader = generateDataset(dataSetSize=dataset_size,
                                     size=datapoint_size,
                                     n_steps=n,
                                     batch_size=batch_size_param)

        # Train all runs on this epoch's data before any evaluation happens.
        for _, model, optimizer, criterion, full_data, _ in runs:
            epoch_train_loss = train_epoch(model, optimizer, criterion, dataloader, m)
            full_data.append([t, epoch_train_loss])

        if t % checkpoint_rate != 0:
            continue

        # NOTE(review): evaluation reuses the dataloader the models were just trained
        # on this epoch; consider a separately generated test set.
        for label, model, _, criterion, _, checkpoint_data in runs:
            acc, test_loss, num_correct, num_wrong = test_model(model, dataloader, m, criterion)
            checkpoint_name = f'{label.lower()}_m{m}_n{n}_checkpoint{t}.pt'
            checkpoint_data.append([t, checkpoint_name, acc, test_loss, num_correct, num_wrong])
            print(f'{label}: Epoch: {t}/{total_epochs}, Test Loss: {test_loss}, Incorrect: {num_wrong}/{dataset_size} examples')
            # Save the model that was just evaluated. Previously Brian's checkpoint
            # files were written from model_amber.
            torch.save(model, f'./models/{checkpoint_name}')


# Era 1: full learning rate.
run_era(1, epochs, epochs, [
    ('Amber', model_amber, optimizer_amber, criterion_amber, full_data_amber, checkpoint_data_amber),
    ('Brian', model_brian, optimizer_brian, criterion_brian, full_data_brian, checkpoint_data_brian),
])
print("END OF ERA 1")

# Era 2: continue training both models with fresh SGD optimizers at a 10x smaller learning rate.
optimizer_amber = torch.optim.SGD(model_amber.parameters(), lr=learning_rate*0.1)
optimizer_brian = torch.optim.SGD(model_brian.parameters(), lr=learning_rate*0.1)

run_era(epochs + 1, epochs + era2epochs, epochs + era2epochs, [
    ('Amber', model_amber, optimizer_amber, criterion_amber, full_data_amber, checkpoint_data_amber),
    ('Brian', model_brian, optimizer_brian, criterion_brian, full_data_brian, checkpoint_data_brian),
])
print("END OF ERA 2")
print("DONE!")

Amber: Epoch: 100/1000, Test Loss: 0.24755781888961792, Incorrect: 1000/1000 examples
Brian: Epoch: 100/1000, Test Loss: 0.22196242213249207, Incorrect: 1000/1000 examples
Amber: Epoch: 200/1000, Test Loss: 0.22412323951721191, Incorrect: 1000/1000 examples
Brian: Epoch: 200/1000, Test Loss: 0.20985013246536255, Incorrect: 1000/1000 examples
Amber: Epoch: 300/1000, Test Loss: 0.21021032333374023, Incorrect: 1000/1000 examples
Brian: Epoch: 300/1000, Test Loss: 0.20256714522838593, Incorrect: 1000/1000 examples
Amber: Epoch: 400/1000, Test Loss: 0.2017800360918045, Incorrect: 1000/1000 examples
Brian: Epoch: 400/1000, Test Loss: 0.19804072380065918, Incorrect: 1000/1000 examples
Amber: Epoch: 500/1000, Test Loss: 0.19629181921482086, Incorrect: 1000/1000 examples
Brian: Epoch: 500/1000, Test Loss: 0.19474750757217407, Incorrect: 1000/1000 examples
Amber: Epoch: 600/1000, Test Loss: 0.19305594265460968, Incorrect: 1000/1000 examples
Brian: Epoch: 600/1000, Test Loss: 0.1927444040775299, 

KeyboardInterrupt: 

In [None]:
# Column layouts matching the rows appended during training: one [epoch, loss]
# row per epoch, and one six-field row per checkpoint.
_loss_columns = ['epoch', 'training_loss']
_checkpoint_columns = ['epoch', 'checkpoint_name', 'accuracy', 'test_loss', 'num_correct', 'num_wrong']

# Per-epoch training loss tables.
df_full_data_amber = pd.DataFrame(full_data_amber, columns=_loss_columns)
df_full_data_brian = pd.DataFrame(full_data_brian, columns=_loss_columns)

# Per-checkpoint evaluation tables.
df_checkpoint_data_amber = pd.DataFrame(checkpoint_data_amber, columns=_checkpoint_columns)
df_checkpoint_data_brian = pd.DataFrame(checkpoint_data_brian, columns=_checkpoint_columns)

In [None]:
# Persist the training-loss and checkpoint tables as CSV files under ./logs.
# DataFrame.to_csv() does not create missing parent directories, so create the
# folder first rather than failing once training has already finished.
os.makedirs('./logs', exist_ok=True)

# Same files, same write order and same to_csv defaults (index column included) as before.
for _csv_path, _frame in (
    ('./logs/amber_full_data.csv', df_full_data_amber),
    ('./logs/brian_full_data.csv', df_full_data_brian),
    ('./logs/amber_checkpoint_data.csv', df_checkpoint_data_amber),
    ('./logs/brian_checkpoint_data.csv', df_checkpoint_data_brian),
):
    _frame.to_csv(_csv_path)