In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd

from GOLDataset import generateDataset
from GOLCNN import OPNet, train_epoch, test_model
from MinimalSolution import MinNet

# Device used for every model in this notebook. Prefer the GPU, but fall back
# to the CPU so the notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# Seed NumPy and PyTorch so a Restart & Run All reproduces the same run.
# Seed used for each previously trained model pair:
#   seed = 11 -> carl and denise  (m=16, n=2)
#   seed = 12 -> ethan and fred   (m=8,  n=2)
#   seed = 13 -> greg and harold  (m=8,  n=2)
seed = 13
np.random.seed(seed)
# Kept as the last expression: it returns the torch Generator shown as this cell's output.
torch.manual_seed(seed)

<torch._C.Generator at 0x7fd191b8a610>

In [3]:
# Sanity check: run test_model() against the minimal-solution CNN and report
# its accuracy and loss before any overparameterized network is trained.
dataset_size = 1000
criterion = nn.MSELoss()

# The dataset is generated before the model is built, matching the original
# order of calls.
dataloader = generateDataset(
    dataSetSize=dataset_size,
    size=32,
    n_steps=3,
)

min_model = MinNet(3)
min_model.to(device)

acc, epoch_test_loss, num_correct, num_wrong = test_model(
    min_model, dataloader, 1, criterion
)
print(f'Accuracy: {acc}, Test Loss: {epoch_test_loss}, Correct: {num_correct}/{dataset_size}, Incorrect: {num_wrong}/{dataset_size}')

Accuracy: 1.0, Test Loss: 3.1796076216035555e-18, Correct: 1000/1000, Incorrect: 0/1000


In [4]:
# Data parameters
dataset_size = 1000    # passed as `dataSetSize` to generateDataset each epoch
datapoint_size = 32    # passed as `size` to generateDataset

# Training parameters
learning_rate = 1e-3
batch_size_param = 1   # NOTE(review): not referenced by any visible cell
epochs = 1500
era2epochs = 0         # only used by the commented-out second training era
checkpoint_rate = 100  # evaluate and save a checkpoint every this many epochs

m = 8  # Overparameterization Factor
n = 2  # Steps of GOL simulation

# Two independently initialised networks with identical hyper-parameters.
# Construction order is kept (amber, then brian) so the seeded RNG yields the
# same initial weights as before.
model_amber = OPNet(m, n)
model_brian = OPNet(m, n)

# Move the models to the target device BEFORE building the optimizers, as the
# torch.optim documentation recommends, so each optimizer is created over the
# parameters in their final location.
model_amber.to(device)
model_brian.to(device)

criterion_amber = nn.MSELoss()
criterion_brian = nn.MSELoss()
optimizer_amber = torch.optim.SGD(model_amber.parameters(), lr=learning_rate)
optimizer_brian = torch.optim.SGD(model_brian.parameters(), lr=learning_rate)

In [5]:
# Place both networks on the configured device before training starts.
for _net in (model_amber, model_brian):
    _net.to(device)
print('models loaded to device')

models loaded to device


In [6]:
# Per-epoch training log: one [epoch, training_loss] row per epoch.
full_data_amber = []
full_data_brian = []
# Per-checkpoint evaluation log:
# [epoch, checkpoint_name, accuracy, test_loss, num_correct, num_wrong].
checkpoint_data_amber = []
checkpoint_data_brian = []

for t in range(1, epochs + 1):
    # A fresh dataset is generated every epoch and shared by both models.
    dataloader = generateDataset(dataSetSize=dataset_size,
                                 size=datapoint_size,
                                 n_steps=n)

    epoch_train_loss_amber = train_epoch(model_amber, optimizer_amber, criterion_amber, dataloader, m)
    full_data_amber.append([t, epoch_train_loss_amber])

    epoch_train_loss_brian = train_epoch(model_brian, optimizer_brian, criterion_brian, dataloader, m)
    full_data_brian.append([t, epoch_train_loss_brian])

    if t % checkpoint_rate == 0:
        # NOTE(review): evaluation reuses the dataloader that was just trained
        # on this epoch; the reported "test" numbers are not from held-out data.
        acc_amber, epoch_test_loss_amber, num_correct_amber, num_wrong_amber = test_model(model_amber, dataloader, m, criterion_amber)
        checkpoint_name_amber = f'greg_m{m}_n{n}_checkpoint{t}.pt'
        checkpoint_data_amber.append([t, checkpoint_name_amber, acc_amber, epoch_test_loss_amber, num_correct_amber, num_wrong_amber])
        print(f'Greg: Epoch: {t}/{epochs}, Test Loss: {epoch_test_loss_amber}, Incorrect: {num_wrong_amber}/{dataset_size} examples')
        torch.save(model_amber, f'./models/{checkpoint_name_amber}')

        acc_brian, epoch_test_loss_brian, num_correct_brian, num_wrong_brian = test_model(model_brian, dataloader, m, criterion_brian)
        checkpoint_name_brian = f'harold_m{m}_n{n}_checkpoint{t}.pt'
        checkpoint_data_brian.append([t, checkpoint_name_brian, acc_brian, epoch_test_loss_brian, num_correct_brian, num_wrong_brian])
        print(f'Harold: Epoch: {t}/{epochs}, Test Loss: {epoch_test_loss_brian}, Incorrect: {num_wrong_brian}/{dataset_size} examples')
        # Fixed: this previously saved model_amber under the harold file name,
        # so the second model's weights were never written to disk.
        torch.save(model_brian, f'./models/{checkpoint_name_brian}')

print("END OF ERA 1")

# Optional second era at a 10x smaller learning rate (disabled; era2epochs = 0).
# The same save bug is corrected below so re-enabling it does not bring it back.

# optimizer_amber = torch.optim.SGD(model_amber.parameters(), lr=learning_rate*0.1)
# optimizer_brian = torch.optim.SGD(model_brian.parameters(), lr=learning_rate*0.1)

# for t in range(epochs + 1, epochs+era2epochs+1):
#     dataloader = generateDataset(dataSetSize=dataset_size,
#                                  size=datapoint_size,
#                                  n_steps=n)

#     epoch_train_loss_amber = train_epoch(model_amber, optimizer_amber, criterion_amber, dataloader, m)
#     full_data_amber.append([t, epoch_train_loss_amber])

#     epoch_train_loss_brian = train_epoch(model_brian, optimizer_brian, criterion_brian, dataloader, m)
#     full_data_brian.append([t, epoch_train_loss_brian])

#     if t % checkpoint_rate == 0:
#         acc_amber, epoch_test_loss_amber, num_correct_amber, num_wrong_amber = test_model(model_amber, dataloader, m, criterion_amber)
#         checkpoint_name_amber = f'ethan_m{m}_n{n}_checkpoint{t}.pt'
#         checkpoint_data_amber.append([t, checkpoint_name_amber, acc_amber, epoch_test_loss_amber, num_correct_amber, num_wrong_amber])
#         print(f'Ethan: Epoch: {t}/{epochs+era2epochs}, Test Loss: {epoch_test_loss_amber}, Incorrect: {num_wrong_amber}/{dataset_size} examples')
#         torch.save(model_amber, f'./models/{checkpoint_name_amber}')

#         acc_brian, epoch_test_loss_brian, num_correct_brian, num_wrong_brian = test_model(model_brian, dataloader, m, criterion_brian)
#         checkpoint_name_brian = f'fred_m{m}_n{n}_checkpoint{t}.pt'
#         checkpoint_data_brian.append([t, checkpoint_name_brian, acc_brian, epoch_test_loss_brian, num_correct_brian, num_wrong_brian])
#         print(f'Fred: Epoch: {t}/{epochs+era2epochs}, Test Loss: {epoch_test_loss_brian}, Incorrect: {num_wrong_brian}/{dataset_size} examples')
#         torch.save(model_brian, f'./models/{checkpoint_name_brian}')

# print("END OF ERA 2")
print("DONE!")

Greg: Epoch: 100/1500, Test Loss: 0.1765715330839157, Incorrect: 1000/1000 examples
Harold: Epoch: 100/1500, Test Loss: 0.17821846902370453, Incorrect: 1000/1000 examples
Greg: Epoch: 200/1500, Test Loss: 0.17261932790279388, Incorrect: 1000/1000 examples
Harold: Epoch: 200/1500, Test Loss: 0.16698187589645386, Incorrect: 1000/1000 examples
Greg: Epoch: 300/1500, Test Loss: 0.16792279481887817, Incorrect: 1000/1000 examples
Harold: Epoch: 300/1500, Test Loss: 0.15850381553173065, Incorrect: 1000/1000 examples
Greg: Epoch: 400/1500, Test Loss: 0.15910843014717102, Incorrect: 1000/1000 examples
Harold: Epoch: 400/1500, Test Loss: 0.1475035697221756, Incorrect: 1000/1000 examples
Greg: Epoch: 500/1500, Test Loss: 0.1488684117794037, Incorrect: 1000/1000 examples
Harold: Epoch: 500/1500, Test Loss: 0.13822147250175476, Incorrect: 1000/1000 examples
Greg: Epoch: 600/1500, Test Loss: 0.14163683354854584, Incorrect: 1000/1000 examples
Harold: Epoch: 600/1500, Test Loss: 0.12569941580295563, I

In [7]:
# Turn the per-epoch training logs into DataFrames (one row per epoch).
df_full_data_amber, df_full_data_brian = (
    pd.DataFrame(rows, columns=['epoch', 'training_loss'])
    for rows in (full_data_amber, full_data_brian)
)

# Turn the per-checkpoint evaluation logs into DataFrames (one row per checkpoint).
df_checkpoint_data_amber, df_checkpoint_data_brian = (
    pd.DataFrame(
        rows,
        columns=['epoch', 'checkpoint_name', 'accuracy', 'test_loss', 'num_correct', 'num_wrong'],
    )
    for rows in (checkpoint_data_amber, checkpoint_data_brian)
)

In [8]:
# Persist the logs as CSV under ./logs (the DataFrame index is written too,
# as with the default to_csv settings).

# First model ("greg"): per-epoch losses, then checkpoint evaluations.
df_full_data_amber.to_csv(path_or_buf='./logs/greg_full_data.csv')
df_checkpoint_data_amber.to_csv(path_or_buf='./logs/greg_checkpoint_data.csv')

# Second model ("harold"): per-epoch losses, then checkpoint evaluations.
df_full_data_brian.to_csv(path_or_buf='./logs/harold_full_data.csv')
df_checkpoint_data_brian.to_csv(path_or_buf='./logs/harold_checkpoint_data.csv')