In [1]:
import numpy as np
import utils
import config

import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Settings copied from the project `config` module into notebook-level names.
BASE_IMAGE = config.BASE_IMAGE

# Object-store endpoint, credentials and bucket, read from `config`
# (never hard-code these values in the notebook).
S3_END_POINT, S3_ACCESS_ID, S3_ACCESS_KEY = (
    config.S3_END_POINT,
    config.S3_ACCESS_ID,
    config.S3_ACCESS_KEY,
)
bucket_name = config.BUCKET_NAME

In [3]:
# Short local aliases for the object-store helpers defined in `utils`.
get_client, create_bucket = utils.get_client, utils.create_bucket
read_from_store, write_to_store = utils.read_from_store, utils.write_to_store

In [7]:
class Net(nn.Module):
    '''Fully connected feed-forward network with a configurable hidden stack.

    The network is ``n_hidden_layers`` linear layers of width
    ``n_hidden_nodes`` (each followed by ``activation``), then one linear
    output layer optionally followed by ``output_activation``.

    Args:
        n_inputs: Size of the last dimension of the input tensor.
        n_outputs: Size of the last dimension of the output tensor.
        n_hidden_nodes: Width of every hidden layer.
        n_hidden_layers: Number of hidden layers. With 0 (or a negative
            value) the model is a single linear map from ``n_inputs`` to
            ``n_outputs``.
        activation: Callable applied after every hidden layer.
        output_activation: Optional callable applied to the output layer's
            result; ``None`` returns the raw linear output.
    '''

    def __init__(self, n_inputs=2, n_outputs=1, n_hidden_nodes=10, n_hidden_layers=1, activation=nn.ReLU(), output_activation=None):
        super().__init__()

        self.layer_list = nn.ModuleList()

        # Track the width feeding the next layer so the output layer is sized
        # correctly even when there are no hidden layers at all.
        in_features = n_inputs
        for _ in range(n_hidden_layers):
            self.layer_list.append(nn.Linear(in_features, n_hidden_nodes))
            in_features = n_hidden_nodes

        self.output_layer = nn.Linear(in_features, n_outputs)

        self.activation = activation
        self.output_activation = output_activation

    def forward(self, x):
        '''Run ``x`` through the hidden stack and the output layer.'''
        out = x

        for layer in self.layer_list:
            out = self.activation(layer(out))

        out = self.output_layer(out)
        if self.output_activation is not None:
            out = self.output_activation(out)

        return out

In [4]:
def train_model(hyperparam_idx: int, retcode_download: int, N_gridsize: int) -> int:
    '''Train the model for one entry of the hyperparameter grid and store its test loss.

    Reads ``features_train``, ``target_train``, ``features_test``,
    ``target_test`` and ``hyperparam_grid`` from the object store, trains a
    ``Net`` with full-batch Adam on binary cross-entropy using the grid entry
    at ``hyperparam_idx``, and writes ``{'test_loss': <float>}`` to the store
    under the key ``score_<hyperparam_idx>``.

    Args:
        hyperparam_idx: Index into the stored hyperparameter grid; must
            satisfy ``0 <= hyperparam_idx < N_gridsize``.
        retcode_download: Not used by this function.
            NOTE(review): presumably only present so this step can be chained
            after a download step - confirm against the caller.
        N_gridsize: Size of the grid, used to validate ``hyperparam_idx``.

    Returns:
        ``hyperparam_idx``, unchanged.

    Raises:
        ValueError: If ``hyperparam_idx`` is out of range, or if the grid
            entry names an activation other than ``'relu'`` / ``'sigmoid'``.
    '''

    if hyperparam_idx >= N_gridsize:
        raise ValueError("hyperparam_idx cannot be >= N_gridsize")
    if hyperparam_idx < 0:
        # A negative index would silently pick a grid entry counted from the end.
        raise ValueError("hyperparam_idx cannot be negative")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f'Device = {device}')

    client = get_client()

    features_train = torch.from_numpy(read_from_store(bucket_name, 'features_train', client)).float()
    target_train = torch.from_numpy(read_from_store(bucket_name, 'target_train', client)).float()
    features_test = torch.from_numpy(read_from_store(bucket_name, 'features_test', client)).float()
    target_test = torch.from_numpy(read_from_store(bucket_name, 'target_test', client)).float()

    conf = read_from_store(bucket_name, 'hyperparam_grid', client)[hyperparam_idx]
    lr = float(conf.get('lr', 1e-2))
    N_epochs = int(conf.get('N_epochs', 1000))
    num_hidden_layers = int(conf.get('num_hidden_layers', 1))
    num_nodes = int(conf.get('num_nodes', 2))
    activation = conf.get('activation', 'relu')

    # Resolve activation names to modules. A non-string value is passed
    # through untouched, as before; an unknown name fails here rather than
    # with an obscure error inside the forward pass.
    if isinstance(activation, str):
        known_activations = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid}
        if activation not in known_activations:
            raise ValueError(
                f"unsupported activation '{activation}'; expected one of {sorted(known_activations)}"
            )
        activation = known_activations[activation]()

    # TODO: n_inputs / n_outputs are hard-coded; they should be derived from
    # the data or from the configuration.
    model = Net(n_inputs=2, n_outputs=1, n_hidden_nodes=num_nodes, n_hidden_layers=num_hidden_layers, activation=activation, output_activation=nn.Sigmoid())

    # `.to(device)` is a no-op when the module/tensor already lives there, so
    # no CPU special case is needed. The model is moved before the optimizer
    # is built, as recommended by the torch.optim documentation.
    model = model.to(device)
    features_train = features_train.to(device)
    target_train = target_train.to(device)

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) #Adam optimizer
    model.train()

    for epoch in range(N_epochs): #N_epochs = number of full-batch iterations over the dataset
        out = model(features_train) #predictions from model
        loss = criterion(out.squeeze(), target_train.squeeze()) #loss between predictions and labels

        if epoch % 1000 == 0:
            print(f'epoch = {epoch} loss = {loss}')

        optimizer.zero_grad()
        loss.backward() #compute gradients
        optimizer.step() #update model

    # Final training loss. It uses the training tensors directly (not names
    # bound inside the loop) so it also works when N_epochs == 0, and it skips
    # autograd bookkeeping since no backward pass follows.
    with torch.no_grad():
        out = model(features_train)
        train_loss = criterion(out.squeeze(), target_train.squeeze())
    print(f'Train Loss : {train_loss}')

    def evaluate_model(model, features_test, target_test):
        '''Return the loss of `model` on the given test set (no gradients tracked).'''
        model.eval()

        features_test = features_test.to(device)
        target_test = target_test.to(device)

        with torch.no_grad():
            out = model(features_test)
            loss = criterion(out.squeeze(), target_test.squeeze())

        return loss

    test_loss = evaluate_model(model, features_test, target_test)
    print(f'Test  Loss : {test_loss}')

    write_to_store(bucket_name, {'test_loss': test_loss.item()}, f'score_{hyperparam_idx}', client)

    return hyperparam_idx


In [8]:
# Inputs for a single training run.
hyperparam_idx = 0  # TODO: this should be a notebook parameter
N_gridsize = 18
retcode_download = 0

retcode = train_model(
    hyperparam_idx=hyperparam_idx,
    retcode_download=retcode_download,
    N_gridsize=N_gridsize,
)

Device = cuda:0
epoch = 0 loss = 0.6913952827453613
Train Loss : 0.5236560702323914
Test  Loss : 0.5384036898612976
