In [1]:
import sys
from typing import Dict

import torch
from torch import nn
from torch.utils.data import DataLoader

try:
    from google.colab import drive

    IN_COLAB = True

    !pip3 install ax-platform
    !pip3 install tqdm

    drive.mount('/content/drive')
    path = "/content/drive/My Drive/Colab Notebooks/"


    # for python imports from google drive
    sys.path.append(path)
except:
    IN_COLAB = False
    path = "./"

from ax.service.managed_loop import optimize

from utils.datasets import Characters as TextDataset
from utils import Trainer, print_cuda_info, is_notebook
from models.baseline import BaselineNetwork

if is_notebook() or IN_COLAB:
    from tqdm import tqdm_notebook as tqdm
    print("running in notebook and/or colab")
    
else:
    from tqdm import tqdm


running in notebook and/or colab


In [2]:
def train(architecture, data_loader: DataLoader, parameters: Dict[str, float], device: torch.device) -> nn.Module:
    epochs = parameters.get("epochs", 1) 
    embedding_size = parameters.get("embedding_size", 16) 
    hidden_size = parameters.get("hidden_size", 128) 
    lr = parameters.get("lr", 0.01)

    samples = len(data_loader.dataset)
    batch_size = data_loader.batch_size
    updates = int((samples * epochs) / batch_size)
     
    print(f"epochs: {epochs} / embedding_size: {embedding_size} / hidden_size: {hidden_size} / lr: {lr:5f}")
     
    model = architecture(
        input_size=parameters.get("input_size", 1.0),
        embedding_size=embedding_size,
        hidden_sizes=[hidden_size],
        output_size=parameters.get("output_size", -1),
        linear_sizes=[],
        layer_norm=False)
     
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=lr)
     
    pbar = tqdm(total=updates)
     
     
    model.to(device)
    criterion.to(device)
     
    model.train()
     
    for e in range(epochs):
        for idx, (inputs, labels) in enumerate(data_loader):
            inputs = inputs.to(device=device)
            labels = labels.to(device=device)
     
            optimizer.zero_grad()
     
            (out) = model(inputs)
            logits = out[0]
     
            pred = logits.view(-1, logits.shape[2])
            true = labels.view(-1)
     
            loss = criterion(pred, true)
            loss.backward()
            optimizer.step()
     
            pbar.set_postfix_str(
                f"epoch: {e + 1}/{epochs} , train_loss: {loss:5f}")
     
            pbar.update()
     
    pbar.close()
     
    return model

In [3]:
def evaluate(model: nn.Module, data_loader: DataLoader, device: torch.device):
    criterion = nn.CrossEntropyLoss()

    model.eval()

    with torch.no_grad():
        losses = []
        # x, y = iter(val_loader).next()
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # TODO find better generic way
            (out) = model(inputs)
            logits = out[0]

            loss = criterion(logits.view(-1, logits.shape[2]), labels.view(-1))

            losses.append(loss)

    losses = torch.stack(losses)

    score = float(torch.mean(losses))

    print(f"validation loss: {score}")

    return score


In [4]:
use_cuda = torch.cuda.is_available()
# use_cuda = False

if use_cuda:
    print_cuda_info()
    torch.cuda.empty_cache()

device = torch.device('cuda' if use_cuda else 'cpu')

project_name = "shakespeare"

seq_length = 100

path_data = path + "projects/" + project_name + "/data/"
path_states = path + "projects/" + project_name + "/states/"
dataset = TextDataset(path_data + "data.txt", seq_length)

train_dataset, valid_dataset = dataset.split()

batch_size_train = 64
batch_size_valid = int(len(valid_dataset) / 4) + 1
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True, pin_memory=use_cuda)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size_valid, shuffle=True, pin_memory=use_cuda)


cuda device: 0 / name: GeForce GTX 1080 Ti / cuda-capability: (6, 1) / memory: 11.0 GB


In [5]:
def train_evaluate(parameterization):
    model = train(BaselineNetwork, train_loader, parameters=parameterization, device=device)
    return evaluate(model, valid_loader, device)


In [None]:
best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "input_size", "type": "fixed", "value": 1},
        {"name": "embedding_size", "type": "range", "bounds": [22, 128]},
        {"name": "hidden_size", "type": "range", "bounds": [655, 1024]},
        {"name": "output_size", "type": "fixed", "value": len(dataset._vocabulary)},
        {"name": "lr", "type": "range", "bounds": [0.001, 0.01], "log_scale": True},
        {"name": "epochs", "type": "fixed", "value": 2},
    ],
    evaluation_function=train_evaluate,
    objective_name='cross entropy loss',
    minimize=True
)

print(best_parameters)

[INFO 03-03 19:46:45] ax.modelbridge.dispatch_utils: Using Bayesian Optimization generation strategy: GenerationStrategy(name='Sobol+GPEI', steps=[Sobol for 6 arms, GPEI for subsequent arms], generated 0 arm(s) so far). Iterations after 6 will take longer to generate due to model-fitting.
[INFO 03-03 19:46:45] ax.service.managed_loop: Started full optimization with 20 steps.
[INFO 03-03 19:46:45] ax.service.managed_loop: Running optimization trial 1...


epochs: 2 / embedding_size: 122 / hidden_size: 685 / lr: 0.003751



This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`



HBox(children=(FloatProgress(value=0.0, max=1306.0), HTML(value='')))




[INFO 03-03 19:47:47] ax.service.managed_loop: Running optimization trial 2...


validation loss: 1.2351291179656982
epochs: 2 / embedding_size: 112 / hidden_size: 916 / lr: 0.001177


HBox(children=(FloatProgress(value=0.0, max=1306.0), HTML(value='')))

In [None]:
print(values)

({'cross entropy loss': 1.1822179519846951}, {'cross entropy loss': {'cross entropy loss': 2.6354923964628006e-08}})
