In [12]:
import verta
HOST = "http://localhost:3000"

PROJECT_NAME = "MNIST Multiclassification"
EXPERIMENT_NAME = "FC-NN"

In [13]:
from verta import Client

client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
run = client.set_experiment_run()

connection successfully established
got existing Project: MNIST Multiclassification
got existing Experiment: FC-NN
created new ExperimentRun: Run 28116033024339126236


In [14]:
from __future__ import print_function

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import itertools
import time

import six

import numpy as np
import pandas as pd

from sklearn import datasets

import torch
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as optim
import torch.utils.data as data_utils

In [15]:
data = datasets.load_digits()

X = data['data']
y = data['target']

In [16]:
df = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))),
                  columns=["pixel_{}".format(i) for i in range(X.shape[-1])] + ['digit'])

df.head()

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_55,pixel_56,pixel_57,pixel_58,pixel_59,pixel_60,pixel_61,pixel_62,pixel_63,digit
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4.0


In [17]:
shuffled_idxs = np.random.permutation(len(y))
idxs_train = shuffled_idxs[int(len(shuffled_idxs)/10):]  # last 90%
idxs_val = shuffled_idxs[:int(len(shuffled_idxs)/10)]  # first 10%

X_train, y_train = (torch.tensor(X[idxs_train], dtype=torch.float),
                    torch.tensor(y[idxs_train], dtype=torch.long))
X_val, y_val = (torch.tensor(X[idxs_val], dtype=torch.float),
                torch.tensor(y[idxs_val], dtype=torch.long))

In [18]:
class TrainingDataset(data_utils.Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return (self.features[idx], self.labels[idx])

In [19]:
hidden_size = 512
run.log_hyperparameter("hidden_size", hidden_size)
dropout = 0.2
run.log_hyperparameter("dropout", dropout)

class Net(nn.Module):
    def __init__(self, num_features=X.shape[1],
                 hidden_size=hidden_size):
        super(Net, self).__init__()
        self.fc      = nn.Linear(num_features, hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.output  = nn.Linear(hidden_size, 10)
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)  # flatten non-batch dimensions
        x = func.relu(self.fc(x))
        x = self.dropout(x)
        x = func.softmax(self.output(x), dim=-1)
        return x

In [20]:
model = Net()

criterion = torch.nn.CrossEntropyLoss()
run.log_hyperparameter("loss_fn", "cross entropy")
optimizer = torch.optim.Adam(model.parameters())
run.log_hyperparameter("optimizer", "adam")

num_epochs = 5
run.log_hyperparameter("num_epochs", num_epochs)
batch_size = 32
run.log_hyperparameter("batch_size", batch_size)

In [21]:
dataset = TrainingDataset(X_train, y_train)
dataloader = data_utils.DataLoader(dataset,
                                   batch_size=batch_size,
                                   shuffle=True)

In [22]:
for i_epoch in range(num_epochs):
    for i_batch, (X_batch, y_batch) in enumerate(dataloader):
        model.zero_grad()  # reset model gradients

        output = model(X_batch)  # conduct forward pass

        loss = criterion(output, y_batch)  # compare model output w/ ground truth
        
        print("\repoch {}/{} | ".format(i_epoch+1, num_epochs), end='')
        print("iteration {}/{} | ".format(i_batch+1, len(dataloader)), end='')
        print("epoch loss avg: {}".format(loss.item()), end='')

        loss.backward()  # backpropogate loss to calculate gradients
        optimizer.step()  # update model weights
    with torch.no_grad():  # no need to calculate gradients when assessing accuracy
        print()
        
        pred_train = model(X_train).numpy().argmax(axis=1)
        train_acc = (pred_train == y_train.numpy()).mean()
        print("Training accuracy: {}".format(train_acc))
        run.log_observation("train_acc", train_acc)
        
        pred_val = model(X_val).numpy().argmax(axis=1)
        val_acc = (pred_val == y_val.numpy()).mean()
        print("Validation accuracy: {}".format(val_acc))
        run.log_observation("val_acc", val_acc)

epoch 1/5 | iteration 51/51 | epoch loss avg: 1.7983374595642095
Training accuracy: 0.7447466007416563
Validation accuracy: 0.7318435754189944
epoch 2/5 | iteration 51/51 | epoch loss avg: 1.6461865901947021
Training accuracy: 0.8702101359703337
Validation accuracy: 0.8491620111731844
epoch 3/5 | iteration 51/51 | epoch loss avg: 1.4948086738586426
Training accuracy: 0.9610630407911002
Validation accuracy: 0.9776536312849162
epoch 4/5 | iteration 51/51 | epoch loss avg: 1.4719872474670417epoch loss avg: 1.5479751825332642
Training accuracy: 0.9734239802224969
Validation accuracy: 0.9608938547486033
epoch 5/5 | iteration 51/51 | epoch loss avg: 1.5179731845855713
Training accuracy: 0.980840543881335
Validation accuracy: 0.9776536312849162


In [33]:
from verta.utils import ModelAPI
model_api = ModelAPI(X_train.tolist(), model(X_train).tolist())
requirements = ["torch"]

# save and log model
run.log_model(model, model_api=model_api)
run.log_requirements(requirements)

upload complete (custom_modules)
upload complete (model.pkl)
upload complete (model_api.json)
upload complete (requirements.txt)


In [34]:
repo = client.set_repository('MNIST Multiclassification')
commit = repo.get_commit(branch='master')

set existing Repository: MNIST Multiclassification from personal workspace


In [None]:
from verta.code import Notebook
code_version = Notebook()
commit.update('code', code_version)
commit.save("Capture model artifacts")

<IPython.core.display.Javascript object>

In [29]:
code_version

Notebook Version
    MNIST.ipynb
        16770 bytes
        last modified: 2020-10-21 23:18:51.777000
        MD5 checksum: c8b6cd1021cd211b9192bf4761e8f472
    Git Version
        dirty commit 50763abf8b426394442681a3df79b09c6e8d436e
        on branch master
        in repo https://github.com/adhikari23/sample.git

SyntaxError: invalid syntax (<ipython-input-31-1bfda9cd8331>, line 1)