# Single FC-NN (PyTorch) Experiment Versioning

<a href="https://colab.research.google.com/github/VertaAI/modeldb/blob/master/client/workflows/demos/PyTorch-Experiment-Versioning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This example is based on our [basic PyTorch example](../examples/pytorch.ipynb).

The example features:
- Single fully-connected neural network (FC-NN) model in PyTorch
- Experiment tracking and versioning with Verta

# Basic Verta Setup

In [1]:
# restart your notebook if prompted on Colab
try:
    import verta
except ImportError:
    !pip install verta

In [2]:
# address of the Verta instance the client connects to
HOST = "app.verta.ai"

# names passed to the client when setting the project and experiment
PROJECT_NAME = "Census Income Classification"
EXPERIMENT_NAME = "Single FC-NN"
# placeholder -- replace with your own workspace name before running
WORKSPACE = "XXXXXX"

In [3]:
import os
# placeholders -- replace with your own Verta credentials before running, and
# do not commit real values to version control
# NOTE(review): presumably the Verta client reads these from the environment,
# since no credentials are passed to Client() -- confirm
os.environ['VERTA_EMAIL'] = 'XXXXXXXXXXXX'
os.environ['VERTA_DEV_KEY'] = 'XXXXXXXXXXXXXXXXXXXXXXXX'

## Imports

In [4]:
from __future__ import print_function

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import itertools
import time

import numpy as np
import pandas as pd

from sklearn import datasets

import torch
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as optim
import torch.utils.data as data_utils

In [5]:
try:
    import wget
except ImportError:
    !pip install wget  # you may need pip3
    import wget

# Phase 1: Model Development

This section demonstrates logging model metadata and training artifacts to ModelDB.

## Instantiate Client

In [6]:
from verta import Client

# connect to the Verta instance at HOST
client = Client(HOST)
# set the client's project and experiment; `proj` is used later to query runs
# and is deleted in the final cleanup cell
proj = client.set_project(PROJECT_NAME, workspace=WORKSPACE, public_within_org=True)
expt = client.set_experiment(EXPERIMENT_NAME)

## Prepare Data

In [7]:
from verta.dataset import S3

# create a dataset version from the S3 bucket; train_model() attaches it to
# every experiment run via run.log_dataset_version()
dataset = client.set_dataset(name="Census Income S3")
dataset_version = dataset.create_version(S3("s3://verta-starter"))

In [8]:
def _fetch_once(url):
    """Download `url` unless a file with its detected name already exists; return that filename."""
    filename = wget.detect_filename(url)
    if not os.path.isfile(filename):
        wget.download(url)
    return filename


train_data_url = "http://s3.amazonaws.com/verta-starter/census-train.csv"
train_data_filename = _fetch_once(train_data_url)

test_data_url = "http://s3.amazonaws.com/verta-starter/census-test.csv"
test_data_filename = _fetch_once(test_data_url)

In [9]:
df_train = pd.read_csv(train_data_filename)
# every column but the last is used as a feature; the last column is the label
X = df_train.iloc[:,:-1]
y = df_train.iloc[:, -1]

# preview the first rows of the training data
df_train.head()

In [10]:
# gather indices to split training data into training and validation sets
SPLIT_SEED = 0  # fixed seed so the split (and the accuracies logged from it) is reproducible
# a local RandomState keeps the global NumPy random state untouched
shuffled_idxs = np.random.RandomState(SPLIT_SEED).permutation(len(y))
num_val = len(shuffled_idxs) // 10  # size of the validation hold-out
idxs_val = shuffled_idxs[:num_val]  # first 10%
idxs_train = shuffled_idxs[num_val:]  # last 90%

X_train, y_train = (torch.tensor(X.values[idxs_train], dtype=torch.float),
                    torch.tensor(y.values[idxs_train], dtype=torch.long))
X_val, y_val = (torch.tensor(X.values[idxs_val], dtype=torch.float),
                torch.tensor(y.values[idxs_val], dtype=torch.long))

In [11]:
# create Dataset object to support batch training
class TrainingDataset(data_utils.Dataset):
    """Pairs each feature row with its label so a DataLoader can batch them together."""

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        # the number of labels defines the dataset size
        return len(self.labels)

    def __getitem__(self, idx):
        sample, target = self.features[idx], self.labels[idx]
        return sample, target

## Define Model

In [12]:
class Net(nn.Module):
    """Fully-connected classifier with a single hidden layer.

    Args:
        num_features: number of input values per example after flattening.
        hidden_size: width of the hidden layer.
        dropout: dropout probability applied to the hidden activations.
        num_classes: number of output classes. Defaults to 2
            (assumes a binary label -- confirm against the data).
    """
    def __init__(self, num_features, hidden_size, dropout, num_classes=2):
        super(Net, self).__init__()
        self.fc      = nn.Linear(num_features, hidden_size)
        self.dropout = nn.Dropout(dropout)
        # One output unit per class. With a single unit the softmax below is
        # constantly 1.0, argmax is always 0, and CrossEntropyLoss rejects
        # any target other than class 0.
        self.output  = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class probabilities of shape (batch, num_classes)."""
        x = x.view(x.shape[0], -1)  # flatten non-batch dimensions
        x = func.relu(self.fc(x))
        x = self.dropout(x)
        # NOTE(review): train_model() feeds these probabilities to
        # CrossEntropyLoss, which applies log-softmax itself; returning raw
        # logits is the usual approach, but the softmax is kept here so the
        # model's outputs remain probabilities.
        x = func.softmax(self.output(x), dim=-1)
        return x

## Prepare hyperparameters

In [13]:
# candidate values for each hyperparameter
hyperparam_candidates = dict(
    hidden_size=[256, 512],
    dropout=[0.2, 0.3],
    loss_fn=["cross_entropy"],
    optimizer=["adam"],
    num_epochs=[5, 10],
    batch_size=[128, 256],
)
# expand the grid into one dict per combination of candidate values
param_names = list(hyperparam_candidates)
hyperparam_sets = [
    dict(zip(param_names, combo))
    for combo in itertools.product(*hyperparam_candidates.values())
]

## Train models

In [14]:
from verta.environment import Python

def _accuracy(model, features, labels):
    """Return the fraction of rows in `features` whose argmax prediction equals `labels`."""
    with torch.no_grad():  # no need to calculate gradients when assessing accuracy
        predicted = model(features).numpy().argmax(axis=1)
    return (predicted == labels.numpy()).mean()

def train_model(hyperparam_set):
    """Train one Net with the given hyperparameters and log the run to Verta.

    Logs attributes, code, hyperparameters, dataset version and training data,
    then per-epoch "train_acc"/"val_acc" observations and a model checkpoint,
    and finally the "train_acc" metric, the model and its requirements.

    Args:
        hyperparam_set: dict with keys "hidden_size", "dropout", "loss_fn",
            "optimizer", "num_epochs" and "batch_size".

    Raises:
        ValueError: if "loss_fn" is not "cross_entropy" or "optimizer" is
            not "adam".
    """
    # validate up front so an unsupported choice fails with a clear message
    # before an experiment run has been created
    if hyperparam_set["loss_fn"] != "cross_entropy":
        raise ValueError("unsupported loss_fn: {!r}".format(hyperparam_set["loss_fn"]))
    if hyperparam_set["optimizer"] != "adam":
        raise ValueError("unsupported optimizer: {!r}".format(hyperparam_set["optimizer"]))

    run = client.set_experiment_run()

    run.log_attributes({
        'library': "pytorch",
        'architecture': "fully-connected",
    })

    # log git information
    run.log_code()

    # create model and training optimizer
    run.log_hyperparameters(hyperparam_set)
    model = Net(
        num_features=X.shape[1],
        hidden_size=hyperparam_set["hidden_size"],
        dropout=hyperparam_set["dropout"],
    )
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # enable batching of training data
    dataloader = data_utils.DataLoader(
        TrainingDataset(X_train, y_train),
        batch_size=hyperparam_set["batch_size"],
        shuffle=True,
    )
    run.log_dataset_version("census_data", dataset_version)  # log dataset metadata
    run.log_training_data(X, y)  # log histogram of training data

    num_epochs = hyperparam_set["num_epochs"]
    for i_epoch in range(num_epochs):
        model.train()  # re-enable dropout, which evaluation below switches off
        epoch_loss_sum = 0.0
        for i_batch, (X_batch, y_batch) in enumerate(dataloader):
            model.zero_grad()  # reset model gradients

            output = model(X_batch)  # conduct forward pass

            loss = criterion(output, y_batch)  # compare model output w/ ground truth
            epoch_loss_sum += loss.item()

            # report the running mean over this epoch's batches so the
            # printed value matches its "epoch loss avg" label
            print(
                "\repoch {}/{} | iteration {}/{} | epoch loss avg: {}"
                .format(i_epoch+1, num_epochs, i_batch+1, len(dataloader), epoch_loss_sum/(i_batch+1)),
                end=''
            )

            loss.backward()  # backpropagate loss to calculate gradients
            optimizer.step()  # update model weights

        print()  # end the carriage-return progress line

        model.eval()  # disable dropout so accuracy reflects inference behavior
        train_acc = _accuracy(model, X_train, y_train)
        print("Training accuracy: {}".format(train_acc))
        run.log_observation("train_acc", train_acc)

        val_acc = _accuracy(model, X_val, y_val)
        print("Validation accuracy: {}".format(val_acc))
        run.log_observation("val_acc", val_acc)

        run.log_artifact("epoch_{}_checkpoint".format(i_epoch), model)

    # compute the final metric after the loop so it exists even when
    # num_epochs is 0; eval mode also means the logged model has dropout off
    model.eval()
    train_acc = _accuracy(model, X_train, y_train)
    print("Training accuracy: {}".format(train_acc))
    run.log_metric("train_acc", train_acc)
    run.log_model(model)
    run.log_requirements(["torch"])

In [15]:
# train and log one experiment run per hyperparameter combination
for hyperparams in hyperparam_sets:
    train_model(hyperparams)

---

# Phase 2: Staging

## Retrieve the best run

In [16]:
# take the run with the highest logged "train_acc" metric in the project
# NOTE(review): runs are ranked by training accuracy; train_model() logs
# validation accuracy only as an observation -- confirm this is the intended
# selection criterion
best_run = proj.expt_runs.sort("metrics.train_acc", descending=True)[0]
print("Training Accuracy: {:.4f}".format(best_run.get_metric("train_acc")))

best_hyperparams = best_run.get_hyperparameters()
print("Hyperparameters: {}".format(best_hyperparams))

## Register the best model

The best-performing model can be staged as a *registered model*, for use downstream.

In [17]:
# names used below to create and later look up the registered model and its version
REGISTERED_MODEL_NAME = "Fully-Connected Census Classifier"
MODEL_VERSION_NAME = "v0"

In [18]:
# fetch the registered model by name in WORKSPACE, creating it if it does not exist
registered_model = client.get_or_create_registered_model(
    name=REGISTERED_MODEL_NAME,
    workspace=WORKSPACE, public_within_org=True,
)

In [19]:
# create a model version named MODEL_VERSION_NAME from the best run
registered_model.create_version_from_run(best_run.id, name=MODEL_VERSION_NAME)

---

# Phase 3: Deployment

This registered model version can be deployed to an endpoint, after which predictions can be requested over REST or through the client.

In [20]:
# look the registered model and its version up again by name
registered_model = client.get_registered_model(name=REGISTERED_MODEL_NAME, workspace=WORKSPACE)
model_version = registered_model.get_version(name=MODEL_VERSION_NAME)

## Create and update an endpoint

In [21]:
# fetch the endpoint at path "/census" in WORKSPACE, creating it if it does not exist
endpoint = client.get_or_create_endpoint(path="/census", workspace=WORKSPACE, public_within_org=True)

In [22]:
# point the endpoint at the registered model version
# NOTE(review): wait=True presumably blocks until the update has finished -- confirm
endpoint.update(model_version, wait=True)

## Prepare "live" data

In [23]:
df_test = pd.read_csv(test_data_filename)
# drop the last column, mirroring how the training features were selected
X_test = df_test.iloc[:,:-1]

## Query deployed model

In [24]:
# display the model version that the endpoint was updated with
model_version

In [25]:
deployed_model = endpoint.get_deployed_model()

# Cap the number of requests so this cell terminates and the cleanup cell that
# follows is reached on "Run All"; an unbounded itertools.cycle never ends.
NUM_QUERIES = 20
shuffled_rows = X_test.sample(frac=1).values.tolist()
# cycle() still wraps around in case there are fewer rows than NUM_QUERIES
for x in itertools.islice(itertools.cycle(shuffled_rows), NUM_QUERIES):
    print(np.around(deployed_model.predict([x]), decimals=8))
    time.sleep(.5)  # pace the requests

---

In [26]:
# clean up what this notebook created: the endpoint, the registered model and the project
endpoint.delete()
registered_model.delete()
proj.delete()