# Transformer test

__Objective:__ test a transformer model on the root inference problem.

In [1]:
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
import torch
from torch import nn

import sys

sys.path.append('../modules/')

from training import train_model
from plotting import plot_training_history
from models import TransformerClassifier

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print('Running on device:', device)

%load_ext autoreload
%autoreload 2

Running on device: cuda


In [2]:
# Naive Bayes benchmark
def get_NB_accuracy(x0s,xis_int,train_frac):
    """Score a Gaussian Naive Bayes classifier on a held-out split.

    Args:
        x0s: Target labels, one per sample.
        xis_int: Feature array; it is transposed before splitting, so samples
            are expected along its second axis (columns).
        train_frac: Fraction of the samples used for fitting; the remaining
            ``1 - train_frac`` is held out for scoring.

    Returns:
        Fraction of held-out samples whose predicted label equals the true one.
    """
    # A fixed random_state keeps the split identical from run to run.
    features_fit, features_eval, labels_fit, labels_eval = train_test_split(
        xis_int.T,
        x0s,
        test_size=1 - train_frac,
        random_state=0,
    )
    classifier = GaussianNB().fit(features_fit, labels_fit)
    hits = labels_eval == classifier.predict(features_eval)
    return np.mean(hits)

## Generate data

In [3]:
# Data that will be used.
# q, l, sigma and epsilon select which data file is opened; `seed` picks the
# slice of the loaded arrays to work with, and N_learn is the number of
# samples reserved for training.
q = 4
l = 4
sigma = 1.0
epsilon = 0.0
seed = 31

N_learn = 500

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# files from a trusted source.
# The unpacking overwrites q, l, sigma and epsilon with the values stored in
# the file itself.
[q,l,sigma,epsilon,x0s,xis,M_s] = np.load('../data/labeled_data_{}_{}_{}_{:.5f}.npy'.format(q,l,sigma,epsilon),allow_pickle=True)
x0 = x0s[:,seed]
xi = xis[:,:,seed]
train_frac = N_learn/len(x0)
phi_NB = get_NB_accuracy(x0,xi,train_frac)
# Report the split sizes as exact integers. Rebuilding them from the float
# fraction via int(train_frac*len(x0)) truncates and can be off by one.
print('Naives Bayes accuracy: {:.2f}, trained on {} and tested on {} trees.'.format(phi_NB,N_learn,len(x0)-N_learn))

Naives Bayes accuracy: 0.64, trained on 500 and tested on 9500 trees.


## Data preprocessing

In [None]:
# Convert the numpy data into torch tensors on the selected device.
# The first N_learn samples form the training set, the rest the test set;
# xi is transposed so that samples lie along the first tensor dimension.
x_train = torch.from_numpy(xi[:, :N_learn].T).int().to(device)
x_test = torch.from_numpy(xi[:, N_learn:].T).int().to(device)

# Alternative kept for reference: integer class-index targets (no one-hot).
# y_train = torch.from_numpy(x0[:N_learn]).long().to(device)
# y_test = torch.from_numpy(x0[N_learn:]).long().to(device)

# Targets actually used: one-hot rows over q classes, stored as float32.
y_train = nn.functional.one_hot(
    torch.from_numpy(x0[:N_learn]).long(),
    num_classes=q,
).float().to(device)
y_test = nn.functional.one_hot(
    torch.from_numpy(x0[N_learn:]).long(),
    num_classes=q,
).float().to(device)

## Model training

In [None]:
# Build the classifier and move it to the selected device.
# NOTE(review): seq_len and n_classes are hard-coded; presumably they should
# match x_train.shape[1] and q respectively — confirm against the data cell.
model = TransformerClassifier(
    seq_len=16,
    embedding_size=128,
    n_tranformer_layers=1,
    n_heads=1,
    n_classes=4,
    embedding_agg='mean',
).to(device)

# Total number of scalar entries across all model parameters.
n_params_model = sum(param.numel() for param in model.parameters())

print(f'N params (model): {n_params_model}')

# Forward pass on the training inputs; the result is shown as the cell output.
model(x_train)

In [None]:
# Training settings: epoch count handed to train_model as n_epochs, and the
# loss used to compare model outputs with the targets.
num_epochs = 200
loss_fn = nn.CrossEntropyLoss()

# No optimizer is constructed in this cell; only a learning rate is passed on.
# Only the second value returned by train_model (the history) is kept.
_, training_history = train_model(
    model=model,
    loss_fn=loss_fn,
    training_data=(x_train, y_train),
    test_data=(x_test, y_test),
    n_epochs=num_epochs,
    batch_size=32,
    learning_rate=1e-3,
    early_stopper=None,
)

In [None]:
# Show the training history alongside reference accuracies.
# 'NB' is the Naive Bayes benchmark accuracy stored in phi_NB.
# NOTE(review): the 'Census' value is hard-coded and its origin is not shown
# in this notebook — confirm where it comes from.
plot_training_history(
    training_history,
    baseline_accuracy={'NB': phi_NB, 'Census': 0.5772},
)

In [5]:
# Display the Naive Bayes benchmark accuracy (unrounded) as the cell output.
phi_NB

0.6428421052631579