Prep Dataset

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from src.data.sets import load_sets

In [3]:
X_train, y_train, X_val, y_val, X_test, y_test = load_sets()

In [4]:
from src.models.pytorch import PytorchDataset

train_dataset = PytorchDataset(X=X_train, y=y_train)
val_dataset = PytorchDataset(X=X_val, y=y_val)
test_dataset = PytorchDataset(X=X_test, y=y_test)

Baseline Model

In [5]:
from src.models.null import NullModel
baseline_model = NullModel(target_type='classification')
y_base = baseline_model.fit_predict(y_train)

In [6]:
from src.models.performance import print_class_perf
print_class_perf(y_base, y_train, set_name='Training', average='weighted')

Accuracy Training: 0.07444192974099043
F1 Training: 0.010315310209270104


Define Architecture

This versin uses the model modified from lab5

In [7]:
from src.models.pytorch import PytorchMultiClass
model = PytorchMultiClass(X_train.shape[1])

In [8]:
from src.models.pytorch import get_device

device = get_device()
model.to(device)

PytorchMultiClass(
  (layer_1): Linear(in_features=6, out_features=104, bias=True)
  (layer_out): Linear(in_features=104, out_features=104, bias=True)
  (softmax): Softmax(dim=1)
)

Train Model

In [10]:
import torch
import torch.nn as nn

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

In [12]:
N_EPOCHS = 30
BATCH_SIZE = 1000

In [13]:
#use train_classification, test_classification defined in lab5
from src.models.pytorch import train_classification, test_classification

for epoch in range(N_EPOCHS):
    train_loss, train_acc = train_classification(train_dataset, model=model, criterion=criterion, optimizer=optimizer, batch_size=BATCH_SIZE, device=device, scheduler=scheduler)
    valid_loss, valid_acc = test_classification(val_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)

    print(f'Epoch: {epoch}')
    print(f'\t(train)\t|\tLoss: {train_loss:.4f}\t|\tAcc: {train_acc * 100:.1f}%')
    print(f'\t(valid)\t|\tLoss: {valid_loss:.4f}\t|\tAcc: {valid_acc * 100:.1f}%')

Epoch: 0
	(train)	|	Loss: 0.0045	|	Acc: 15.7%
	(valid)	|	Loss: 0.0045	|	Acc: 16.5%
Epoch: 1
	(train)	|	Loss: 0.0045	|	Acc: 16.3%
	(valid)	|	Loss: 0.0045	|	Acc: 16.5%
Epoch: 2
	(train)	|	Loss: 0.0045	|	Acc: 16.3%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 3
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.5%
Epoch: 4
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 5
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 6
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 7
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 8
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 9
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 10
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Epoch: 11
	(train)	|	Loss: 0.0045	|	Acc: 16.4%
	(valid)	|	Loss: 0.0045	|	Acc: 16.6%
Ep

KeyboardInterrupt: 

In [13]:
torch.save(model, "../models/pytorch_nn_v1.pt")

In [14]:
test_loss, test_acc = test_classification(test_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)
print(f'\tLoss: {test_loss:.4f}\t|\tAccuracy: {test_acc:.1f}')

	Loss: 0.0005	|	Accuracy: 0.2


Potential Reasons to low score:
1. Imbalanced data
2. Only 1 hidden layer constructed
3. Label encoder used instead of ordinal encoder, for scores maybe ordinal encoder is better.
4. Too many classes