# Neural Network Model 4
by Wenying Wu 14007025

Prerequisite:
- Docker

Sections:
- EDA
- Data Preparation

**NOTE:** This model was created because the previous versions did not perform well. I found Akshaj Verma's article at https://towardsdatascience.com/pytorch-tabular-multiclass-classification-9f8211a123ab, in which he used weighted sampling when splitting the dataset and a more complex neural network architecture than the one in lab 3. This notebook and data_prep_v3.ipynb follow the methodology Akshaj used, in order to see how it affects the results.

## 1. Load and Custom Datasets

### 1.1 Load datasets

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from src.data.sets import load_sets
X_train, y_train, X_val, y_val, X_test, y_test = load_sets()

In [3]:
from src.models.pytorch import ClassifierDataset
import torch
# Wrap every NumPy split in a ClassifierDataset: features are converted to
# float tensors and targets to long tensors.
train_dataset, val_dataset, test_dataset = (
    ClassifierDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(targets).long(),
    )
    for features, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

### 1.2 Weighted Sampling

Although a stratified split was used to create the train, validation, and test sets, the class imbalance is significant and there are 104 classes in this dataset, so I decided to use WeightedRandomSampler to over-sample the classes with fewer entries.

#### 1.2.1 Obtain target_list containing all outputs, then convert it to a tensor.

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
# Collect the target of every training sample by iterating the dataset, then
# pack them into one tensor so it can later index the per-class weights.
target_list = torch.tensor([label for _, label in train_dataset])

#### 1.2.2 Obtain count of all classes in training set. Then examine weights

Read the dictionaries required by the get_class_distribution() function

In [6]:
from src.data.data_ast2 import read_dicts
brewery_name_dict, beer_style_dict = read_dicts()

In [7]:
from src.data.data_ast2 import get_class_distribution
# Per-class sample counts of the training targets, in the order the
# distribution mapping yields its values.
class_count = list(get_class_distribution(y_train, beer_style_dict).values())
# Inverse-frequency weights: the fewer samples a class has, the larger its weight.
class_weights = 1.0 / torch.tensor(class_count, dtype=torch.float)
print(class_weights)

tensor([2.2962e-04, 5.7537e-05, 3.9994e-05, 1.9157e-04, 6.3460e-05, 1.5165e-04,
        1.5415e-04, 6.9027e-05, 1.4749e-03, 1.9632e-05, 3.0845e-04, 3.3517e-05,
        1.4744e-05, 4.3802e-04, 2.8755e-05, 1.9916e-04, 7.2939e-05, 3.5805e-05,
        7.3432e-05, 5.3206e-05, 9.9147e-05, 1.4584e-04, 2.6350e-04, 1.3539e-04,
        8.8051e-05, 4.4661e-05, 5.3447e-05, 5.2966e-04, 1.6077e-03, 2.4931e-04,
        7.4074e-04, 1.5083e-04, 1.6694e-03, 4.2644e-04, 8.0515e-04, 3.6075e-04,
        1.3742e-04, 8.0444e-05, 3.9293e-04, 8.5771e-05, 2.5113e-04, 6.1767e-04,
        1.2442e-04, 2.0190e-04, 9.2558e-05, 7.8927e-04, 1.0779e-04, 7.2929e-05,
        2.6738e-03, 1.5228e-04, 5.9172e-04, 3.5336e-04, 3.7793e-04, 9.4796e-05,
        6.2500e-04, 1.0319e-04, 2.7933e-03, 3.4048e-04, 2.5478e-04, 2.8225e-04,
        5.2157e-05, 7.8691e-05, 2.5707e-03, 2.8321e-04, 6.9930e-03, 6.4103e-05,
        1.6611e-04, 1.4104e-04, 2.2655e-04, 1.1062e-03, 6.7522e-04, 7.7942e-04,
        7.4074e-03, 2.1501e-04, 1.6327e-

In [8]:
from torch.utils.data import WeightedRandomSampler, DataLoader
# Give every training sample the weight of its own class (indexed by target).
class_weights_all = class_weights[target_list]
# Draw as many indices per epoch as there are training samples, with
# replacement, so that samples from low-count classes are picked more often.
weighted_sampler = WeightedRandomSampler(
    class_weights_all,
    len(class_weights_all),
    replacement=True,
)

## 2. Define Model parameters and Model 

### 2.1 Define Model Parameters

In [9]:
# Hyperparameters for the MulticlassClassification experiment.
EPOCHS = 200  # number of iterations of the manual training loop
BATCH_SIZE = 64  # mini-batch size of the training DataLoader (rebound later in this notebook)
LEARNING_RATE = 0.001  # learning rate passed to Adam
NUM_FEATURES = 6  # input width of the network; must match the number of columns in X_train
NUM_CLASSES = 104  # number of target classes in the dataset

### 2.2 Initiate Dataloader

In [10]:
# Training batches are drawn through the weighted sampler, so `shuffle` is
# left at its default (DataLoader does not allow a sampler together with
# shuffle=True).
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=weighted_sampler)
# Validation and test samples are served one per batch, in dataset order.
val_loader = DataLoader(val_dataset, batch_size=1)
test_loader = DataLoader(test_dataset, batch_size=1)

### 2.3 Define Neural Net Architecture

Modified from Akshaj Verma

In [10]:
class MulticlassClassification(nn.Module):
    """Feed-forward classifier with three hidden layers (512, 256, 128 units).

    Every hidden layer is followed by batch normalisation and ReLU. Dropout
    (p=0.2) is applied after the second and third hidden blocks only. The
    output layer returns raw scores; no softmax is applied here.

    Args:
        num_feature: Number of input features per sample.
        num_class: Number of output classes.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()

        # Registration order (linear layers, then activation/dropout, then
        # batch norms) is what print(model) and model.parameters() expose,
        # so it is kept stable.
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 256)
        self.layer_3 = nn.Linear(256, 128)
        self.layer_out = nn.Linear(128, num_class)

        # ReLU and Dropout hold no parameters, so one instance of each is
        # shared by all hidden blocks.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(256)
        self.batchnorm3 = nn.BatchNorm1d(128)

    def forward(self, x):
        """Return the raw class scores for a batch ``x``.

        ``x`` is expected to be a 2-D float tensor of shape
        (batch, num_feature); the result has shape (batch, num_class).
        """
        # Block 1: linear -> batch norm -> ReLU (no dropout).
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))
        # Blocks 2 and 3: linear -> batch norm -> ReLU -> dropout.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))
        return self.layer_out(hidden)


Check for GPU availability

In [11]:
from src.models.pytorch import get_device
device = get_device()

In [12]:
# Build the network and move it to the selected device
# (nn.Module.to returns the module itself).
model = MulticlassClassification(
    num_feature=NUM_FEATURES,
    num_class=NUM_CLASSES,
).to(device)

# Class-weighted cross-entropy; the weight tensor is moved to the same device
# as the model.
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
print(model)

MulticlassClassification(
  (layer_1): Linear(in_features=6, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=256, bias=True)
  (layer_3): Linear(in_features=256, out_features=128, bias=True)
  (layer_out): Linear(in_features=128, out_features=104, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


## 3. Train the model

Define 2 dictionaries that will store the accuracy/epoch and loss/epoch for both train and validation sets.

In [13]:
# Per-epoch metric history: one independent list per split.
accuracy_stats = {split: [] for split in ("train", "val")}
loss_stats = {split: [] for split in ("train", "val")}

Baseline Model

In [20]:
from src.models.null import NullModel
baseline_model = NullModel(target_type='classification')
y_base = baseline_model.fit_predict(y_train)

In [21]:
from src.models.performance import print_class_perf
print_class_perf(y_base, y_train, set_name='Training', average='weighted')

Accuracy Training: 0.07451090868679588
F1 Training: 0.010333772265033979


Define Architecture

This section uses trial and error to test different neural network architectures.

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [35]:
class PytorchMultiClass(nn.Module):
    """Baseline multi-class classifier with a single hidden layer.

    Args:
        num_features: Number of input features per sample.
        num_classes: Number of output classes. Defaults to 104, the value
            that used to be hard-coded.
        hidden_size: Width of the hidden layer. Defaults to 104, the value
            that used to be hard-coded.
    """

    def __init__(self, num_features, num_classes=104, hidden_size=104):
        super().__init__()

        self.fc1 = nn.Linear(num_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch ``x``.

        ``x`` must have at least two dimensions because the normalisation
        runs over dim 1. The output is already log-softmax normalised, so it
        pairs directly with ``nn.NLLLoss``; feeding it to
        ``nn.CrossEntropyLoss`` applies log-softmax a second time, which is
        redundant.
        """
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    
model = PytorchMultiClass(X_train.shape[1])

In [14]:
from src.models.pytorch import get_device

device = get_device()
model.to(device)

MulticlassClassification(
  (layer_1): Linear(in_features=6, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=256, bias=True)
  (layer_3): Linear(in_features=256, out_features=128, bias=True)
  (layer_out): Linear(in_features=128, out_features=104, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

Train Model

In [19]:
# NOTE(review): this rebinds `criterion` to an unweighted cross-entropy loss and
# `optimizer` to a new Adam instance (lr=0.1), replacing the class-weighted
# loss and the LEARNING_RATE-based optimizer defined earlier in the notebook.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
# StepLR with step_size=1: the learning rate is multiplied by 0.9 on every
# scheduler step.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

In [16]:
N_EPOCHS = 30  # number of epochs for the train_classification loop
BATCH_SIZE = 10000  # NOTE(review): rebinds the BATCH_SIZE = 64 defined earlier in the notebook

In [18]:
#use train_classification, test_classification defined in lab5
from src.models.pytorch import train_classification, test_classification

# Keyword arguments that the training and validation calls have in common.
shared_kwargs = dict(
    model=model,
    criterion=criterion,
    batch_size=BATCH_SIZE,
    device=device,
    accuracy_stats=accuracy_stats,
    loss_stats=loss_stats,
)

for epoch in range(N_EPOCHS):
    # Training step: batches come from the weighted sampler, hence shuffle=False.
    train_loss, train_acc = train_classification(
        train_dataset,
        optimizer=optimizer,
        scheduler=scheduler,
        sampler=weighted_sampler,
        shuffle=False,
        **shared_kwargs,
    )
    # Validation step on the held-out validation split.
    valid_loss, valid_acc = test_classification(val_dataset, **shared_kwargs)

    print(f'Epoch: {epoch}')
    print(f'\t(train)\t|\tLoss: {train_loss:.4f}\t|\tAcc: {train_acc * 100:.1f}%')
    print(f'\t(valid)\t|\tLoss: {valid_loss:.4f}\t|\tAcc: {valid_acc * 100:.1f}%')

Epoch: 0
	(train)	|	Loss: 0.0004	|	Acc: 10.0%
	(valid)	|	Loss: 0.0004	|	Acc: 9.7%
Epoch: 1
	(train)	|	Loss: 0.0003	|	Acc: 16.2%
	(valid)	|	Loss: 0.0004	|	Acc: 9.8%
Epoch: 2
	(train)	|	Loss: 0.0003	|	Acc: 19.1%
	(valid)	|	Loss: 0.0004	|	Acc: 12.7%
Epoch: 3
	(train)	|	Loss: 0.0003	|	Acc: 21.1%
	(valid)	|	Loss: 0.0003	|	Acc: 12.8%
Epoch: 4
	(train)	|	Loss: 0.0003	|	Acc: 22.4%
	(valid)	|	Loss: 0.0003	|	Acc: 13.9%
Epoch: 5
	(train)	|	Loss: 0.0003	|	Acc: 23.2%
	(valid)	|	Loss: 0.0003	|	Acc: 14.6%
Epoch: 6
	(train)	|	Loss: 0.0003	|	Acc: 24.0%
	(valid)	|	Loss: 0.0003	|	Acc: 15.6%
Epoch: 7
	(train)	|	Loss: 0.0003	|	Acc: 24.4%
	(valid)	|	Loss: 0.0003	|	Acc: 14.9%
Epoch: 8
	(train)	|	Loss: 0.0003	|	Acc: 25.2%
	(valid)	|	Loss: 0.0003	|	Acc: 17.3%
Epoch: 9
	(train)	|	Loss: 0.0003	|	Acc: 25.4%
	(valid)	|	Loss: 0.0003	|	Acc: 17.1%
Epoch: 10
	(train)	|	Loss: 0.0003	|	Acc: 26.1%
	(valid)	|	Loss: 0.0003	|	Acc: 17.3%
Epoch: 11
	(train)	|	Loss: 0.0003	|	Acc: 26.3%
	(valid)	|	Loss: 0.0003	|	Acc: 16.8%
Epoc

In [23]:
# Persist the entire model object (not only its state_dict) to the models
# directory. NOTE(review): loading a fully pickled model requires the model's
# class definition to be available at load time.
torch.save(model, "../models/pytorch_nn_v4.pt")

In [24]:
# Evaluate the current model on the test split using the current criterion,
# batch size and device, then report the returned loss and accuracy.
test_loss, test_acc = test_classification(test_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)
print(f'\tLoss: {test_loss:.4f}\t|\tAccuracy: {test_acc:.2f}')

	Loss: 0.0003	|	Accuracy: 0.3


Potential reasons for Model 1's low score:
1. Imbalanced data
2. Only 1 hidden layer constructed
3. Too many classes

Fully connected layers seem to work better: 1 hidden layer -> train/test 28%

In [None]:
from src.models.pytorch import ClassifierDataset
import torch
# Rebuild the three ClassifierDataset objects from the NumPy splits: features
# are converted to float tensors and targets to long tensors.
train_dataset, val_dataset, test_dataset = (
    ClassifierDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(targets).long(),
    )
    for features, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

Define 2 dictionaries that will store the accuracy/epoch and loss/epoch for both train and validation sets.

In [21]:
from src.models.pytorch import multi_acc
from tqdm.notebook import tqdm

In [22]:
# Reset the per-epoch metric history: one independent list per split.
accuracy_stats = {split: [] for split in ("train", "val")}
loss_stats = {split: [] for split in ("train", "val")}

In [23]:
train_dataset.y_data

tensor([65, 44, 25,  ..., 13, 12, 29])

In [24]:
print("Begin training.")
for e in tqdm(range(1, EPOCHS+1)):

    # ---- Training pass: parameters are updated after every batch ----
    model.train()
    train_epoch_loss = train_epoch_acc = 0
    for batch_features, batch_targets in train_loader:
        batch_features = batch_features.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        batch_scores = model(batch_features)
        batch_loss = criterion(batch_scores, batch_targets)
        batch_acc = multi_acc(batch_scores, batch_targets)

        batch_loss.backward()
        optimizer.step()

        train_epoch_loss += batch_loss.item()
        train_epoch_acc += batch_acc.item()

    # ---- Validation pass: evaluation mode, no gradient tracking ----
    model.eval()
    val_epoch_loss = val_epoch_acc = 0
    with torch.no_grad():
        for batch_features, batch_targets in val_loader:
            batch_features = batch_features.to(device)
            batch_targets = batch_targets.to(device)

            batch_scores = model(batch_features)

            val_epoch_loss += criterion(batch_scores, batch_targets).item()
            val_epoch_acc += multi_acc(batch_scores, batch_targets).item()

    # ---- Bookkeeping: store the per-batch averages of this epoch ----
    for history, train_total, val_total in (
        (loss_stats, train_epoch_loss, val_epoch_loss),
        (accuracy_stats, train_epoch_acc, val_epoch_acc),
    ):
        history['train'].append(train_total/len(train_loader))
        history['val'].append(val_total/len(val_loader))

    print(f'Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Val Loss: {val_epoch_loss/len(val_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Val Acc: {val_epoch_acc/len(val_loader):.3f}')

Begin training.


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))

Epoch 001: | Train Loss: 2.26243 | Val Loss: 5.08217 | Train Acc: 10.089| Val Acc: 1.763
Epoch 002: | Train Loss: 2.04585 | Val Loss: 4.98489 | Train Acc: 12.509| Val Acc: 2.120
Epoch 003: | Train Loss: 1.96061 | Val Loss: 4.90544 | Train Acc: 13.433| Val Acc: 2.596
Epoch 004: | Train Loss: 1.90892 | Val Loss: 4.92945 | Train Acc: 14.038| Val Acc: 2.339
Epoch 005: | Train Loss: 1.87645 | Val Loss: 4.96554 | Train Acc: 14.567| Val Acc: 2.744


KeyboardInterrupt: 