# Load Preprocessed Dataset

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Each CSV's 'index' column becomes the index of the resulting frame.
# Training split: feature matrix and target.
X_train_bag = pd.read_csv('../data/beer_train.csv', index_col='index')
y_train = pd.read_csv('../data/beer_target_train.csv', index_col='index')
# Validation split: feature matrix and target.
X_valid_bag = pd.read_csv('../data/beer_valid.csv', index_col='index')
y_valid = pd.read_csv('../data/beer_target_valid.csv', index_col='index')

In [3]:
# Report (rows, columns) of every loaded frame: validation first, then training.
for frame in (X_valid_bag, y_valid, X_train_bag, y_train):
    print(frame.shape)

(523583, 918)
(523583, 1)
(1063030, 918)
(1063030, 1)


# Find Solution for NN

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [5]:
# Hold out 30% of the training data as an internal test set (fixed seed).
X_ttrain, X_test, y_ttrain, y_test = train_test_split(
    X_train_bag.values,
    y_train.values,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Flatten the single-column targets into 1-D label vectors.
y_ttrain = y_ttrain.reshape(-1)
y_test = y_test.reshape(-1)
# np.asarray accepts both the DataFrame read from CSV and an already converted
# array, so re-running this cell no longer fails on `.values`.
y_valid = np.asarray(y_valid).reshape(-1)

In [7]:
# Quick look at the training features, the labels and the largest label id.
for item in (X_ttrain, y_ttrain, y_ttrain.max()):
    print(item)

[[3.5 3.5 3.  ... 0.  0.  0. ]
 [2.  2.5 2.5 ... 0.  0.  0. ]
 [4.  4.  3.5 ... 0.  0.  0. ]
 ...
 [4.  4.5 4.  ... 0.  0.  0. ]
 [5.  4.5 5.  ... 0.  0.  0. ]
 [4.  4.5 4.  ... 0.  0.  0. ]]
[ 80  54  17 ... 100  26  12]
103


## Build torch dataset

In [8]:
# None of the arrays that become tensors may contain NaN.
for array in (X_ttrain, y_ttrain, X_test, y_test):
    assert not np.isnan(array).any()

In [9]:
# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

def X_to_tensor(df):
    """Turn a NumPy feature array into a float32 tensor placed on `device`.

    Despite its name, `df` must be a NumPy array (it is handed to
    `torch.from_numpy`), not a DataFrame.
    """
    as_float32 = torch.from_numpy(df).float()
    return as_float32.to(device)

def y_to_tensor(df):
    """Turn a NumPy label array into an int64 tensor placed on `device`.

    Despite its name, `df` must be a NumPy array (it is handed to
    `torch.from_numpy`), not a DataFrame.
    """
    as_int64 = torch.from_numpy(df).long()
    return as_int64.to(device)

# Training part of the internal split.
X_train_tensor = X_to_tensor(X_ttrain)
y_train_tensor = y_to_tensor(y_ttrain)
train_ds = TensorDataset(X_train_tensor, y_train_tensor)

# Held-out part of the internal split.
X_test_tensor = X_to_tensor(X_test)
y_test_tensor = y_to_tensor(y_test)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

# Separate validation set loaded from its own CSV files.
X_valid_tensor = X_to_tensor(X_valid_bag.values)
y_valid_tensor = y_to_tensor(y_valid)
valid_ds = TensorDataset(X_valid_tensor, y_valid_tensor)

Using cpu device


In [10]:
# The NumPy splits now live in the tensors; drop the array names to free memory.
del X_ttrain, X_test
del y_ttrain, y_test

In [11]:
batch_size = 1280

# Create data loaders.
# Reshuffle the training samples every epoch so SGD does not see the same
# batches in the same order each time; the evaluation loaders keep their order.
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=batch_size)
# The whole validation set is served as one batch (validate() asserts this).
valid_dataloader = DataLoader(valid_ds, batch_size=len(valid_ds))

# Sanity check: shape and dtype of the first training batch.
for XX, yy in train_dataloader:
    print(f"Shape of X [N, C, H, W]: {XX.shape}")
    print(f"Shape of y: {yy.shape} {yy.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([1280, 918])
Shape of y: torch.Size([1280]) torch.int64


## Creating Models

In [117]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier with three hidden ReLU layers (250, 164, 164).

    Args:
        in_features: number of input features. When omitted, the width of the
            first sample of the module-level `train_ds` is used, which is what
            the original hard-coded version did.
        num_classes: number of output logits (defaults to 104).
    """

    def __init__(self, in_features=None, num_classes=104):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: infer the width from the training data.
            in_features = len(train_ds[0][0])
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, 250),
            nn.ReLU(),
            nn.Linear(250, 164),
            nn.ReLU(),
            nn.Linear(164, 164),
            nn.ReLU(),
            nn.Linear(164, num_classes)
        )

    def forward(self, x):
        """Return the raw class logits for a batch `x`."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Instantiate the network, move it to the selected device and show its layers.
model = NeuralNetwork().to(device)
print(model)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=918, out_features=250, bias=True)
    (1): ReLU()
    (2): Linear(in_features=250, out_features=164, bias=True)
    (3): ReLU()
    (4): Linear(in_features=164, out_features=164, bias=True)
    (5): ReLU()
    (6): Linear(in_features=164, out_features=104, bias=True)
  )
)


## Train the Model

In [118]:
# Cross-entropy on the raw logits, optimised with SGD at a fixed learning rate.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02)

In [119]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return the mean per-batch training loss.

    Args:
        dataloader: yields `(features, targets)` batches; `dataloader.dataset`
            is only used for the progress printout.
        model: module to optimise; it is switched to training mode.
        loss_fn: callable `loss_fn(predictions, targets)` returning a scalar tensor.
        optimizer: optimizer holding the parameters of `model`.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.train()
    train_loss = 0.0
    for batch, (XX, yy) in enumerate(dataloader):
        XX, yy = XX.to(device), yy.to(device)

        # Compute prediction error
        pred = model(XX)
        loss = loss_fn(pred, yy)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        batch_loss = loss.item()
        train_loss += batch_loss

        # Progress line every 100 batches.
        if batch % 100 == 0:
            current = (batch + 1) * len(XX)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    # Average the accumulated *training* loss; the previous code returned the
    # undefined local `test_loss` here.
    return train_loss / num_batches

In [53]:
from sklearn.metrics import f1_score, classification_report

def test(dataloader, model, loss_fn):
    """Evaluate `model` on `dataloader` without updating it.

    Prints a summary and returns `(test_loss, correct)`: the mean batch loss
    and the mean of the per-batch weighted F1 scores. The printed label says
    "Accuracy", but the value is the weighted F1 score.
    """
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for XX, yy in dataloader:
            XX, yy = XX.to(device), yy.to(device)
            pred = model(XX)
            test_loss += loss_fn(pred, yy).item()
            # scikit-learn needs host memory; tensors on cuda/mps cannot be
            # converted implicitly, so copy labels and predictions to the CPU.
            y_true = yy.cpu().numpy()
            y_pred = pred.argmax(1).cpu().numpy()
            correct += f1_score(y_true, y_pred, average='weighted')
    test_loss /= num_batches
    correct /= num_batches
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss, correct

In [121]:
# Per-epoch history: mean training loss, mean test loss and test score.
losses, test_losses, accs = [], [], []

epochs = 50
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train(train_dataloader, model, loss_fn, optimizer)
    losses.append(epoch_train_loss)

    test_loss, acc = test(test_dataloader, model, loss_fn)
    accs.append(acc)
    test_losses.append(test_loss)
print("Done!")

Epoch 1
-------------------------------
loss: 4.651258  [ 1280/744121]
loss: 4.590582  [129280/744121]
loss: 4.317019  [257280/744121]
loss: 4.137877  [385280/744121]
loss: 4.086305  [513280/744121]
loss: 4.127110  [641280/744121]
Test Error: 
 Accuracy: 1.0%, Avg loss: 4.100077 

Epoch 2
-------------------------------
loss: 4.085429  [ 1280/744121]
loss: 4.158247  [129280/744121]
loss: 4.107521  [257280/744121]
loss: 4.067199  [385280/744121]
loss: 4.017262  [513280/744121]
loss: 4.048603  [641280/744121]
Test Error: 
 Accuracy: 2.8%, Avg loss: 3.987358 

Epoch 3
-------------------------------
loss: 3.967106  [ 1280/744121]
loss: 4.003461  [129280/744121]
loss: 3.889530  [257280/744121]
loss: 3.830221  [385280/744121]
loss: 3.732576  [513280/744121]
loss: 3.799212  [641280/744121]
Test Error: 
 Accuracy: 4.4%, Avg loss: 3.739028 

Epoch 4
-------------------------------
loss: 3.723816  [ 1280/744121]
loss: 3.792524  [129280/744121]
loss: 3.719739  [257280/744121]
loss: 3.705720  [38

In [122]:
# Dump the recorded per-epoch histories.
for label, history in (('losses', losses), ('test_losses', test_losses), ('accs', accs)):
    print(label, history)

losses [4.281891352532246, 4.065095214909294, 3.8438163127276495, 3.687708723176386, 3.587566351972495, 3.457934481171808, 3.3195913341856493, 3.220066019759555, 3.1029259437547925, 2.9684141349956343, 2.8289376898729515, 2.7059300941290316, 2.6310529209084526, 2.5672429607496228, 2.502283347840981, 2.4452972207282415, 2.3849764124224686, 2.328687124235933, 2.284892273932388, 2.2351583557849897, 2.182858495163344, 2.1256918827283013, 2.0576167561344265, 2.0115473100409886, 1.9689460118202, 1.924617920954203, 1.870258873270959, 1.822097195587617, 1.7748977673012776, 1.720839798040816, 1.6776668376119686, 1.641430725141899, 1.5988564878394924, 1.5611950411010034, 1.5238676454193403, 1.4791595827263246, 1.4475888882306023, 1.409057339442145, 1.380136363694758, 1.3526473266562236, 1.3213375022321223, 1.297745835944959, 1.2701060827245418, 1.2479393560042495, 1.2182026284257161, 1.1987428767574613, 1.17598285144547, 1.1567707189784426, 1.1377410856923698, 1.1169076702234262]
test_losses [4.

In [123]:
sns.set()

# Per-epoch score on the held-out test split.
plt.plot(range(len(accs)), accs)
plt.xlabel('Epochs')
plt.ylabel("Accuracy")
plt.savefig("../results/beer_init_nn_acc.png", bbox_inches="tight")
plt.clf()

# Training vs. test loss per epoch.
plt.plot(range(len(losses)), losses, label="Training")
plt.plot(range(len(test_losses)), test_losses, label="Test")
plt.xlabel('Epochs')
plt.ylabel("Loss")
# The legend must be drawn before savefig, otherwise the saved image lacks it.
plt.legend(loc="upper left")
plt.savefig("../results/beer_init_nn_loss.png", bbox_inches="tight")
plt.clf()

<Figure size 640x480 with 0 Axes>

## Test model

In [54]:
def validate(dataloader, model):
    """Print a per-class classification report for `model` on `dataloader`.

    The dataloader must deliver the whole dataset as a single batch; this is
    asserted so the report covers every sample at once.
    """
    num_batches = len(dataloader)
    assert num_batches == 1
    model.eval()
    with torch.no_grad():
        for XX, yy in dataloader:
            XX, yy = XX.to(device), yy.to(device)
            pred = model(XX)
            # scikit-learn needs host memory; tensors on cuda/mps cannot be
            # converted implicitly, so copy labels and predictions to the CPU.
            y_true = yy.cpu().numpy()
            y_pred = pred.argmax(1).cpu().numpy()
            print(classification_report(y_true, y_pred))

In [126]:
# Score the trained model on the validation set: test() prints a summary and
# returns (avg loss, score); validate() prints the per-class report.
print(test(valid_dataloader, model, loss_fn))
validate(valid_dataloader, model)

Test Error: 
 Accuracy: 71.3%, Avg loss: 1.087334 

(1.0873336791992188, 0.7130123746836958)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.78      0.43      0.55      2528
           1       0.59      0.85      0.70     10196
           2       0.57      0.84      0.68     15084
           3       0.85      0.44      0.58      3100
           4       0.72      0.81      0.76      8778
           5       0.79      0.56      0.66      3821
           6       0.54      0.52      0.53      4123
           7       0.91      0.64      0.75      8397
           8       0.00      0.00      0.00       498
           9       0.84      0.91      0.87     28284
          10       0.67      0.76      0.72      1869
          11       0.83      0.86      0.84     16721
          12       0.87      0.93      0.90     38886
          13       0.87      0.72      0.79      1325
          14       0.72      0.82      0.77     20859
          15       0.56      0.17      0.26      3022
          16       0.46      0.80      0.58      7963
          17       0.88    

  _warn_prf(average, modifier, msg_start, len(result))


In [127]:
# Drop the initial model and its training objects before the parameter search.
del model, loss_fn, optimizer
del train_dataloader, test_dataloader

# Parameter Search

Parameters to test: learning rate, batch size, layer sizes, activation function, dropout. \
Activation functions: ReLU, sigmoid, linear (identity).

## Shrink Dataset

Use only 1% of the data for parameter testing.

In [13]:
# Keep only the 1% "test" side of the split as the small search dataset;
# the 99% remainder is discarded.
_, X_train_bag_small, _, y_train_small = train_test_split(
    X_train_bag,
    y_train,
    test_size=0.01,
    random_state=42,
)

In [14]:
# Features and targets of the subset must have the same number of rows.
for subset in (X_train_bag_small, y_train_small):
    print(len(subset))

10631
10631


In [15]:
# Shape of the sampled feature frame and a preview of the sampled targets.
print(X_train_bag_small.shape)
print(y_train_small)

(10631, 918)
         class
index         
1152728      9
1107736     17
546778      47
528381      14
90768       14
...        ...
1222870    103
660811       9
822156      14
840078       9
788215      25

[10631 rows x 1 columns]


In [16]:
# Split the small subset 70/30 into training and test parts (fixed seed).
X_ttrain_small, X_test_small, y_ttrain_small, y_test_small = train_test_split(
    X_train_bag_small.values,
    y_train_small.values,
    test_size=0.3,
    random_state=42,
)

In [17]:
# Collapse the target arrays to 1-D vectors with one label per row.
n_train_small = y_ttrain_small.shape[0]
n_test_small = y_test_small.shape[0]
y_ttrain_small = y_ttrain_small.reshape(n_train_small)
y_test_small = y_test_small.reshape(n_test_small)

In [18]:
# Training part of the small subset.
X_train_small_tensor = X_to_tensor(X_ttrain_small)
y_train_small_tensor = y_to_tensor(y_ttrain_small)
train_small_ds = TensorDataset(X_train_small_tensor, y_train_small_tensor)

# Test part of the small subset.
X_test_small_tensor = X_to_tensor(X_test_small)
y_test_small_tensor = y_to_tensor(y_test_small)
test_small_ds = TensorDataset(X_test_small_tensor, y_test_small_tensor)

## Run Searches

In [19]:
%load_ext autoreload

In [20]:
%autoreload 2

In [21]:
from sklearn.metrics import f1_score

In [29]:
def acc_func(loc_pred, loc_y):
    """Return the weighted F1 score, in percent, of predictions against labels.

    Args:
        loc_pred: 2-D scores with one row per sample; the predicted class is
            the argmax along axis 1.
        loc_y: true class labels.
    """
    labels = loc_pred.argmax(1)
    # scikit-learn needs host memory; move tensors (possibly on cuda/mps) to the CPU.
    if isinstance(labels, torch.Tensor):
        labels = labels.detach().cpu().numpy()
    if isinstance(loc_y, torch.Tensor):
        loc_y = loc_y.detach().cpu().numpy()
    return f1_score(loc_y, labels, average='weighted') * 100

In [23]:
from NNModel import NNModel

In [24]:
# Layer widths: input width taken from the first training sample, followed by
# 250, 164, 164 and 104.
layer = [len(train_small_ds[0][0]), 250, 164, 164, 104]
# NNModel is the project-local helper; the loss is passed as a class, not an instance.
nnmodel = NNModel(layer, device, acc_func=acc_func, loss_func=nn.CrossEntropyLoss)

## Grid Search

In [86]:
# Candidate architectures (used by the second grid search); all share the input width.
n_features = len(train_small_ds[0][0])
test_layer = [
    [n_features, 250, 164, 164, 104],
    [n_features, 25, 16, 16, 104],
    [n_features, 250, 164, 104],
]
# First grid: learning rate x batch size.
dict_param_1 = {
    "learning_rate": [0.001, 0.01, 0.05],
    "batch_size": [320, 640, 1280],
}
best, acc = nnmodel.grid_search(dict_param_1, train_small_ds, test_small_ds, epochs=50)
print(best)

Parameter Combination (0.001, 320) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.8

Parameter Combination (0.001, 640) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.1

Parameter Combination (0.001, 1280) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.0

Parameter Combination (0.01, 320) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.6

Parameter Combination (0.01, 640) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.9

Parameter Combination (0.01, 1280) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.6

Early stopping at epoch: 40
Parameter Combination (0.05, 320) with keys ['learning_rate', 'batch_size']
 Accuracy: 3.2

Parameter Combination (0.05, 640) with keys ['learning_rate', 'batch_size']
 Accuracy: 1.7

Parameter Combination (0.05, 1280) with keys ['learning_rate', 'batch_size']
 Accuracy: 1.0

Grid search took 1.3 minutes.
{'learning_rate': 0.05, 'batch_size': 320}


In [87]:
# Carry over the winners of the first grid, then search activation, dropout and layout.
for tuned_key in ("learning_rate", "batch_size"):
    nnmodel.defaults[tuned_key] = best[tuned_key]
dict_param_2 = {
    "activation": [nn.ReLU, nn.Sigmoid, nn.Identity],
    "dropout": [0, 0.2, 0.3, 0.5],
    "layer": test_layer,
}
best, acc = nnmodel.grid_search(dict_param_2, train_small_ds, test_small_ds, epochs=50)
print(best)

Early stopping at epoch: 48
Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0, [918, 250, 164, 164, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 4.3

Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0, [918, 25, 16, 16, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 2.4

Early stopping at epoch: 45
Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0, [918, 250, 164, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 8.3

Early stopping at epoch: 49
Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0.2, [918, 250, 164, 164, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 4.1

Early stopping at epoch: 44
Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0.2, [918, 25, 16, 16, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 2.7

Early stopping at epoch: 43
Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0.2, [918,

In [88]:
# Store the best activation, dropout and layout as the new defaults.
for tuned_key in ("activation", "dropout", "layer"):
    nnmodel.defaults[tuned_key] = best[tuned_key]

In [143]:
# Hard-coded parameter set; presumably the grid-search result pinned by hand so
# the cells below can run without repeating the search — TODO confirm.
nnmodel.defaults = {'learning_rate': 0.05, 'batch_size': 320, 'layer': [918, 250, 164, 104], 'activation': nn.Identity, 'dropout': 0.2}

In [144]:
# Run the current defaults on the full train/test datasets. The positional 100
# is presumably the number of epochs — confirm against NNModel.run.
print(nnmodel.defaults)
acc = nnmodel.run(nnmodel.defaults, train_ds, test_ds, 100, out=True, name="beer_grid_res")
print(acc)

{'learning_rate': 0.05, 'batch_size': 320, 'layer': [918, 250, 164, 104], 'activation': <class 'torch.nn.modules.linear.Identity'>, 'dropout': 0.2}
Epoch 1
-------------------------------
loss: 4.659156  [  320/744121]
loss: 436.894220  [32320/744121]
loss: 855.936353  [64320/744121]
loss: 1264.658367  [96320/744121]
loss: 1666.793379  [128320/744121]
loss: 2061.762254  [160320/744121]
loss: 2452.023403  [192320/744121]
loss: 2835.468275  [224320/744121]
loss: 3214.234700  [256320/744121]
loss: 3588.991687  [288320/744121]
loss: 3956.158355  [320320/744121]
loss: 4316.220711  [352320/744121]
loss: 4672.891688  [384320/744121]
loss: 5021.162419  [416320/744121]
loss: 5367.060753  [448320/744121]
loss: 5706.399689  [480320/744121]
loss: 6042.238549  [512320/744121]
loss: 6374.064678  [544320/744121]
loss: 6702.711900  [576320/744121]
loss: 7026.497931  [608320/744121]
loss: 7348.562871  [640320/744121]
loss: 7665.578415  [672320/744121]
loss: 7982.876078  [704320/744121]
loss: 8295.89990

<Figure size 640x480 with 0 Axes>

In [145]:
# Evaluate the model held by nnmodel on the validation set: summary tuple
# (avg loss, score) first, then the per-class report.
acc = test(valid_dataloader, nnmodel.model, nnmodel.loss_fn)
print(acc)
validate(valid_dataloader, nnmodel.model)

Test Error: 
 Accuracy: 81.3%, Avg loss: 0.786533 

(0.786532998085022, 0.8127010633546179)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.97      0.73      0.83      2528
           1       0.81      0.78      0.80     10196
           2       0.91      0.77      0.83     15084
           3       0.95      0.63      0.76      3100
           4       0.72      0.90      0.80      8778
           5       0.73      0.82      0.77      3821
           6       0.78      0.61      0.69      4123
           7       0.77      0.84      0.80      8397
           8       0.83      0.15      0.26       498
           9       0.92      0.89      0.90     28284
          10       0.96      0.90      0.93      1869
          11       0.94      0.81      0.87     16721
          12       0.93      0.90      0.92     38886
          13       0.95      0.73      0.82      1325
          14       0.63      0.91      0.75     20859
          15       0.54      0.54      0.54      3022
          16       0.72      0.85      0.78      7963
          17       0.91    

  _warn_prf(average, modifier, msg_start, len(result))


In [146]:
# Snapshot the defaults instead of aliasing them: nnmodel.defaults is edited in
# place later on, and an alias would silently change together with it.
grid_best = {
    key: list(value) if isinstance(value, list) else value
    for key, value in dict(nnmodel.defaults).items()
}

In [None]:
%%capture cap
nnmodel.defaults['learning_rate'] = 0.05
nnmodel.defaults['layer'] = [918, 250, 164, 104]
nnmodel.defaults['batch_size'] = 320
nnmodel.defaults['dropout'] = 0.2
nnmodel.defaults['activation'] = nn.Linear
nnmodel.acc_func = acc_func
cv_acc = nnmodel.run_cv(nnmodel.defaults, train_small_ds, test_small_ds, epochs=100, k_folds=5)
print(cv_acc)

In [None]:
# Persist the stdout text held by the capture object `cap`.
with open('../results/beer_grid_search_res_cv.txt', 'w+') as out_file:
    out_file.write(cap.stdout)

## Local Search

In [128]:
# Start the local search from a fresh NNModel built with the `layer` list.
nnmodel = NNModel(layer, device, acc_func=acc_func, loss_func=nn.CrossEntropyLoss)

In [129]:
# Local search around the grid-search learning rate and batch size.
init_param = {key: grid_best[key] for key in ("learning_rate", "batch_size")}
best, acc = nnmodel.local_search(
    init_param,
    train_small_ds,
    test_small_ds,
    steps=5,
    epochs=50,
)
print(best)

Early stopping at epoch: 48
Step 0
Best Params, Parameter Combination {'learning_rate': 0.05, 'batch_size': 320}
 Accuracy: 3.3

Parameter Combination (0.03928, 226) with keys ['learning_rate', 'batch_size']
 Accuracy: 5.4

Parameter Combination (0.03928, 393) with keys ['learning_rate', 'batch_size']
 Accuracy: 2.8

Early stopping at epoch: 31
Parameter Combination (0.06394, 226) with keys ['learning_rate', 'batch_size']
 Accuracy: 3.1

Parameter Combination (0.06394, 393) with keys ['learning_rate', 'batch_size']
 Accuracy: 4.0

Grid search took 0.9 minutes.
Step 1
Best Params, Parameter Combination {'learning_rate': 0.03928, 'batch_size': 226}
 Accuracy: 5.4

Early stopping at epoch: 42
Parameter Combination (0.02879, 166) with keys ['learning_rate', 'batch_size']
 Accuracy: 3.3

Parameter Combination (0.02879, 278) with keys ['learning_rate', 'batch_size']
 Accuracy: 3.8

Early stopping at epoch: 31
Parameter Combination (0.04604, 166) with keys ['learning_rate', 'batch_size']
 Acc

In [130]:
# Fix the tuned learning rate and batch size, then search around the
# grid-search layout and dropout.
for tuned_key in ("learning_rate", "batch_size"):
    nnmodel.defaults[tuned_key] = best[tuned_key]
init_param = {key: grid_best[key] for key in ("layer", "dropout")}
best, acc = nnmodel.local_search(
    init_param,
    train_small_ds,
    test_small_ds,
    steps=5,
    epochs=50,
)

Step 0
Best Params, Parameter Combination {'layer': [918, 250, 164, 104], 'dropout': 0.2}
 Accuracy: 11.6

Early stopping at epoch: 43
Parameter Combination ([918, 202, 145, 104], 0.15407) with keys ['layer', 'dropout']
 Accuracy: 14.2

Parameter Combination ([918, 202, 145, 104], 0.25395) with keys ['layer', 'dropout']
 Accuracy: 9.8

Parameter Combination ([918, 309, 200, 104], 0.15407) with keys ['layer', 'dropout']
 Accuracy: 11.3

Parameter Combination ([918, 309, 200, 104], 0.25395) with keys ['layer', 'dropout']
 Accuracy: 15.2

Grid search took 1.0 minutes.
Step 1
Best Params, Parameter Combination {'layer': [918, 309, 200, 104], 'dropout': 0.25395}
 Accuracy: 15.2

Parameter Combination ([918, 259, 145, 104], 0.19604) with keys ['layer', 'dropout']
 Accuracy: 10.8

Early stopping at epoch: 28
Parameter Combination ([918, 259, 145, 104], 0.31174) with keys ['layer', 'dropout']
 Accuracy: 10.8

Parameter Combination ([918, 383, 244, 104], 0.19604) with keys ['layer', 'dropout']


In [133]:
# Take dropout and layout from the local search, the activation from the grid search.
for tuned_key in ("dropout", "layer"):
    nnmodel.defaults[tuned_key] = best[tuned_key]
nnmodel.defaults["activation"] = grid_best["activation"]

In [141]:
# Run the local-search defaults on the full train/test datasets. The positional
# 100 is presumably the number of epochs — confirm against NNModel.run.
print(nnmodel.defaults)
acc = nnmodel.run(nnmodel.defaults, train_ds, test_ds, 100, out=True, name="beer_local_res")
print(acc)

{'learning_rate': 0.05916, 'batch_size': 220, 'layer': [918, 316, 196, 104], 'activation': <class 'torch.nn.modules.linear.Identity'>, 'dropout': 0.23525}
Epoch 1
-------------------------------
loss: 4.633439  [  220/744121]
loss: 434.442266  [22220/744121]
loss: 851.367460  [44220/744121]
loss: 1257.674482  [66220/744121]
loss: 1657.493434  [88220/744121]
loss: 2050.411250  [110220/744121]
loss: 2437.781193  [132220/744121]
loss: 2818.604261  [154220/744121]
loss: 3192.540667  [176220/744121]
loss: 3559.655570  [198220/744121]
loss: 3919.524061  [220220/744121]
loss: 4274.038711  [242220/744121]
loss: 4624.321822  [264220/744121]
loss: 4969.303653  [286220/744121]
loss: 5308.135767  [308220/744121]
loss: 5645.444393  [330220/744121]
loss: 5979.252656  [352220/744121]
loss: 6309.299515  [374220/744121]
loss: 6634.927290  [396220/744121]
loss: 6954.938415  [418220/744121]
loss: 7277.023932  [440220/744121]
loss: 7593.155709  [462220/744121]
loss: 7909.512070  [484220/744121]
loss: 8222

<Figure size 640x480 with 0 Axes>

In [142]:
# Evaluate the model held by nnmodel on the validation set: summary tuple
# (avg loss, score) first, then the per-class report.
acc = test(valid_dataloader, nnmodel.model, nnmodel.loss_fn)
print(acc)
validate(valid_dataloader, nnmodel.model)

Test Error: 
 Accuracy: 81.4%, Avg loss: 0.791916 

(0.791915774345398, 0.8142797309804871)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.87      0.80      0.83      2528
           1       0.54      0.92      0.68     10196
           2       0.92      0.72      0.81     15084
           3       0.75      0.78      0.77      3100
           4       0.83      0.87      0.85      8778
           5       0.83      0.79      0.81      3821
           6       0.83      0.63      0.71      4123
           7       0.87      0.79      0.83      8397
           8       0.39      0.50      0.44       498
           9       0.93      0.88      0.90     28284
          10       0.97      0.89      0.93      1869
          11       0.92      0.84      0.88     16721
          12       0.94      0.88      0.91     38886
          13       0.93      0.75      0.83      1325
          14       0.86      0.82      0.84     20859
          15       0.51      0.59      0.55      3022
          16       0.58      0.88      0.70      7963
          17       0.78    

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
%%capture cap
nnmodel.defaults['learning_rate'] = 0.05916
nnmodel.defaults['layer'] = [918, 316, 196, 104]
nnmodel.defaults['batch_size'] = 220
nnmodel.defaults['dropout'] = 0.23525
nnmodel.defaults['activation'] = nn.Linear
nnmodel.acc_func = acc_func
cv_acc = nnmodel.run_cv(nnmodel.defaults, train_small_ds, test_small_ds, epochs=100, k_folds=5)
print(cv_acc)

In [None]:
# Persist the stdout text held by the capture object `cap`.
with open('../results/beer_local_search_res_cv.txt', 'w+') as out_file:
    out_file.write(cap.stdout)

## Cross Validation

### Grid Search

In [30]:
# Fresh NNModel for the cross-validated searches, with the same layer widths
# as before (input width from the first training sample, then 250, 164, 164, 104).
layer = [len(train_small_ds[0][0]), 250, 164, 164, 104]
nnmodel = NNModel(layer, device, acc_func=acc_func, loss_func=nn.CrossEntropyLoss)

In [33]:
# Candidate architectures (used by the second grid search); all share the input width.
n_features = len(train_small_ds[0][0])
test_layer = [
    [n_features, 250, 164, 164, 104],
    [n_features, 25, 16, 16, 104],
    [n_features, 250, 164, 104],
]
# First grid: learning rate x batch size, scored with 3-fold cross validation.
dict_param_1 = {
    "learning_rate": [0.001, 0.01, 0.05],
    "batch_size": [320, 640, 1280],
}
best, acc = nnmodel.grid_search(
    dict_param_1,
    train_small_ds,
    test_small_ds,
    epochs=50,
    cv=True,
    k_folds=3,
)
print(best)

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 1.18205

FOLD 1
--------------------------------
The actual fold accuracy is 0.59869

FOLD 2
--------------------------------
The actual fold accuracy is 0.94789

Parameter Combination (0.001, 320) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.90954

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 0.92391

FOLD 1
--------------------------------
The actual fold accuracy is 0.46196

FOLD 2
--------------------------------
The actual fold accuracy is 0.01358

Parameter Combination (0.001, 640) with keys ['learning_rate', 'batch_size']
 Accuracy: 0.46648

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 0.25185

FOLD 1
--------------------------------
The actual fold accuracy is 0.74325

FOLD 2
--------------------------------
The actual fold accuracy is 0.13113

Parameter Comb

In [35]:
# Carry over the winners of the first grid, then search activation, dropout
# and layout with 3-fold cross validation.
for tuned_key in ("learning_rate", "batch_size"):
    nnmodel.defaults[tuned_key] = best[tuned_key]
dict_param_2 = {
    "activation": [nn.ReLU, nn.Sigmoid, nn.Identity],
    "dropout": [0, 0.2, 0.3, 0.5],
    "layer": test_layer,
}
best, acc = nnmodel.grid_search(
    dict_param_2,
    train_small_ds,
    test_small_ds,
    epochs=50,
    cv=True,
    k_folds=3,
)
print(best)

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 3.00168

FOLD 1
--------------------------------
The actual fold accuracy is 4.64105

FOLD 2
--------------------------------
The actual fold accuracy is 1.64407

Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0, [918, 250, 164, 164, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 3.09560

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 2.08413

FOLD 1
--------------------------------
The actual fold accuracy is 2.81337

FOLD 2
--------------------------------
The actual fold accuracy is 0.68734

Parameter Combination (<class 'torch.nn.modules.activation.ReLU'>, 0, [918, 25, 16, 16, 104]) with keys ['activation', 'dropout', 'layer']
 Accuracy: 1.86161

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 2.67620

FOLD 1
--------------------------------
T

In [36]:
# Store the best activation, dropout and layout as the new defaults.
for tuned_key in ("activation", "dropout", "layer"):
    nnmodel.defaults[tuned_key] = best[tuned_key]

In [41]:
print(nnmodel.defaults)
# Snapshot the defaults instead of aliasing them, so later in-place edits of
# nnmodel.defaults cannot change the stored grid-search result.
grid_best = {
    key: list(value) if isinstance(value, list) else value
    for key, value in dict(nnmodel.defaults).items()
}

{'learning_rate': 0.05, 'batch_size': 320, 'layer': [918, 250, 164, 104], 'activation': <class 'torch.nn.modules.linear.Identity'>, 'dropout': 0.2}


The cross-validated grid search selects the same parameters as the grid search with a single train/test split.

### Local Search

In [42]:
# Start the cross-validated local search from a fresh NNModel built with the `layer` list.
nnmodel = NNModel(layer, device, acc_func=acc_func, loss_func=nn.CrossEntropyLoss)

In [46]:
# Local search around the grid-search learning rate and batch size (3-fold CV).
init_param = {key: grid_best[key] for key in ("learning_rate", "batch_size")}
best, acc = nnmodel.local_search(
    init_param,
    train_small_ds,
    test_small_ds,
    steps=5,
    epochs=50,
    cv=True,
    k_folds=3,
)
print(best)

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 3.54474

FOLD 1
--------------------------------
The actual fold accuracy is 3.93098

FOLD 2
--------------------------------
Early stopping at epoch: 15
The actual fold accuracy is 0.89006

Step 0
Best Params, Parameter Combination {'learning_rate': 0.05, 'batch_size': 320}
 Accuracy: 2.78859

--------------------------------
FOLD 0
--------------------------------
The actual fold accuracy is 2.24433

FOLD 1
--------------------------------
The actual fold accuracy is 3.47200

FOLD 2
--------------------------------
Early stopping at epoch: 13
The actual fold accuracy is 1.40718

Parameter Combination (0.04018, 257) with keys ['learning_rate', 'batch_size']
 Accuracy: 2.37450

--------------------------------
FOLD 0
--------------------------------
Early stopping at epoch: 17
The actual fold accuracy is 1.04107

FOLD 1
--------------------------------
Early stopping at epoch: 18
The ac

In [47]:
# Fix the tuned learning rate and batch size, then search around the
# grid-search layout and dropout with 3-fold cross validation.
nnmodel.defaults["learning_rate"] = best["learning_rate"]
nnmodel.defaults["batch_size"] = best["batch_size"]
init_param = {"layer": grid_best["layer"], "dropout": grid_best["dropout"]}
# Fixed the misspelled name `init_par6am`, which raised a NameError.
best, acc = nnmodel.local_search(init_param, train_small_ds, test_small_ds, steps=5, epochs=50, cv=True, k_folds=3)

--------------------------------
FOLD 0
--------------------------------
Early stopping at epoch: 41
The actual fold accuracy is 10.48273

FOLD 1
--------------------------------
Early stopping at epoch: 34
The actual fold accuracy is 10.91913

FOLD 2
--------------------------------
Early stopping at epoch: 41
The actual fold accuracy is 9.73639

Step 0
Best Params, Parameter Combination {'layer': [918, 250, 164, 104], 'dropout': 0.2}
 Accuracy: 10.37942

--------------------------------
FOLD 0
--------------------------------
Early stopping at epoch: 37
The actual fold accuracy is 8.61152

FOLD 1
--------------------------------
Early stopping at epoch: 32
The actual fold accuracy is 6.60532

FOLD 2
--------------------------------
Early stopping at epoch: 34
The actual fold accuracy is 9.86726

Parameter Combination ([918, 197, 115, 104], 0.1602) with keys ['layer', 'dropout']
 Accuracy: 8.36136

--------------------------------
FOLD 0
--------------------------------
The actual fol

In [48]:
# Take dropout and layout from the local search, the activation from the grid search.
for tuned_key in ("dropout", "layer"):
    nnmodel.defaults[tuned_key] = best[tuned_key]
nnmodel.defaults["activation"] = grid_best["activation"]

In [51]:
# Run the cross-validated local-search defaults on the full train/test datasets.
# The positional 100 is presumably the number of epochs — confirm against NNModel.run.
print(nnmodel.defaults)
acc = nnmodel.run(nnmodel.defaults, train_ds, test_ds, 100, out=True, name="beer_local_cv_res")
print(acc)

{'learning_rate': 0.05197, 'batch_size': 196, 'layer': [918, 406, 238, 104], 'activation': <class 'torch.nn.modules.linear.Identity'>, 'dropout': 0.11181}
Epoch 1
-------------------------------
loss: 4.655267  [  196/744121]
loss: 428.313416  [19796/744121]
loss: 838.697327  [39396/744121]
loss: 1236.876504  [58996/744121]
loss: 1626.860841  [78596/744121]
loss: 2010.543652  [98196/744121]
loss: 2388.569301  [117796/744121]
loss: 2760.119589  [137396/744121]
loss: 3122.979672  [156996/744121]
loss: 3480.046861  [176596/744121]
loss: 3830.370941  [196196/744121]
loss: 4170.875550  [215796/744121]
loss: 4505.335046  [235396/744121]
loss: 4830.843104  [254996/744121]
loss: 5154.126434  [274596/744121]
loss: 5473.111553  [294196/744121]
loss: 5783.617405  [313796/744121]
loss: 6091.482241  [333396/744121]
loss: 6398.070493  [352996/744121]
loss: 6698.490442  [372596/744121]
loss: 6992.364316  [392196/744121]
loss: 7284.916224  [411796/744121]
loss: 7574.535567  [431396/744121]
loss: 7860.

<Figure size 640x480 with 0 Axes>

In [55]:
# Evaluate the model held by nnmodel on the validation set: summary tuple
# (avg loss, score) first, then the per-class report.
acc = test(valid_dataloader, nnmodel.model, nnmodel.loss_fn)
print(acc)
validate(valid_dataloader, nnmodel.model)

Test Error: 
 Accuracy: 82.6%, Avg loss: 0.690558 

(0.6905580759048462, 0.8260817822388964)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.85      0.85      0.85      2528
           1       0.68      0.88      0.77     10196
           2       0.78      0.85      0.82     15084
           3       0.91      0.68      0.78      3100
           4       0.83      0.87      0.85      8778
           5       0.76      0.81      0.79      3821
           6       0.76      0.77      0.77      4123
           7       0.98      0.71      0.82      8397
           8       0.85      0.35      0.49       498
           9       0.67      0.95      0.79     28284
          10       0.94      0.90      0.92      1869
          11       0.92      0.87      0.89     16721
          12       0.86      0.93      0.89     38886
          13       0.91      0.77      0.84      1325
          14       0.86      0.85      0.85     20859
          15       0.57      0.69      0.63      3022
          16       0.78      0.84      0.81      7963
          17       0.94    

  _warn_prf(average, modifier, msg_start, len(result))


# Random Forest

In [105]:
from sklearn.ensemble import RandomForestClassifier
import time

# Time the construction and fit of the baseline random forest.
start = time.time()
rf = RandomForestClassifier(
    n_estimators=20,
    max_features=100,
    random_state=42,
)

rf.fit(X_train_bag, y_train['class'])
end = time.time()

elapsed_minutes = round((end - start) / 60, 1)
print(f"Random Forest took {elapsed_minutes} minutes.")
# Predict the validation set with the fitted forest.
y_prediction = rf.predict(X_valid_bag)

Random Forest took 6.2 minutes.


In [106]:
from sklearn.metrics import f1_score, accuracy_score

# Validation metrics of the random forest: accuracy, weighted F1, per-class report.
accuracy = accuracy_score(y_valid, y_prediction)
f1 = f1_score(y_valid, y_prediction, average='weighted')

print(f'Accuracy: {accuracy}')
print(f'F1-Score: {f1}')

print(classification_report(y_valid, y_prediction))

Accuracy: 0.9619831048754448
F1-Score: 0.9619926208541042
              precision    recall  f1-score   support

           0       0.95      0.93      0.94      2528
           1       0.90      0.95      0.93     10196
           2       0.95      0.97      0.96     15084
           3       0.95      0.93      0.94      3100
           4       0.98      0.98      0.98      8778
           5       0.96      0.96      0.96      3821
           6       0.91      0.93      0.92      4123
           7       0.97      0.97      0.97      8397
           8       0.99      0.94      0.96       498
           9       0.98      0.99      0.98     28284
          10       0.99      0.98      0.98      1869
          11       0.98      0.98      0.98     16721
          12       0.98      0.99      0.98     38886
          13       0.99      0.97      0.98      1325
          14       0.96      0.97      0.96     20859
          15       0.91      0.92      0.91      3022
          16       0.96

In [None]:
from sklearn.model_selection import cross_validate

# Pass the target as a 1-D Series (as in the fit above) rather than a one-column frame.
cv_results = cross_validate(rf, X_train_bag, y_train['class'], scoring=('accuracy', 'f1_weighted'), return_train_score=False)

# With several scorers, cross_validate stores the fold scores under
# 'test_<scorer name>'; the plain scorer names are not keys of the result.
accuracy = np.mean(cv_results['test_accuracy'])
f1 = np.mean(cv_results['test_f1_weighted'])
print(f'CV Accuracy: {accuracy}')
print(f'CV F1-Score: {f1}')
