### First model

First model had a final soft max layer. This caused the model's accuracy to hover around 25-27%

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from joblib import dump
import category_encoders as ce

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
df = pd.read_csv("../data/raw/beer_reviews.csv")

In [3]:
df_clean = df.copy()
df_clean = df_clean.dropna(subset=['brewery_name'])

In [4]:
#Reset the index
df_clean.reset_index(drop=True, inplace=True)

In [5]:
#Replace na alcohol levels by the average alcohol level for the beer style
df_clean["beer_abv"]=df_clean.groupby("beer_style")["beer_abv"].transform(lambda x: x.fillna(x.mean()))

In [6]:
#pop target into another y list
target = df_clean['beer_style']

In [7]:
le = LabelEncoder()

In [8]:
fitted_target = le.fit_transform(target)

In [9]:
brewery_id_new = pd.DataFrame(df_clean['brewery_id'])

brewery_id_new['brewery_id_count']=brewery_id_new.groupby('brewery_id')['brewery_id'].transform('count')
brewery_id_new['id_new'] = brewery_id_new['brewery_id_count'].transform(lambda x: x if x > 100 else 0)
brewery_id_new['id_new'] = brewery_id_new.loc[brewery_id_new['id_new'] > 100, 'brewery_id'].fillna(0)


brewery_id_new.fillna(0, inplace=True)


In [10]:
ce_target = ce.TargetEncoder(cols = ['id_new'], min_samples_leaf=270, smoothing = 0.5) #was 100,250 before, smoothing was 5

In [11]:
X=pd.DataFrame(brewery_id_new['id_new'])

In [12]:
Y=pd.DataFrame(fitted_target)

In [13]:
ce_target.fit(X,Y)

TargetEncoder(cols=['id_new'], drop_invariant=False, handle_missing='value',
              handle_unknown='value', min_samples_leaf=270, return_df=True,
              smoothing=0.5, verbose=0)

In [14]:
encoded_brewery_id=ce_target.transform(X,Y)

In [15]:
brewery_id_new['encoded_brewery_id'] = encoded_brewery_id

In [16]:
df_clean['encoded_brewery_id'] = encoded_brewery_id

In [17]:
num_cols=['review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'encoded_brewery_id']

In [18]:
#Create matrix of X variables
X_analysis = df_clean[num_cols]

In [19]:
#All the features are now numerical. Scale the features.
sc = StandardScaler()
X_analysis=  sc.fit_transform(X_analysis)

In [20]:
#Split model into train, validation, and test dataset
X_train, X_test, y_train, y_test = train_test_split(X_analysis, fitted_target, test_size=0.2, random_state=8)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=8) 

In [21]:
from src.models.pytorch import PytorchDataset

train_dataset = PytorchDataset(X=X_train, y=y_train)
val_dataset = PytorchDataset(X=X_val, y=y_val)
test_dataset = PytorchDataset(X=X_test, y=y_test)

In [22]:
class PytorchMultiClass(nn.Module):
    def __init__(self, num_features):
        super(PytorchMultiClass, self).__init__()
        self.layer_1 = nn.Linear(num_features, 512)
        self.layer_2 = nn.Linear(512, 256)
        self.layer_3 = nn.Linear(256, 128)
        self.layer_out = nn.Linear(128, 104)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.tanh(self.layer_1(x))
        x = F.tanh(self.layer_2(x))
        x = F.tanh(self.layer_3(x))
        x = self.layer_out(x)
        return self.softmax(x)


In [23]:
model = PytorchMultiClass(X_train.shape[1])

In [24]:
from src.models.pytorch import get_device

device = get_device()
model.to(device)

PytorchMultiClass(
  (layer_1): Linear(in_features=6, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=256, bias=True)
  (layer_3): Linear(in_features=256, out_features=128, bias=True)
  (layer_out): Linear(in_features=128, out_features=104, bias=True)
  (softmax): Softmax(dim=1)
)

In [25]:
criterion = nn.CrossEntropyLoss()

In [26]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [27]:
from src.models.pytorch import train_classification
from src.models.pytorch import test_classification

In [49]:
N_EPOCHS = 10
BATCH_SIZE = 1000

In [62]:
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train_classification(train_dataset, model=model, criterion=criterion, optimizer=optimizer, batch_size=BATCH_SIZE, device=device)
    valid_loss, valid_acc = test_classification(val_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)

    print(f'Epoch: {epoch}')
    print(f'\t(train)\t|\tLoss: {train_loss:.4f}\t|\tAcc: {train_acc * 100:.1f}%')
    print(f'\t(valid)\t|\tLoss: {valid_loss:.4f}\t|\tAcc: {valid_acc * 100:.1f}%')

Epoch: 0
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.5%
Epoch: 1
	(train)	|	Loss: 0.0044	|	Acc: 25.4%
	(valid)	|	Loss: 0.0044	|	Acc: 25.3%
Epoch: 2
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.4%
Epoch: 3
	(train)	|	Loss: 0.0044	|	Acc: 25.4%
	(valid)	|	Loss: 0.0044	|	Acc: 25.5%
Epoch: 4
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.4%
Epoch: 5
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.5%
Epoch: 6
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.5%
Epoch: 7
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.4%
Epoch: 8
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.3%
Epoch: 9
	(train)	|	Loss: 0.0044	|	Acc: 25.5%
	(valid)	|	Loss: 0.0044	|	Acc: 25.6%


In [63]:
#torch.save(model, "../models/beeroracle_final.pt")

In [64]:
test_loss, test_acc = test_classification(test_dataset, model=model, criterion=criterion, batch_size=BATCH_SIZE, device=device)
print(f'\tLoss: {test_loss:.4f}\t|\tAccuracy: {test_acc:.2f}')

	Loss: 0.0044	|	Accuracy: 0.26


It was re-run and accuracy increased to 27%

Epoch: 0
	(train)	|	Loss: 0.0044	|	Acc: 26.6%
	(valid)	|	Loss: 0.0044	|	Acc: 26.5%
Epoch: 1
	(train)	|	Loss: 0.0044	|	Acc: 26.6%
	(valid)	|	Loss: 0.0044	|	Acc: 26.7%
Epoch: 2
	(train)	|	Loss: 0.0044	|	Acc: 26.7%
	(valid)	|	Loss: 0.0044	|	Acc: 26.8%
Epoch: 3
	(train)	|	Loss: 0.0044	|	Acc: 26.7%
	(valid)	|	Loss: 0.0044	|	Acc: 26.9%
Epoch: 4
	(train)	|	Loss: 0.0044	|	Acc: 26.8%
	(valid)	|	Loss: 0.0044	|	Acc: 26.8%
Epoch: 5
	(train)	|	Loss: 0.0044	|	Acc: 26.7%
	(valid)	|	Loss: 0.0044	|	Acc: 26.8%
Epoch: 6
	(train)	|	Loss: 0.0044	|	Acc: 26.8%
	(valid)	|	Loss: 0.0044	|	Acc: 26.7%
Epoch: 7
	(train)	|	Loss: 0.0044	|	Acc: 26.7%
	(valid)	|	Loss: 0.0044	|	Acc: 26.7%
Epoch: 8
	(train)	|	Loss: 0.0044	|	Acc: 26.8%
	(valid)	|	Loss: 0.0044	|	Acc: 26.8%
Epoch: 9
	(train)	|	Loss: 0.0044	|	Acc: 26.8%
	(valid)	|	Loss: 0.0044	|	Acc: 26.8%