# Beer Prediction Full Model Process

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, LabelEncoder

import torch
import torch.nn as nn
import torch.nn.functional as F

## Data Transformations

In [2]:
# Enable IPython's autoreload extension in mode 2 so edited project modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the raw beer reviews CSV into a DataFrame.
# NOTE(review): absolute path — this only resolves where the project is mounted at /wd.
df_raw_beer = pd.read_csv('/wd/data/raw/beer_reviews.csv')

In [4]:
# Derive the modelling frame from the raw reviews without touching df_raw_beer:
# drop() returns a new frame, so no explicit copy or in-place mutation is needed.
df_cleaned = df_raw_beer.drop(
    columns=[
        'brewery_id',
        'review_profilename',
        'review_time',
        'beer_name',
        'beer_beerid',
        'review_overall',  # temp exclude
        'brewery_name',  # not include in the model
    ]
)
df_cleaned.head()

Unnamed: 0,review_aroma,review_appearance,beer_style,review_palate,review_taste,beer_abv
0,2.0,2.5,Hefeweizen,1.5,1.5,5.0
1,2.5,3.0,English Strong Ale,3.0,3.0,6.2
2,2.5,3.0,Foreign / Export Stout,3.0,3.0,6.5
3,3.0,3.5,German Pilsener,2.5,3.0,5.0
4,4.5,4.0,American Double / Imperial IPA,4.0,4.5,7.7


-- # [Inactive draft, not executed] Create a numeric version of the categorical features (requires `brewery_name`, which is dropped in the cell above)

le = LabelEncoder()

cats_dict = dict(enumerate(df_cleaned.brewery_name.unique()))
df_cleaned['brewery_name'] = le.fit_transform(df_cleaned['brewery_name'])
df_cleaned

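-- # [Inactive draft, not executed] Standardise the numeric features (lists `brewery_name` and `review_overall`, both dropped in the cell above)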

num_cols = ['brewery_name',
            'review_overall',
            'review_aroma',
            'review_appearance',
            'review_palate',
            'review_taste',
            'beer_abv']

sc = StandardScaler()
df_cleaned[num_cols] = sc.fit_transform(df_cleaned[num_cols])

df_cleaned.head()


In [5]:
# Standardise the numeric predictors; the beer_style target column is left as-is.
# NOTE(review): the scaler is fitted on the whole frame before the train/val/test split
# further down, so validation and test rows contribute to the fitted statistics.

num_cols = [
    'review_aroma',
    'review_appearance',
    'review_palate',
    'review_taste',
    'beer_abv',
]
sc = StandardScaler()
sc.fit(df_cleaned[num_cols])
df_cleaned[num_cols] = sc.transform(df_cleaned[num_cols])

df_cleaned.head()


Unnamed: 0,review_aroma,review_appearance,beer_style,review_palate,review_taste,beer_abv
0,-2.487952,-2.177663,Hefeweizen,-3.288833,-3.132454,-0.879382
1,-1.771225,-1.366096,English Strong Ale,-1.090123,-1.083188,-0.362703
2,-1.771225,-1.366096,Foreign / Export Stout,-1.090123,-1.083188,-0.233533
3,-1.054499,-0.55453,German Pilsener,-1.823026,-1.083188,-0.879382
4,1.095679,0.257037,American Double / Imperial IPA,0.375684,0.966078,0.283146


In [6]:
# Create a numeric (integer) version of the target variable

le = LabelEncoder()
df_cleaned['beer_style'] = le.fit_transform(df_cleaned['beer_style'])

# Lookup from integer code back to the original style name.
# LabelEncoder numbers classes by sorted label, not by order of first appearance,
# so the mapping has to be taken from le.classes_; enumerating .unique() would
# pair codes with the wrong styles (e.g. the first row, Hefeweizen, is code 65, not 0).
cats_dict = dict(enumerate(le.classes_))
df_cleaned

Unnamed: 0,review_aroma,review_appearance,beer_style,review_palate,review_taste,beer_abv
0,-2.487952,-2.177663,65,-3.288833,-3.132454,-0.879382
1,-1.771225,-1.366096,51,-1.090123,-1.083188,-0.362703
2,-1.771225,-1.366096,59,-1.090123,-1.083188,-0.233533
3,-1.054499,-0.554530,61,-1.823026,-1.083188,-0.879382
4,1.095679,0.257037,9,0.375684,0.966078,0.283146
...,...,...,...,...,...,...
1586609,0.378953,-0.554530,85,0.375684,0.282989,-0.793269
1586610,1.812405,-2.177663,85,-2.555929,0.282989,-0.793269
1586611,-0.337773,-1.366096,85,-0.357219,0.282989,-0.793269
1586612,1.095679,1.068603,85,1.108588,0.966078,-0.793269


## Split the data

In [7]:
from src.data.sets import split_sets_random, save_sets

# Randomly split the frame into train / validation / test features and targets,
# with beer_style as the target and test_ratio=0.2.
# NOTE(review): how the validation share is derived from test_ratio, and whether the
# split is seeded, is decided inside src/data/sets — confirm there.
X_train, y_train, X_val, y_val, X_test, y_test = split_sets_random(
    df_cleaned,
    target_col='beer_style',
    test_ratio=0.2,
)


In [None]:
!mkdir ../data/processed/beer_type

In [8]:
# Hand all six splits to save_sets, targeting the processed beer_type directory.
save_sets(
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    X_test=X_test, y_test=y_test,
    path='/wd/data/processed/beer_type/',
)



## Wrap the Splits as PyTorch Datasets

In [9]:
# Wrap each in-memory split in the project's PytorchDataset (src/models/pytorch),
# building the train, validation and test datasets in that order.

from src.models.pytorch import PytorchDataset

train_dataset, val_dataset, test_dataset = (
    PytorchDataset(X=features, y=labels)
    for features, labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

## Baseline Model

In [10]:
# Baseline: the project's NullModel (src/models/null), fitted on the training targets only.
from src.models.null import NullModel

# NOTE(review): the warning this cell emits references `self.pred_value = mode(y)[0][0]`,
# so the baseline presumably predicts the most frequent class — confirm in src/models/null.
baseline_model = NullModel(target_type='classification')
# y_base holds the baseline's output for y_train; it is scored in the next cell.
y_base = baseline_model.fit_predict(y_train)

  self.pred_value = mode(y)[0][0]


In [11]:
# Score the baseline on the training set with the project helper print_class_perf.
from src.models.performance import print_class_perf

# y_base is passed first and y_train second; set_name and average are forwarded to the helper.
# NOTE(review): which positional argument is treated as ground truth is defined in
# src/models/performance — confirm the order there.
print_class_perf(y_base, y_train, set_name='Training', average='weighted')

Accuracy Training: 0.0742157299405022
F1 Training: 0.01025487603110527


## Training

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F