In [43]:
# Dependencies to Visualize the model
import matplotlib.pyplot as plt
%matplotlib inline

In [44]:
# Filepaths, numpy, and Tensorflow
import os
import numpy as np
import tensorflow as tf
import pandas as pd

In [45]:
# Sklearn scaling
from sklearn.preprocessing import MinMaxScaler

In [46]:
# Keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

# Loading and Preprocessing Data

In [47]:
# Load the clustered beer dataset (data file supplied by Ethan).
all_beer_df = pd.read_csv("data/data_add_3param_cluster.csv", encoding="latin1")

# Show the available columns so the feature selection below can be checked.
print(all_beer_df.columns)

# Trim the frame down to the seven columns used as model inputs.
feature_columns = ["OG", "FG", "ABV", "IBU", "Color", "BoilTime", "Efficiency"]
beer_char = all_beer_df[feature_columns]

# Features (X) and target (y); the target is the 7-parameter cluster label.
X = beer_char
y = all_beer_df["clusters_7param"]

# Sanity check: both must have the same number of rows.
print(X.shape, y.shape)

Index(['BeerID', 'Name', 'Style', 'StyleID', 'OG', 'FG', 'ABV', 'IBU', 'Color',
       'BoilSize', 'BoilTime', 'Efficiency', 'ViewCount', 'BrewCount',
       'LastUpdated', 'Category', 'clusters_7param', 'clusters_3param'],
      dtype='object')
(73861, 7) (73861,)


In [48]:
# Work out how many distinct target labels exist; the value is stored in
# `style_count` for later cells.

# Distinct labels in order of first appearance.
style_array = y.unique()

# Number of distinct labels.
style_count = style_array.shape[0]

print(style_count)

13


In [49]:
### Split and encode the data (no feature scaling is applied in this cell)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# Stratify on y so each label keeps the same proportion in train and test;
# random_state pins the split so re-runs give the same partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Step 1: Label-encode the targets. The encoder is fitted on the training
# labels only and then reused for the test labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot encoding.
# Pass the class count explicitly: left to itself, to_categorical sizes each
# matrix from the largest label present in that array, so the train and test
# matrices could end up with different widths.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)


In [50]:
# Inspect the one-hot vector produced for the first training sample.
first_one_hot_row = y_train_categorical[0]
print(first_one_hot_row)

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


Create Deep Learning Model

In [51]:
# Build a fully connected classifier as a Keras Sequential stack.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()

# First hidden layer: it also declares the input width (one per feature column).
model.add(Dense(units=100, activation='relu', input_dim=X_train.shape[1]))

# Three further hidden layers with the same width and activation,
# giving four hidden layers in total.
for _hidden in range(3):
    model.add(Dense(units=100, activation='relu'))

# Output layer: one softmax unit per distinct label.
model.add(Dense(units=style_count, activation='softmax'))

In [52]:
# Configure training: Adam optimiser, categorical cross-entropy loss
# (the targets are one-hot encoded), accuracy reported per epoch.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 30 epochs on the training split, reshuffling each epoch;
# verbose=2 prints one summary line per epoch.
model.fit(X_train, y_train_categorical, epochs=30, shuffle=True, verbose=2)

Epoch 1/30
 - 3s - loss: 0.4356 - acc: 0.8510
Epoch 2/30
 - 3s - loss: 0.2218 - acc: 0.9182
Epoch 3/30
 - 3s - loss: 0.1739 - acc: 0.9335
Epoch 4/30
 - 3s - loss: 0.1562 - acc: 0.9406
Epoch 5/30
 - 3s - loss: 0.1406 - acc: 0.9454
Epoch 6/30
 - 3s - loss: 0.1268 - acc: 0.9519
Epoch 7/30
 - 3s - loss: 0.1232 - acc: 0.9528
Epoch 8/30
 - 3s - loss: 0.1143 - acc: 0.9563
Epoch 9/30
 - 3s - loss: 0.1103 - acc: 0.9592
Epoch 10/30
 - 3s - loss: 0.1088 - acc: 0.9587
Epoch 11/30
 - 3s - loss: 0.1029 - acc: 0.9610
Epoch 12/30
 - 3s - loss: 0.1013 - acc: 0.9619
Epoch 13/30
 - 3s - loss: 0.0962 - acc: 0.9639
Epoch 14/30
 - 3s - loss: 0.0930 - acc: 0.9657
Epoch 15/30
 - 3s - loss: 0.0928 - acc: 0.9656
Epoch 16/30
 - 3s - loss: 0.0918 - acc: 0.9654
Epoch 17/30
 - 3s - loss: 0.0888 - acc: 0.9668
Epoch 18/30
 - 3s - loss: 0.0879 - acc: 0.9673
Epoch 19/30
 - 3s - loss: 0.0859 - acc: 0.9679
Epoch 20/30
 - 3s - loss: 0.0822 - acc: 0.9690
Epoch 21/30
 - 3s - loss: 0.0852 - acc: 0.9682
Epoch 22/30
 - 3s - lo

<keras.callbacks.History at 0x29d4da62710>

In [53]:
# Quantify the trained model on the held-out test split.
model_loss, model_accuracy = model.evaluate(X_test, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

# Layer-by-layer overview with parameter counts.
model.summary()

# Save the model. Create the target folder first so the save does not fail
# on a fresh checkout where "models/" does not exist yet.
model_path = "models/beer_13style_7p_unscaled.h5"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

Normal Neural Network - Loss: 0.06435701272199829, Accuracy: 0.978013646702047
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 100)               800       
_________________________________________________________________
dense_12 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_13 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_14 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_15 (Dense)             (None, 13)                1313      
Total params: 32,413
Trainable params: 32,413
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Take the first ten test rows as plain nested lists for a quick manual check.
first_ten_rows = X_test.head(10)
testing = first_ten_rows.values.tolist()
testing

[[1.038, 1.007, 4.09, 59.71, 4.62, 50.0, 35.0],
 [1.041, 1.012, 3.79, 33.91, 14.61, 60.0, 78.0],
 [1.089, 1.021, 8.86, 57.19, 39.74, 75.0, 75.0],
 [1.064, 1.011, 6.98, 328.96, 6.52, 90.0, 60.0],
 [1.048, 1.012, 4.71, 23.49, 3.95, 60.0, 35.0],
 [1.0590000000000002, 1.01, 6.42, 62.87, 10.51, 60.0, 75.0],
 [1.051, 1.01, 5.4, 16.95, 6.77, 60.0, 35.0],
 [1.048, 1.012, 4.82, 32.87, 6.17, 70.0, 80.0],
 [1.06, 1.014, 6.04, 31.37, 26.54, 60.0, 30.0],
 [1.12, 1.031, 11.74, 70.04, 49.25, 90.0, 70.0]]

In [55]:
# Re-test on the sample rows for a visual comparison of predicted vs. actual.
# `Sequential.predict_classes` is deprecated and absent from newer
# Keras/TensorFlow releases; taking the argmax over the softmax output yields
# the same class indices and works on both old and new versions.
class_probabilities = model.predict(np.array(testing))
encoded_predictions = np.argmax(class_probabilities, axis=-1)

# Decode the integer class indices back to the original labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

# Print predicted vs. actual labels.
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")

Predicted classes: [ 6  0  2  9  6 12  6  0 10  2]
Actual Labels: [6, 0, 2, 9, 6, 12, 6, 0, 10, 2]
