In [1]:
# Dependencies to Visualize the model
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Filepaths, numpy, and Tensorflow
import os
import numpy as np
import tensorflow as tf
import pandas as pd

In [3]:
# Sklearn scaling
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Loading and Preprocessing Data

In [5]:
#import data from Ethan
all_beer_df = pd.read_csv("data_add_3param_cluster.csv", encoding="latin1" )

#trim data to needed X colums
print(all_beer_df.columns)
beer_char = all_beer_df[["OG","FG","ABV","IBU","Color","BoilTime","Efficiency"]]

#Set beer_char as X 
X=beer_char
X.head()

#set y data
y=all_beer_df["StyleID"]
print(X.shape, y.shape)
y.head()

Index(['BeerID', 'Name', 'Style', 'StyleID', 'OG', 'FG', 'ABV', 'IBU', 'Color',
       'BoilSize', 'BoilTime', 'Efficiency', 'ViewCount', 'BrewCount',
       'LastUpdated', 'Category', 'clusters_7param', 'clusters_3param'],
      dtype='object')
(73861, 7) (73861,)


0    45
1    45
2    45
3    45
4    45
Name: StyleID, dtype: int64

In [6]:
#find the number of unique beer styles and set as variable

#create an array of unique values from the output dataset
style_array = pd.unique(y.values)

#set the count as the length of the output array
style_count = len(style_array)

print(style_count)

176


In [7]:
###Scale and pre-process the data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)


In [8]:
#check the categorical results
print(y_train_categorical[0])

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


Create Deep Learning Model

In [9]:
#setup a sequential model
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
# Add the first layer where the input dimensions are the 5 inputs (don't have to specify batch size)
# We can also choose our activation function. `relu` is a common
model.add(Dense(units=300, activation='relu', input_dim=X_train.shape[1]))

#add a second hidden layer
model.add(Dense(units=300, activation='relu'))

#add a second hidden layer
model.add(Dense(units=300, activation='relu'))

#add a third hidden layer
model.add(Dense(units=300, activation='relu'))

#specify the output
model.add(Dense(units=style_count, activation='softmax'))

In [10]:
#compile and fit the model

model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=600,
    shuffle=True,
    verbose=2
)

Epoch 1/600
 - 20s - loss: 2.9629 - acc: 0.2940
Epoch 2/600
 - 25s - loss: 2.7319 - acc: 0.3218
Epoch 3/600
 - 25s - loss: 2.6909 - acc: 0.3265
Epoch 4/600
 - 27s - loss: 2.6637 - acc: 0.3297
Epoch 5/600
 - 26s - loss: 2.6406 - acc: 0.3350
Epoch 6/600
 - 26s - loss: 2.6201 - acc: 0.3371
Epoch 7/600
 - 26s - loss: 2.6027 - acc: 0.3384
Epoch 8/600
 - 26s - loss: 2.5884 - acc: 0.3395
Epoch 9/600
 - 28s - loss: 2.5715 - acc: 0.3403
Epoch 10/600
 - 28s - loss: 2.5558 - acc: 0.3433
Epoch 11/600
 - 29s - loss: 2.5435 - acc: 0.3432
Epoch 12/600
 - 28s - loss: 2.5291 - acc: 0.3441
Epoch 13/600
 - 28s - loss: 2.5133 - acc: 0.3472
Epoch 14/600
 - 27s - loss: 2.5036 - acc: 0.3468
Epoch 15/600
 - 26s - loss: 2.4895 - acc: 0.3495
Epoch 16/600
 - 25s - loss: 2.4763 - acc: 0.3505
Epoch 17/600
 - 25s - loss: 2.4653 - acc: 0.3514
Epoch 18/600
 - 25s - loss: 2.4503 - acc: 0.3544
Epoch 19/600
 - 25s - loss: 2.4382 - acc: 0.3548
Epoch 20/600
 - 25s - loss: 2.4266 - acc: 0.3561
Epoch 21/600
 - 25s - loss: 2

 - 24s - loss: 1.5312 - acc: 0.5211
Epoch 168/600
 - 24s - loss: 1.5156 - acc: 0.5259
Epoch 169/600
 - 24s - loss: 1.5240 - acc: 0.5223
Epoch 170/600
 - 24s - loss: 1.5110 - acc: 0.5268
Epoch 171/600
 - 24s - loss: 1.5239 - acc: 0.5228
Epoch 172/600
 - 24s - loss: 1.5149 - acc: 0.5250
Epoch 173/600
 - 24s - loss: 1.5276 - acc: 0.5238
Epoch 174/600
 - 24s - loss: 1.5047 - acc: 0.5258
Epoch 175/600
 - 24s - loss: 1.5019 - acc: 0.5287
Epoch 176/600
 - 24s - loss: 1.5148 - acc: 0.5259
Epoch 177/600
 - 24s - loss: 1.4832 - acc: 0.5337
Epoch 178/600
 - 24s - loss: 1.5030 - acc: 0.5302
Epoch 179/600
 - 24s - loss: 1.4986 - acc: 0.5305
Epoch 180/600
 - 25s - loss: 1.5084 - acc: 0.5282
Epoch 181/600
 - 26s - loss: 1.5086 - acc: 0.5297
Epoch 182/600
 - 25s - loss: 1.4971 - acc: 0.5328
Epoch 183/600
 - 26s - loss: 1.4825 - acc: 0.5329
Epoch 184/600
 - 26s - loss: 1.4848 - acc: 0.5347
Epoch 185/600
 - 26s - loss: 1.4907 - acc: 0.5316
Epoch 186/600
 - 26s - loss: 1.5014 - acc: 0.5293
Epoch 187/600


 - 24s - loss: 1.3673 - acc: 0.5757
Epoch 332/600
 - 24s - loss: 1.4037 - acc: 0.5724
Epoch 333/600
 - 24s - loss: 1.3487 - acc: 0.5803
Epoch 334/600
 - 24s - loss: 1.3653 - acc: 0.5782
Epoch 335/600
 - 24s - loss: 1.4090 - acc: 0.5700
Epoch 336/600
 - 24s - loss: 1.3652 - acc: 0.5789
Epoch 337/600
 - 24s - loss: 1.3643 - acc: 0.5766
Epoch 338/600
 - 24s - loss: 1.3918 - acc: 0.5710
Epoch 339/600
 - 24s - loss: 1.3633 - acc: 0.5793
Epoch 340/600
 - 24s - loss: 1.3647 - acc: 0.5770
Epoch 341/600
 - 24s - loss: 1.3786 - acc: 0.5735
Epoch 342/600
 - 24s - loss: 1.3409 - acc: 0.5800
Epoch 343/600
 - 24s - loss: 1.3776 - acc: 0.5748
Epoch 344/600
 - 24s - loss: 1.3662 - acc: 0.5771
Epoch 345/600
 - 24s - loss: 1.3824 - acc: 0.5743
Epoch 346/600
 - 24s - loss: 1.3498 - acc: 0.5821
Epoch 347/600
 - 24s - loss: 1.3776 - acc: 0.5763
Epoch 348/600
 - 24s - loss: 1.3579 - acc: 0.5829
Epoch 349/600
 - 24s - loss: 1.3675 - acc: 0.5756
Epoch 350/600
 - 24s - loss: 1.3790 - acc: 0.5749
Epoch 351/600


 - 24s - loss: 1.3582 - acc: 0.5862
Epoch 496/600
 - 24s - loss: 1.3620 - acc: 0.5853
Epoch 497/600
 - 24s - loss: 1.3788 - acc: 0.5824
Epoch 498/600
 - 24s - loss: 1.4010 - acc: 0.5795
Epoch 499/600
 - 24s - loss: 1.4126 - acc: 0.5778
Epoch 500/600
 - 24s - loss: 1.4004 - acc: 0.5796
Epoch 501/600
 - 24s - loss: 1.3920 - acc: 0.5813
Epoch 502/600
 - 24s - loss: 1.4038 - acc: 0.5800
Epoch 503/600
 - 24s - loss: 1.3529 - acc: 0.5899
Epoch 504/600
 - 24s - loss: 1.3584 - acc: 0.5908
Epoch 505/600
 - 24s - loss: 1.3888 - acc: 0.5828
Epoch 506/600
 - 24s - loss: 1.3937 - acc: 0.5797
Epoch 507/600
 - 24s - loss: 1.3976 - acc: 0.5808
Epoch 508/600
 - 24s - loss: 1.3853 - acc: 0.5832
Epoch 509/600
 - 24s - loss: 1.3549 - acc: 0.5884
Epoch 510/600
 - 24s - loss: 1.4386 - acc: 0.5720
Epoch 511/600
 - 24s - loss: 1.3974 - acc: 0.5825
Epoch 512/600
 - 24s - loss: 1.4055 - acc: 0.5820
Epoch 513/600
 - 24s - loss: 1.3741 - acc: 0.5862
Epoch 514/600
 - 24s - loss: 1.3710 - acc: 0.5860
Epoch 515/600


<keras.callbacks.History at 0x217e7818358>

In [14]:
#Quantify the trained model
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

model.summary()

#save the model
model.save("beer_7p_all_styles.h5")

Normal Neural Network - Loss: 5.862837124965721, Accuracy: 0.259124878154446
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 300)               2400      
_________________________________________________________________
dense_2 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_3 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_4 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_5 (Dense)              (None, 176)               52976     
Total params: 326,276
Trainable params: 326,276
Non-trainable params: 0
_________________________________________________________________


In [15]:
X_test_scaled[:10]

array([[-0.15728519, -0.15349761,  0.19861203, -0.45382044, -0.12585185,
        -0.33940182, -0.45701855],
       [-0.15093001, -0.13477928,  0.64577732,  1.15854218,  0.83074592,
        -0.33940182,  0.25620315],
       [-0.15637731, -0.16285678,  0.70965808, -0.31960958,  1.30527867,
         1.64418508,  0.25620315],
       [-0.15773914, -0.13009969, -0.52004648, -0.39656325,  0.4348835 ,
        -0.33940182,  0.3988475 ],
       [-0.16364038, -0.1558374 , -0.67442497,  0.34457389, -0.86317699,
        -0.33940182, -1.17024026],
       [-0.14775242, -0.13711907,  1.21538073,  0.35946076, -0.21791288,
         1.64418508,  0.61281401],
       [-0.1541076 , -0.14647824,  0.49672223, -0.2999131 , -0.59368925,
         1.64418508,  1.18339137],
       [-0.1627325 , -0.14647824, -0.80750988, -0.14165421,  1.18308858,
        -0.33940182, -0.1004077 ],
       [-0.15592337, -0.13945886, -0.03561741, -0.41419846, -0.54431107,
        -0.33940182,  0.25620315],
       [-0.15683125, -0.1417

In [16]:
###TEST THE MODEL - THIS DOESN'T GO IN FINAL CODE
encoded_predictions = model.predict_classes(X_test_scaled[:10])

#decode the 
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

#print predicted vs actual
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")
### END SOLUTION

Predicted classes: [102  13  19  92  13  63   7  39 124 146]
Actual Labels: [4, 67, 134, 92, 7, 147, 10, 6, 7, 169]


In [17]:
###Call the saved model and run a prediction

# Load the model
from keras.models import load_model
model = load_model("beer_7p_all_styles.h5")

In [None]:
#tell it what to run based on user input - AJAX here?

input_beer = #call user input

input_beer_scaled = #how to scale our input?

In [None]:
#run a prediction
# Make predictions

styleID_guess = model.predict_classes(input_beer_scaled) #Instead, do we want to show the % chance of each?

styleID = #translate the style ID to a style name