In [1]:
# Dependencies to Visualize the model
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Filepaths, numpy, and Tensorflow
import os
import numpy as np
import tensorflow as tf
import pandas as pd

In [3]:
# Sklearn scaling
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Loading and Preprocessing Data

In [5]:
# Load the clustered beer-recipe data set (provided by Ethan)
all_beer_df = pd.read_csv("data_add_3param_cluster.csv", encoding="latin1")

# List the available columns so the feature selection below can be checked
print(all_beer_df.columns)

# Trim the data to the seven recipe characteristics used as model inputs (X columns)
beer_char = all_beer_df[["OG","FG","ABV","IBU","Color","BoilTime","Efficiency"]]

# Features (X) and target cluster labels (y).
# The previous mid-cell `X.head()` was removed: a bare expression that is not
# the last line of a cell is evaluated and discarded, so it displayed nothing.
X = beer_char
y = all_beer_df["clusters_7param"]

# Confirm both have the same number of rows, then preview the target labels
print(X.shape, y.shape)
y.head()

Index(['BeerID', 'Name', 'Style', 'StyleID', 'OG', 'FG', 'ABV', 'IBU', 'Color',
       'BoilSize', 'BoilTime', 'Efficiency', 'ViewCount', 'BrewCount',
       'LastUpdated', 'Category', 'clusters_7param', 'clusters_3param'],
      dtype='object')
(73861, 7) (73861,)


0    8
1    8
2    0
3    8
4    0
Name: clusters_7param, dtype: int64

In [6]:
# Work out how many distinct target labels the classifier has to separate

# distinct label values from the target series, in order of first appearance
style_array = y.unique()

# the number of classes is the number of distinct labels
style_count = len(style_array)

print(style_count)

13


In [7]:
###Scale and pre-process the data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# Hold out a test split; stratify on y so both splits keep the same label proportions
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Fit the scaler on the training rows only, then apply the same transform to both splits
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set (encoder is fitted on the training labels)
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
# Pin the one-hot width to the number of classes the encoder knows about.
# Without `num_classes`, to_categorical sizes each matrix from the largest
# label present in that array alone, so a split that happens to miss the
# highest class would get fewer columns than the other split.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)


In [8]:
# Sanity check: show the one-hot vector produced for the first training label
first_one_hot_row = y_train_categorical[0]
print(first_one_hot_row)

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


Create Deep Learning Model

In [9]:
# Build a fully connected feed-forward classifier as a Sequential stack
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    # First hidden layer: the input width is the number of feature columns in
    # X_train (the batch size does not have to be specified). `relu` is a
    # common choice of activation function.
    Dense(units=100, activation='relu', input_dim=X_train.shape[1]),
    # Second hidden layer
    Dense(units=100, activation='relu'),
    # Third hidden layer
    Dense(units=100, activation='relu'),
    # Fourth hidden layer
    Dense(units=100, activation='relu'),
    # Output layer: one softmax unit per distinct target label
    Dense(units=style_count, activation='softmax'),
])

In [10]:
# Configure training (Adam optimizer, categorical cross-entropy on the one-hot
# labels, accuracy reported each epoch), then train on the scaled training split
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=25,
    shuffle=True,
    verbose=2,
)

Epoch 1/25
 - 6s - loss: 0.1508 - acc: 0.9529
Epoch 2/25
 - 4s - loss: 0.0699 - acc: 0.9751
Epoch 3/25
 - 4s - loss: 0.0600 - acc: 0.9788
Epoch 4/25
 - 4s - loss: 0.0543 - acc: 0.9809
Epoch 5/25
 - 4s - loss: 0.0500 - acc: 0.9828
Epoch 6/25
 - 4s - loss: 0.0486 - acc: 0.9840
Epoch 7/25
 - 4s - loss: 0.0436 - acc: 0.9856
Epoch 8/25
 - 4s - loss: 0.0441 - acc: 0.9851
Epoch 9/25
 - 4s - loss: 0.0412 - acc: 0.9865
Epoch 10/25
 - 4s - loss: 0.0401 - acc: 0.9867
Epoch 11/25
 - 4s - loss: 0.0382 - acc: 0.9879
Epoch 12/25
 - 4s - loss: 0.0374 - acc: 0.9881
Epoch 13/25
 - 4s - loss: 0.0368 - acc: 0.9877
Epoch 14/25
 - 4s - loss: 0.0356 - acc: 0.9882
Epoch 15/25
 - 4s - loss: 0.0360 - acc: 0.9887
Epoch 16/25
 - 4s - loss: 0.0336 - acc: 0.9895
Epoch 17/25
 - 5s - loss: 0.0351 - acc: 0.9882
Epoch 18/25
 - 3s - loss: 0.0331 - acc: 0.9896
Epoch 19/25
 - 3s - loss: 0.0330 - acc: 0.9897
Epoch 20/25
 - 3s - loss: 0.0312 - acc: 0.9901
Epoch 21/25
 - 3s - loss: 0.0305 - acc: 0.9907
Epoch 22/25
 - 3s - lo

<keras.callbacks.History at 0x150e427eb70>

In [11]:
# Score the trained network on the held-out (scaled) test split
test_metrics = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

# Print the layer-by-layer summary of the network
model.summary()

# Write the trained model to disk so it can be reloaded later
# NOTE(review): X_scaler and label_encoder are not saved in this cell; code that
# loads this file outside the notebook will presumably need them too - confirm
model.save("beer_7p_13style.h5")

Normal Neural Network - Loss: 0.032368366613116185, Accuracy: 0.989764973464746
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               800       
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 13)                1313      
Total params: 32,413
Trainable params: 32,413
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Preview the first ten rows (all feature columns) of the scaled test set
X_test_scaled[:10, :]

array([[-0.1676544 , -0.15914084, -1.08693476,  0.35702107, -0.73255608,
        -1.00511989, -2.22600062],
       [-0.16628653, -0.1475736 , -1.24599745, -0.23998886,  0.10081883,
        -0.33834685,  0.82597032],
       [-0.14440069, -0.12675258,  1.44216215,  0.29870847,  2.19718635,
         0.66181272,  0.61304212],
       [-0.15579957, -0.14988705,  0.44536924,  6.58744445, -0.57405635,
         1.66197228, -0.45159891],
       [-0.16309485, -0.1475736 , -0.75820518, -0.48110683, -0.78844809,
        -0.33834685, -2.22600062],
       [-0.15807934, -0.1522005 ,  0.1484522 ,  0.43014321, -0.24120691,
        -0.33834685,  0.61304212],
       [-0.16172698, -0.1522005 , -0.39236097, -0.6324419 , -0.55320112,
        -0.33834685, -2.22600062],
       [-0.16309485, -0.1475736 , -0.69988219, -0.26405438, -0.60325367,
         0.3284262 ,  0.96792246],
       [-0.15762339, -0.14294671, -0.05302722, -0.29876426,  1.09603031,
        -0.33834685, -2.58088097],
       [-0.13026608, -0.1036

In [13]:
###TEST THE MODEL - THIS DOESN'T GO IN FINAL CODE
# `Sequential.predict_classes` is deprecated and has been removed from newer
# Keras/TensorFlow releases. Taking the argmax over the softmax probabilities
# gives the same encoded class index and works on old and new versions alike.
class_probabilities = model.predict(X_test_scaled[:10])
encoded_predictions = np.argmax(class_probabilities, axis=-1)

#decode the encoded class indices back to the original cluster labels
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

#print predicted vs actual
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")

Predicted classes: [ 6  0  2  9  6 12  6  0 10  2]
Actual Labels: [6, 0, 2, 9, 6, 12, 6, 0, 10, 2]


In [14]:
###Call the saved model and run a prediction

# Reload the model from the file it was saved to; this rebinds `model`
from keras.models import load_model

saved_model_path = "beer_7p_13style.h5"
model = load_model(saved_model_path)

In [None]:
#tell it what to run based on user input - AJAX here?

# TODO: replace this stand-in with the values collected from the user.
# The input must be 2-D (one row per beer) with the feature columns in the
# order the scaler was fitted on:
# OG, FG, ABV, IBU, Color, BoilTime, Efficiency.
# Until the user-input hook exists, the first held-out test row is used so the
# cell runs instead of raising a SyntaxError.
input_beer = X_test[:1]

# Reuse the scaler that was fitted on the training data so the input gets
# exactly the same standardization the model was trained with.
input_beer_scaled = X_scaler.transform(input_beer)

In [None]:
#run a prediction
# Make predictions

# Class probabilities, one row per input beer. Kept in their own variable so
# the % chance of each class can be shown instead of only the top guess.
style_probabilities = model.predict(input_beer_scaled)

# Most likely encoded class per row. The argmax over the probabilities replaces
# `predict_classes`, which is deprecated and removed in newer Keras/TensorFlow.
styleID_guess = np.argmax(style_probabilities, axis=-1)

# Decode the encoded class index back to the original cluster label.
# TODO: translate the cluster label to a style name; no label-to-name mapping
# is defined in this notebook yet.
styleID = label_encoder.inverse_transform(styleID_guess)