In [1]:
# Dependencies to Visualize the model
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Filepaths, numpy, and Tensorflow
import os
import numpy as np
import tensorflow as tf
import pandas as pd

In [3]:
# Sklearn scaling
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Loading and Preprocessing Data

In [5]:
# Load the beer table (data file supplied by Ethan); it already contains the
# `clusters_3param` column that is used as the training label below.
all_beer_df = pd.read_csv("data_add_3param_cluster.csv", encoding="latin1")

# List the available columns so the feature/label names used below can be checked.
print(all_beer_df.columns)

# Features (X): the three beer characteristics the network is trained on.
beer_char = all_beer_df[["ABV", "IBU", "Color"]]
X = beer_char

# Labels (y): the value stored in the `clusters_3param` column for each row.
y = all_beer_df["clusters_3param"]

# Sanity check: X and y must report the same number of rows.
print(X.shape, y.shape)

# Only the last expression of a cell is rendered, so the label preview goes last.
# (A bare `X.head()` in the middle of the cell was evaluated and then discarded,
# so it has been removed; the shape printout above already covers X.)
y.head()

Index(['BeerID', 'Name', 'Style', 'StyleID', 'OG', 'FG', 'ABV', 'IBU', 'Color',
       'BoilSize', 'BoilTime', 'Efficiency', 'ViewCount', 'BrewCount',
       'LastUpdated', 'Category', 'clusters_7param', 'clusters_3param'],
      dtype='object')
(73861, 3) (73861,)


0    11
1    11
2    11
3     1
4     1
Name: clusters_3param, dtype: int64

In [6]:
# Work out how many distinct labels the target column holds; the count is kept
# in `style_count` so later cells can reuse it.

# distinct label values, in order of first appearance
style_array = y.unique()

# number of distinct labels
style_count = len(style_array)

print(style_count)

13


In [7]:
### Split, scale, and encode the data
# `to_categorical` is already imported in the Keras import cell at the top of the notebook.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Hold out a test set. `stratify=y` keeps the label proportions the same in both
# splits and the fixed `random_state` makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Fit the scaler on the training rows only, then apply that same transform to
# both splits so no information from the test rows influences the scaling.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode the targets (original label values -> 0..n_classes-1)
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot vectors.
# The width is pinned to the number of classes the encoder knows about. Without
# `num_classes`, to_categorical infers the width from the largest value in the
# array it is given, so the train and test matrices could end up with different
# widths if one split happened not to contain the highest class.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)


In [8]:
# Inspect the one-hot vector produced for the first training label.
first_one_hot = y_train_categorical[0]
print(first_one_hot)

[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


# Create Deep Learning Model

In [9]:
# Build a fully connected classifier layer by layer.
# `Sequential` and `Dense` come from the Keras import cell at the top of the notebook.
model = Sequential()

# First hidden layer. It also declares the input width: one value per feature
# column of X_train (the batch size does not have to be specified).
# `relu` is a common choice of activation function.
model.add(Dense(units=100, activation='relu', input_dim=X_train.shape[1]))

# Second, third and fourth hidden layers: same size and activation as the first.
for hidden_layer_number in range(3):
    model.add(Dense(units=100, activation='relu'))

# Output layer: one softmax unit per distinct label counted in `style_count`.
model.add(Dense(units=style_count, activation='softmax'))

In [10]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the targets
# are one-hot vectors) and accuracy reported alongside the loss.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on the scaled training features for 25 epochs, reshuffling the rows each
# epoch; verbose=2 prints one summary line per epoch.
training_options = {"epochs": 25, "shuffle": True, "verbose": 2}
model.fit(X_train_scaled, y_train_categorical, **training_options)

Epoch 1/25
 - 3s - loss: 0.2098 - acc: 0.9297
Epoch 2/25
 - 3s - loss: 0.1006 - acc: 0.9625
Epoch 3/25
 - 3s - loss: 0.0859 - acc: 0.9686
Epoch 4/25
 - 3s - loss: 0.0811 - acc: 0.9707
Epoch 5/25
 - 3s - loss: 0.0759 - acc: 0.9724
Epoch 6/25
 - 3s - loss: 0.0708 - acc: 0.9745
Epoch 7/25
 - 3s - loss: 0.0711 - acc: 0.9743
Epoch 8/25
 - 3s - loss: 0.0660 - acc: 0.9765
Epoch 9/25
 - 4s - loss: 0.0639 - acc: 0.9767
Epoch 10/25
 - 4s - loss: 0.0614 - acc: 0.9784
Epoch 11/25
 - 4s - loss: 0.0595 - acc: 0.9793
Epoch 12/25
 - 4s - loss: 0.0585 - acc: 0.9791
Epoch 13/25
 - 4s - loss: 0.0568 - acc: 0.9800
Epoch 14/25
 - 4s - loss: 0.0548 - acc: 0.9807
Epoch 15/25
 - 4s - loss: 0.0563 - acc: 0.9806
Epoch 16/25
 - 4s - loss: 0.0543 - acc: 0.9809
Epoch 17/25
 - 4s - loss: 0.0525 - acc: 0.9816
Epoch 18/25
 - 4s - loss: 0.0502 - acc: 0.9830
Epoch 19/25
 - 4s - loss: 0.0510 - acc: 0.9826
Epoch 20/25
 - 4s - loss: 0.0493 - acc: 0.9835
Epoch 21/25
 - 4s - loss: 0.0490 - acc: 0.9835
Epoch 22/25
 - 4s - lo

<keras.callbacks.History at 0x271ad16e518>

In [11]:
# Score the trained network on the held-out test split. With a single compiled
# metric, evaluate() returns the loss followed by that metric (accuracy here).
test_scores = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

# Print the layer-by-layer architecture and parameter counts.
model.summary()

# Persist the trained network to disk.
# NOTE: only the network is written here; `X_scaler` and `label_encoder` are not
# saved by this cell and exist only in the running kernel.
model.save("beer_3p_13style.h5")

Normal Neural Network - Loss: 0.06291205536803451, Accuracy: 0.9785010289180115
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               400       
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 13)                1313      
Total params: 32,013
Trainable params: 32,013
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Peek at the first ten scaled test rows (all feature columns).
X_test_scaled[:10, :]

array([[ 0.65466273, -0.4406149 ,  2.0653341 ],
       [ 0.03770917,  0.6346362 , -0.33022251],
       [-0.4421436 , -0.02460475, -0.04509692],
       [-0.52651332, -0.77393119,  0.84623409],
       [-0.16266891,  0.49073041, -0.04342463],
       [-0.27340417,  0.94416068, -0.42721538],
       [-0.50542089, -0.11399727, -0.23072121],
       [-0.37359321,  0.55101839,  0.63636159],
       [ 0.47010397,  0.17173541,  0.10791769],
       [ 0.05352849, -1.0250156 ,  0.13049361]])

In [13]:
### TEST THE MODEL - THIS DOESN'T GO IN FINAL CODE
# Spot-check the network on the first ten scaled test rows.
# `Sequential.predict_classes` was removed from later Keras/TensorFlow releases.
# Taking the argmax over the softmax output yields the same class indices and
# works on old and new versions alike.
sample_probabilities = model.predict(X_test_scaled[:10])
encoded_predictions = np.argmax(sample_probabilities, axis=-1)

# Decode the encoder's class indices back to the original label values
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

# Print predicted vs actual labels for the same ten rows
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")

Predicted classes: [ 3  5  9  9  5  5 11  9  5  9]
Actual Labels: [3, 5, 9, 9, 5, 5, 11, 9, 5, 9]


In [14]:
### Reload the saved network from disk so the prediction cells run against the persisted model
from keras.models import load_model

# Same file name that was written when the trained model was saved.
saved_model_path = "beer_3p_13style.h5"
model = load_model(saved_model_path)

In [None]:
# Beer to classify.
# TODO: replace this placeholder with the real user input (e.g. values sent by
# the front end via AJAX). The numbers below are example values only, so that the
# cell is valid Python and the notebook can run top to bottom.
# The columns must be ABV, IBU, Color - the same features, in the same order,
# that the network was trained on.
input_beer = pd.DataFrame([[5.5, 40.0, 8.0]], columns=["ABV", "IBU", "Color"])

# Scale the input with the scaler that was fitted on the training data. Reusing
# the fitted scaler (never fitting a new one on the input) is what puts the new
# beer on the same scale the network saw during training.
# NOTE(review): `X_scaler` only exists in this kernel session; a separate serving
# process would need the fitted scaler persisted next to the model file.
input_beer_scaled = X_scaler.transform(input_beer)

In [None]:
# Run a prediction for the scaled input.
# predict() returns one row of class probabilities per input row. They are kept
# in `style_probabilities` so the "% chance of each" can be shown if wanted.
style_probabilities = model.predict(input_beer_scaled)

# Most likely class per input row, as the label encoder's integer index.
# (argmax over the softmax output replaces `predict_classes`, which was removed
# from later Keras/TensorFlow releases.)
styleID_guess = np.argmax(style_probabilities, axis=-1)

# Translate the encoded index back to the original label value.
# TODO: turning that label into a human-readable style name needs a lookup table
# that is not defined in this notebook.
styleID = label_encoder.inverse_transform(styleID_guess)
styleID