## Wine Example -- Deep Learning in Keras
From tutorial: https://www.datacamp.com/community/tutorials/deep-learning-python

In [2]:
# load data
import pandas as pd

# Each file is parsed with ';' as the field separator instead of the default ','.
red = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
    sep=';',
)
white = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",
    sep=';',
)

In [3]:
# data info
# DataFrame.info() writes its report directly to stdout and returns None, so it
# is called bare; wrapping it in print() adds a stray "None" line to the output.
white.info()
red.info()
print(red.head())
print(white.tail())
print(red.sample(5))
print(white.describe())
# Last expression of the cell: displayed as the boolean missing-value mask of `red`.
pd.isnull(red)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4898 entries, 0 to 4897
Data columns (total 12 columns):
fixed acidity           4898 non-null float64
volatile acidity        4898 non-null float64
citric acid             4898 non-null float64
residual sugar          4898 non-null float64
chlorides               4898 non-null float64
free sulfur dioxide     4898 non-null float64
total sulfur dioxide    4898 non-null float64
density                 4898 non-null float64
pH                      4898 non-null float64
sulphates               4898 non-null float64
alcohol                 4898 non-null float64
quality                 4898 non-null int64
dtypes: float64(11), int64(1)
memory usage: 459.3 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
fixed acidity           1599 non-null float64
volatile acidity        1599 non-null float64
citric acid             1599 non-null float64
residual sugar          1599 non-null fl

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False


In [4]:
# add label and put two categories together
# 'type' is the binary class label: 1 marks red wines, 0 marks white wines.
red['type'] = 1
white['type'] = 0
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with ignore_index=True stacks the rows the same way (red first, then white)
# and renumbers the index from 0.
wines = pd.concat([red, white], ignore_index=True)
print(wines.shape)

(6497, 13)


In [24]:
# split data
from sklearn.model_selection import train_test_split
import numpy as np

# Features are the first 11 columns by position; the target is the 'type'
# column flattened to a 1-D array.
X = wines.iloc[:, :11]
y = np.ravel(wines["type"])

# 33% of the rows are held out for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

print(X_train.shape, X_test.shape, sep="\n")
print(X_test.sample(5))

(4352, 11)
(2145, 11)
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
5776            6.4              0.25         0.33             1.7      0.037   
6138            8.3              0.14         0.36             8.8      0.026   
584            11.8              0.33         0.49             3.4      0.093   
3708            7.7              0.23         0.31            10.7      0.038   
3518            7.3              0.22         0.50            13.7      0.049   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
5776                 35.0                 113.0  0.99164  3.23       0.66   
6138                 13.0                  60.0  0.99560  3.13       0.35   
584                  54.0                  80.0  1.00020  3.30       0.76   
3708                 59.0                 186.0  0.99690  3.12       0.55   
3518                 56.0                 189.0  0.99940  3.24       0.66   

      alcohol  
5776    10.6

In [25]:
# standardize data
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
print(X_train[1:3])

[[-0.30927999 -1.33696832 -0.13753351  1.55028118 -0.19436877  0.08731957
  -0.49259069  0.61770376 -0.11001502 -1.23015908 -0.48851915]
 [ 0.39305527  0.27154459  0.55442698 -0.72216633 -0.52292887 -0.14179476
   0.65444433 -1.34986716 -0.23518768 -1.43644168  2.02465702]]


In [26]:
# create model
from keras.models import Sequential
from keras.layers import Dense

# Fully connected stack: 11 inputs -> 12 relu units -> 8 relu units -> 1 sigmoid output.
model = Sequential()
model.add(Dense(12, activation='relu', input_shape=(11,)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# view model info
# Only the last expression of a cell is echoed, so the output shape is printed explicitly.
print(model.output_shape)
# summary is a method: it must be called to print the layer table
# (the bare attribute reference did nothing).
model.summary()
model.get_config()
# model.get_weights()

[{'class_name': 'Dense',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'batch_input_shape': (None, 11),
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'dtype': 'float32',
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'name': 'dense_19',
   'trainable': True,
   'units': 12,
   'use_bias': True}},
 {'class_name': 'Dense',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_r

In [27]:
# compile and train
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# 20 passes over the training data, updating after every single sample (batch_size=1).
# The returned History object is the cell's last expression and is echoed.
model.fit(x=X_train, y=y_train, batch_size=1, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fbf5c52fb50>

In [28]:
# evaluate
# Score the trained model on the held-out test split and print the result.
score = model.evaluate(x=X_test, y=y_test, verbose=1)
print(score)

  32/2145 [..............................] - ETA: 0s[0.021252163234212931, 0.99533799533799538]


In [33]:
# save model architecture to json and weights to hdf5
from keras.models import model_from_json  # imported here but not used in this cell

# Architecture: serialized to a JSON string and written to model1.json.
model_json = model.to_json()
with open("model1.json", "w") as json_file:
    json_file.write(model_json)

# Weights: written separately to model1.h5.
model.save_weights("model1.h5")

print("Saved model to disk")

Saved model to disk


In [34]:
# save model architecture to yaml
from keras.models import model_from_yaml  # imported here but not used in this cell

# NOTE(review): newer TensorFlow/Keras releases disable YAML serialization
# (to_yaml raises an error there) -- confirm the installed version supports it.
model_yaml = model.to_yaml()
with open("model1.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)

In [35]:
# save the whole model
# Writes the model to a single file, whole_model.h5, via Model.save.
model.save("whole_model.h5")

In [37]:
# Remove the in-memory model so the name `model` is unbound until it is reloaded from disk.
del model

In [39]:
# Rebuild `model` from the whole_model.h5 file.
from keras.models import load_model
model = load_model("whole_model.h5")

In [43]:
# Re-score the reloaded model on the same test split; `score` is overwritten.
score = model.evaluate(x=X_test, y=y_test, verbose=1)
print(score)

  32/2145 [..............................] - ETA: 0s[0.021252163234212931, 0.99533799533799538]
