In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import SGD, Adadelta, Adam, RMSprop, Adagrad, Nadam, Adamax

# Single seed used for the data splits and for NumPy's global random generator.
SEED = 2017

# Seed NumPy's global RNG so anything drawing from it is repeatable after
# "Restart Kernel -> Run All".
# NOTE(review): the Keras/TensorFlow backend may keep additional RNG state of
# its own; seed that too if bit-for-bit identical training is required --
# confirm for the installed versions.
np.random.seed(SEED)

Using TensorFlow backend.


In [6]:
# Load the wine-quality table; the file uses ';' as its field separator.
data = pd.read_csv(
    'winequality-red.csv',
    sep=";"
)

In [7]:
# Preview the first five rows to sanity-check the parsed columns.
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [8]:
# Number of rows (samples) in the loaded table.
data.shape[0]

1599

In [9]:
# Separate the feature columns (everything except 'quality') from the
# 'quality' target column.
X, y = data.drop(['quality'], axis=1), data['quality']

In [10]:
# First hold out 20% of all rows as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=SEED,
    test_size=0.2
)
# ... then take 20% of the remaining training rows as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    random_state=SEED,
    test_size=0.2
)

In [11]:
def create_model(opt):
    """Build the uncompiled dense network trained in every optimizer run.

    The network is a stack of ReLU layers with 100, 50, 25 and 10 units,
    followed by one linear output unit. The input width is taken from the
    module-level ``X_train`` (its number of columns).

    Parameters
    ----------
    opt
        Not used inside this function; kept so existing callers, which pass
        the optimizer key, keep working.

    Returns
    -------
    Sequential
        A fresh model that still has to be compiled by the caller.
    """
    n_features = X_train.shape[1]

    # Layers are created in input-to-output order.
    layers = [Dense(100, input_dim=n_features, activation='relu')]
    for units in (50, 25, 10):
        layers.append(Dense(units, activation='relu'))
    layers.append(Dense(1, activation='linear'))

    return Sequential(layers)

In [16]:
def create_callbacks(opt):
    """Return the callbacks used while training with one optimizer.

    Two callbacks are created, both monitoring ``'val_acc'``:

    * ``EarlyStopping`` with a patience of 200 epochs.
    * ``ModelCheckpoint`` writing to ``my_graph/optimizers_best_<opt>.h5``
      with ``save_best_only=True``.

    Parameters
    ----------
    opt : str
        Optimizer label; it is concatenated into the checkpoint file name.

    Returns
    -------
    list
        ``[EarlyStopping, ModelCheckpoint]`` in that order.
    """
    checkpoint_dir = 'my_graph'
    # Make sure the checkpoint directory exists before training starts.
    # NOTE(review): presumably the checkpoint callback does not create missing
    # directories itself -- without this guard a fresh checkout would fail at
    # the first save.
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # NOTE(review): the metric key 'val_acc' depends on the Keras version in
    # use -- confirm before upgrading.
    callbacks = [
        EarlyStopping(monitor='val_acc', patience=200, verbose=2),
        ModelCheckpoint('my_graph/optimizers_best_' + opt + '.h5', monitor='val_acc', save_best_only=True, verbose=0)
    ]
    return callbacks

In [13]:
# Optimizers under comparison, keyed by the label that is later used in the
# checkpoint file name and in the results table.
opts = {
    'sgd': SGD(),
    'sgd-0001': SGD(lr=0.0001, decay=0.00001),
    'adam': Adam(),
    'adadelta': Adadelta(),
    'rmsprop': RMSprop(),
    'rmsprop-0001': RMSprop(lr=0.0001),
    'nadam': Nadam(),
    'adamax': Adamax(),
}

In [14]:
# Training settings shared by every optimizer run.
n_epochs = 1000    # upper bound on the number of epochs per run
batch_size = 128

# Collects one [optimizer, best epoch, best val accuracy, test accuracy] row
# per optimizer; filled by the loop over the optimizers.
results = list()

In [18]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every row to the results.
results = []

# Train one fresh model per optimizer and record its best validation epoch
# plus the test-set score of the checkpointed weights.
for opt, optimizer in opts.items():
    model = create_model(opt)
    callbacks = create_callbacks(opt)
    model.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    hist = model.fit(
        X_train.values, y_train,
        batch_size=batch_size,
        epochs=n_epochs,
        validation_data=(X_val.values, y_val),
        verbose=0,
        callbacks=callbacks
    )

    # Index (0-based) and value of the best validation accuracy of this run.
    val_acc_history = hist.history['val_acc']
    best_epoch = np.argmax(val_acc_history)
    best_acc = val_acc_history[best_epoch]

    # Rebuild the architecture and load the checkpoint written during
    # training, so the test score comes from the checkpointed weights rather
    # than from the weights of the final epoch.
    best_model = create_model(opt)
    best_model.load_weights('my_graph/optimizers_best_' + opt + '.h5')
    best_model.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

    # evaluate() returns [loss, accuracy]; only the accuracy is recorded.
    score = best_model.evaluate(X_test.values, y_test, verbose=0)
    results.append([opt, best_epoch, best_acc, score[1]])

Epoch 00451: early stopping
Epoch 00382: early stopping
Epoch 00201: early stopping
Epoch 00538: early stopping
Epoch 00332: early stopping
Epoch 00334: early stopping
Epoch 00300: early stopping
Epoch 00711: early stopping


In [19]:
# Tabulate the per-optimizer results. The column names are passed to the
# constructor so that an empty `results` list yields an empty, correctly
# labelled table instead of a length-mismatch error when assigning
# `res.columns` afterwards.
# 'epochs' holds the 0-based index of the epoch with the best validation
# accuracy.
res = pd.DataFrame(
    results,
    columns=['optimizer', 'epochs', 'val_accuracy', 'test_accuracy']
)
res

Unnamed: 0,optimizer,epochs,val_accuracy,test_accuracy
0,nadam,250,0.589844,0.621875
1,adamax,181,0.570312,0.596875
2,sgd,0,0.0,0.0
3,adadelta,337,0.589844,0.559375
4,sgd-0001,131,0.554688,0.571875
5,rmsprop,133,0.570312,0.559375
6,adam,99,0.578125,0.6
7,rmsprop-0001,510,0.578125,0.596875
