### This notebook focus at first explaining basic layers used in Keras , its' few key parameters, then it's applicability to regression problem on online news popularity dataset downloaded from UCI data repo

http://archive.ics.uci.edu/ml/machine-learning-databases/wine/

Abstract: Using chemical analysis determine the origin of wines

In [16]:
import keras
from keras.layers import Input, Embedding, Flatten, Dot, Reshape, Concatenate, Dense, Activation, Dropout
from keras.models import load_model,Sequential
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam,RMSprop,SGD
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from keras import regularizers
from keras.models import model_from_yaml, save_model
from keras.utils import np_utils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse
import random
import pickle
import os
import warnings
from sklearn import preprocessing, svm
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from IPython.display import SVG
warnings.filterwarnings('ignore')

In [2]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
project='Wine-source'
datapath = os.path.join('D:','\Learning','General','data',project)
data = pd.read_csv(os.path.join(datapath,'wine.data'),header=None)
data.columns = [  'name'
                 ,'alcohol',
                'malicAcid',
                'ash'
                ,'ashalcalinity'
                ,'magnesium'
                ,'totalPhenols'
                ,'flavanoids'
                ,'nonFlavanoidPhenols'
                ,'proanthocyanins'
                ,'colorIntensity'
                ,'hue'
                ,'od280_od315'
                ,'proline'
                ]
print(data.head())

   name  alcohol  malicAcid   ash  ashalcalinity  magnesium  totalPhenols  \
0     1    14.23       1.71  2.43           15.6        127          2.80   
1     1    13.20       1.78  2.14           11.2        100          2.65   
2     1    13.16       2.36  2.67           18.6        101          2.80   
3     1    14.37       1.95  2.50           16.8        113          3.85   
4     1    13.24       2.59  2.87           21.0        118          2.80   

   flavanoids  nonFlavanoidPhenols  proanthocyanins  colorIntensity   hue  \
0        3.06                 0.28             2.29            5.64  1.04   
1        2.76                 0.26             1.28            4.38  1.05   
2        3.24                 0.30             2.81            5.68  1.03   
3        3.49                 0.24             2.18            7.80  0.86   
4        2.69                 0.39             1.82            4.32  1.04   

   od280_od315  proline  
0         3.92     1065  
1         3.40     105

In [4]:
# Let's take a small sample for quick run
# data=data.sample(frac=0.5)
X = data.loc[:, data.columns != 'name']
y = data['name']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [47]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y_train)
encoded_Y = encoder.transform(y_train)
# convert integers to one hot encoded
one_hot_y = np_utils.to_categorical(encoded_Y)

# encoder.fit(y_test)
# encoded_Ytest = encoder.transform(y_test)
# # convert integers to one hot encoded
# one_hot_ytest = np_utils.to_categorical(encoded_Ytest)

## Specifying Input shape : 
First layer need to tell about the input shape , following layers can do the automatic shape inference
Important layers to consider are 
a) Dense implements the operation: output = activation(dot(input, kernel) + bias) where activation is the element-wise activation function passed as the activation argument, kernel is a weights matrix created by the layer, and bias is a bias vector created by the layer (only applicable if use_bias is True).
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use. Nothing specified, no activation is applied (ie. "linear" activation: a(x) = x).
    use_bias: Boolean, whether the layer uses a bias vector
     & many more parameters

You can define model architecture in 2 ways :
1)

In [None]:
# Keras input layers 
## continuous variable input
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=100))
# now the model will take as input arrays of shape (*, 100)
# and output arrays of shape (*, 64)
model.add(Dense(units=10, activation='softmax'))

2) 

In [None]:
model = Sequential([Dense(64, input_shape=(100,)),
                    Activation('relu'),
                    Dense(10),
                    Activation('softmax'),
                    ])

Both model arch 1 and 2 are same.

## Regularizer
##### Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes.
##### L1 /L2 and also dropout

In [None]:
# L1 /L2 example
model.add(Dense(64, input_dim=64,kernel_regularizer=regularizers.l2(0.01),activity_regularizer=regularizers.l1(0.01)))
# Dropout example 
model.add(Dropout(rate=0.1, noise_shape=None, seed=3))
# where rate is dropout rate

## Compilation

### In compilation various loss functions, optimizer and metrics can be defined . Few of them are
##### Optimizers : sgd , rmsprop, adam, adagrad, adadelta, adamax, nadam
##### Loss: categorical_crossentropy for muti class problem
#####            mean_squared_error, mean_absolute_error, mean_absolute_percentage_error,mean_squared_logarithmic_error for continous variable
#####            binary_crossentropy for binary classification

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

## Training

In [None]:
# x_train and y_train are Numpy arrays --just like in the Scikit-Learn API.
model.fit(X_train, y_train, epochs=5, batch_size=32)
# if you want to train on batch input
model.train_on_batch(x_batch, y_batch)

## Evaluate on test dataset

In [None]:
loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)

## predict on test dataset

In [None]:
classes = model.predict(x_test, batch_size=128)

# Application on online news popularity dataset (Regression problem)

### Baseline model with first layer being input and output same dimension

In [51]:
# fix random seed for reproducibility
seed = 50
np.random.seed(seed)

In [52]:
# define base model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
    return model

In [53]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)

In [54]:

kfold = KFold(n_splits=3, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X_train, one_hot_y, cv=kfold)
print("Results: %.2f (%.2f) Accuracy" % (results.mean(), results.std()))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Results: 0.35 (0.06) Accuracy


# Standardize dataset

In [55]:
# # evaluate model with standardized dataset
# estimators = []
# # estimators.append(('standardize', StandardScaler()))
# estimators.append(('classify', KerasRegressor(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)))
# pipeline = Pipeline(estimators)
# kfold = KFold(n_splits=3,shuffle=True, random_state=seed)
# results = cross_val_score(pipeline, X_train, one_hot_y, cv=kfold)
# print("Standardized: %.2f (%.2f) Accuracy" % (results.mean(), results.std()))

## Try deeper model with 4 layers

In [61]:
# define the model
def deeper_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13,activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(6, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3,activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
    return model

In [63]:
np.random.seed(seed)
estimators = []
# estimators.append(('standardize', StandardScaler()))
estimators.append(('classify', KerasRegressor(build_fn=deeper_model, epochs=5, batch_size=20, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=3,shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X_train, one_hot_y, cv=kfold)
print(results)
print("Deeper model: %.2f (%.2f) Accuracy" % (results.mean(), results.std()))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[-11.41698416 -11.31696063  -5.19003553]
Deeper model: -9.31 (2.91) Accuracy


## predict on test dataset

In [64]:
estimator.fit(X_train, one_hot_y)
prediction = estimator.predict(X_test)
print(prediction)
print(encoder.inverse_transform(prediction))
print(mean_absolute_percentage_error(y_test, prediction))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
[3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
36.11111111111111


## Save your NN model in YAML

In [34]:
# save model to yaml
model_yaml = estimator.model.to_yaml()
with open(os.path.join(datapath,"output","model.yaml"), "w") as yaml_file:
    yaml_file.write(model_yaml)
# save weights to HDF5
estimator.model.save_weights(os.path.join(datapath,"output","model.h5"))
print("Saved model to directory")

Saved model to directory


 
## Load YAML and compile model

In [38]:
yaml_file = open(os.path.join(datapath,"output","model.yaml"), 'r')
loaded_model_yaml = yaml_file.read()
yaml_file.close()
loaded_model = model_from_yaml(loaded_model_yaml)
# load weights into new model
loaded_model.load_weights(os.path.join(datapath,"output","model.h5"))
print("Loaded model from disk")

Loaded model from disk


## Evaluate loaded model on test data

In [42]:
loaded_model.compile(loss='mean_absolute_percentage_error', optimizer='adam', metrics=['mean_absolute_percentage_error'])
score = loaded_model.evaluate(X_test, y_test, verbose=1)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]))

mean_absolute_percentage_error: 66.21%
