### This notebook focus at first explaining basic layers used in Keras , its' few key parameters, then it's applicability to regression problem on online news popularity dataset downloaded from UCI data repo

In [1]:
import keras
from keras.layers import Input, Embedding, Flatten, Dot, Reshape, Concatenate, Dense, Activation, Dropout
from keras.models import load_model,Sequential
from keras.callbacks import ModelCheckpoint
from IPython.display import SVG
from keras.optimizers import Adam,RMSprop,SGD
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasRegressor
from keras import regularizers
from keras.models import model_from_yaml, save_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse
import random
import pickle
import os
import warnings
from sklearn import preprocessing, svm
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
warnings.filterwarnings('ignore')
from sklearn.utils import check_array

Using TensorFlow backend.


In [26]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = check_array(y_true, y_pred)
    ## Note: does not handle mix 1d representation
    if _is_1d(y_true): 
        y_true, y_pred = _check_1d_array(y_true, y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [2]:
project='OnlineNewsPopularity'
datapath=os.path.join('D:','\Learning','General','data',project)
data = pd.read_csv(os.path.join(datapath,'OnlineNewsPopularity.csv'))

In [3]:
# Let's take a small sample for quick run
data=data.sample(frac=0.5)
X = data.select_dtypes(include=np.number)
y = data[' shares']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Specifying Input shape : 
First layer need to tell about the input shape , following layers can do the automatic shape inference
Important layers to consider are 
a) Dense implements the operation: output = activation(dot(input, kernel) + bias) where activation is the element-wise activation function passed as the activation argument, kernel is a weights matrix created by the layer, and bias is a bias vector created by the layer (only applicable if use_bias is True).
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use. Nothing specified, no activation is applied (ie. "linear" activation: a(x) = x).
    use_bias: Boolean, whether the layer uses a bias vector
     & many more parameters

You can define model architecture in 2 ways :
1)

In [None]:
# Keras input layers 
## continuous variable input
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=100))
# now the model will take as input arrays of shape (*, 100)
# and output arrays of shape (*, 64)
model.add(Dense(units=10, activation='softmax'))

2) 

In [None]:
model = Sequential([Dense(64, input_shape=(100,)),
                    Activation('relu'),
                    Dense(10),
                    Activation('softmax'),
                    ])

Both model arch 1 and 2 are same.

## Regularizer
##### Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes.
##### L1 /L2 and also dropout

In [None]:
# L1 /L2 example
model.add(Dense(64, input_dim=64,kernel_regularizer=regularizers.l2(0.01),activity_regularizer=regularizers.l1(0.01)))
# Dropout example 
model.add(Dropout(rate=0.1, noise_shape=None, seed=3))
# where rate is dropout rate

## Compilation

### In compilation various loss functions, optimizer and metrics can be defined . Few of them are
##### Optimizers : sgd , rmsprop, adam, adagrad, adadelta, adamax, nadam
##### Loss: categorical_crossentropy for muti class problem
#####            mean_squared_error, mean_absolute_error, mean_absolute_percentage_error,mean_squared_logarithmic_error for continous variable
#####            binary_crossentropy for binary classification

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

## Training

In [None]:
# x_train and y_train are Numpy arrays --just like in the Scikit-Learn API.
model.fit(X_train, y_train, epochs=5, batch_size=32)
# if you want to train on batch input
model.train_on_batch(x_batch, y_batch)

## Evaluate on test dataset

In [None]:
loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)

## predict on test dataset

In [None]:
classes = model.predict(x_test, batch_size=128)

# Application on online news popularity dataset (Regression problem)

### Baseline model with first layer being input and output same dimension

In [4]:
# fix random seed for reproducibility
seed = 50
np.random.seed(seed)

In [10]:
# define base model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_absolute_percentage_error', optimizer='adam')
    return model

In [11]:
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)

In [12]:
kfold = KFold(n_splits=3, random_state=seed)
results = cross_val_score(estimator, X_train, y_train, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Results: -20.81 (6.52) MSE


# Standardize dataset

In [14]:
# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('regress', KerasRegressor(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=3, random_state=seed)
results = cross_val_score(pipeline, X_train, y_train, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Standardized: -68.63 (5.69) MSE


## Try deeper model with 4 layers

In [18]:
# define the model
def deeper_model():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(36, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(8, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_absolute_percentage_error', optimizer='adam')
    return model

In [19]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('regress', KerasRegressor(build_fn=deeper_model, epochs=5, batch_size=5, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=3, random_state=seed)
results = cross_val_score(pipeline, X_train, y_train, cv=kfold)
print("Deeper model: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Deeper model: -65.02 (11.46) MSE


## predict on test dataset

In [27]:
estimator.fit(X_train, y_train)
prediction = estimator.predict(X_test)
print(mean_absolute_percentage_error(y_test, prediction))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


NameError: name 'check_arrays' is not defined

## Save your NN model in YAML

In [25]:
# save model to yaml
model_yaml = pipeline.named_steps['regress'].model.to_yaml()
with open(os.path.join(datapath,"output","model.yaml"), "w") as yaml_file:
    yaml_file.write(model_yaml)
# save weights to HDF5
model.save_weights(os.path.join(datapath,"output","model.h5"))
print("Saved model to directory")

AttributeError: 'KerasRegressor' object has no attribute 'model'

 
## Load YAML and compile model

In [None]:
yaml_file = open((os.path.join(datapath,"output","model.yaml"), 'r')
loaded_model_yaml = yaml_file.read()
yaml_file.close()
loaded_model = model_from_yaml(loaded_model_yaml)
# load weights into new model
loaded_model.load_weights((os.path.join(datapath,"output","model.h5"))
print("Loaded model from disk")

## Evaluate loaded model on test data

In [None]:
loaded_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
score = loaded_model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))