In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
import tensorflow as tf
import numpy as np
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier

In [3]:
def algorithm_pipeline(X_train_data, X_test_data, y_train_data, y_test_data,
                       model, param_grid, cv=10,
                       scoring_fit="neg_mean_squared_error",
                       do_probabilities=False):
    """Grid-search ``model`` over ``param_grid`` and predict on the test features.

    Parameters
    ----------
    X_train_data, y_train_data
        Training features and targets handed to ``GridSearchCV.fit``.
    X_test_data
        Features the fitted search predicts on.
    y_test_data
        Accepted for call-site symmetry; not used inside this function.
    model
        Estimator passed to ``GridSearchCV`` as ``estimator``.
    param_grid
        Parameter grid passed to ``GridSearchCV``.
    cv
        Cross-validation setting forwarded to ``GridSearchCV`` (default 10).
    scoring_fit
        Scoring name forwarded as ``scoring``.
    do_probabilities
        When true, predictions come from ``predict_proba``; otherwise from
        ``predict``.

    Returns
    -------
    tuple
        ``(fitted_model, pred)`` where ``fitted_model`` is whatever
        ``GridSearchCV.fit`` returned and ``pred`` is the prediction on
        ``X_test_data``.
    """
    gs = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        n_jobs=-1,
        scoring=scoring_fit,
        verbose=2,
    )
    fitted_model = gs.fit(X_train_data, y_train_data)

    if do_probabilities:
        pred = fitted_model.predict_proba(X_test_data)
    else:
        pred = fitted_model.predict(X_test_data)

    # The return sits outside the if/else on purpose: when it was nested in the
    # else branch, do_probabilities=True fell through and returned None.
    return fitted_model, pred

In [5]:
# Load the MNIST train/test splits via keras.datasets; the lower-case names hold
# the raw, unprocessed arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
def preprocess_mnist(x_train, y_train, x_test, y_test):
    """Prepare MNIST image/label arrays for a Keras CNN.

    Images are reshaped to ``(N, 28, 28, 1)`` (a trailing single-channel axis),
    cast to ``float32`` and divided by 255. Labels are one-hot encoded over
    10 classes with ``to_categorical``.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, input_shape)`` with
        ``input_shape == (28, 28, 1)``.
    """
    input_shape = (28, 28, 1)
    n_classes = 10

    def _as_scaled_images(images):
        # Add the channel axis, then cast to float so the division is not
        # performed on integer pixels.
        batch = images.reshape(images.shape[0], *input_shape).astype('float32')
        # Pixel intensities are divided by 255, the maximum 8-bit value.
        batch /= 255
        return batch

    train_images = _as_scaled_images(x_train)
    test_images = _as_scaled_images(x_test)

    # One-hot encode the digit labels.
    train_labels = to_categorical(y_train, n_classes)
    test_labels = to_categorical(y_test, n_classes)

    return train_images, train_labels, test_images, test_labels, input_shape
    
# Preprocess the raw MNIST splits; the upper-case X_train / X_test hold the
# processed image tensors from here on.
# NOTE: y_train / y_test are rebound to their one-hot versions, so re-running
# this cell without re-running the load cell feeds already-encoded labels back
# into to_categorical.
X_train, y_train, X_test, y_test, input_shape = preprocess_mnist(x_train, y_train, x_test, y_test)

In [7]:
def build_cnn(activation = 'relu',
              dropout_rate = 0.2,
              optimizer = 'Adam'):
    """Build and compile a small CNN classifier with 10 softmax outputs.

    The stack is two 3x3 convolutions (32 then 64 filters), 2x2 max pooling,
    dropout, flatten, a 128-unit dense layer, dropout, and a 10-way softmax.
    The first layer's ``input_shape`` is read from the module-level
    ``input_shape`` variable.

    Parameters
    ----------
    activation
        Activation used by both convolutions and the hidden dense layer.
    dropout_rate
        Rate passed to both ``Dropout`` layers.
    optimizer
        Optimizer passed to ``compile``.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    accuracy metric).
    """
    def _layer_stack():
        # Yielded lazily so each layer is constructed right before it is
        # added, in the same order as a chain of model.add(...) calls.
        yield Conv2D(32, kernel_size=(3, 3),
                     activation=activation,
                     input_shape=input_shape)
        yield Conv2D(64, (3, 3), activation=activation)
        yield MaxPooling2D(pool_size=(2, 2))
        yield Dropout(dropout_rate)
        yield Flatten()
        yield Dense(128, activation=activation)
        yield Dropout(dropout_rate)
        yield Dense(10, activation='softmax')

    model = Sequential()
    for layer in _layer_stack():
        model.add(layer)

    compile_options = {
        'loss': 'categorical_crossentropy',
        'optimizer': optimizer,
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)

    return model

In [8]:
# Hyper-parameter grid for the Keras wrapper: three epoch counts at one batch
# size. The commented-out entries are a larger grid that is currently disabled.
param_grid = {
              'epochs':[1,2,3],
              'batch_size':[128]
              #'epochs' :              [100,150,200],
              #'batch_size' :          [32, 128],
              #'optimizer' :           ['Adam', 'Nadam'],
              #'dropout_rate' :        [0.2, 0.3],
              #'activation' :          ['relu', 'elu']
             }

# Wrap the CNN builder so it can be passed to GridSearchCV as an estimator.
model = KerasClassifier(build_fn = build_cnn, verbose=0)

# Run the 5-fold grid search scored with 'neg_log_loss'.
# NOTE: `model` is rebound here from the unfitted wrapper to the fitted search
# object returned by algorithm_pipeline.
model, pred = algorithm_pipeline(X_train, X_test, y_train, y_test, model, 
                                       param_grid, cv=5, scoring_fit='neg_log_loss')

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  1.6min finished


In [9]:
# Report the best score found by the search (scored with 'neg_log_loss' in the
# cell above) and the parameter combination that achieved it.
print(model.best_score_)
print(model.best_params_)

-0.04435932706569368
{'batch_size': 128, 'epochs': 3}


In [18]:
# Build one model with build_cnn's default arguments so its architecture can be inspected.
# NOTE: despite the `mlp` in the variable name, build_cnn returns a convolutional model.
simpl_mlp = build_cnn()

In [20]:
# Print the layer-by-layer summary (output shapes and parameter counts).
simpl_mlp.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)              

In [21]:
# Switch to a regression task: load the Boston housing features/target and make
# an 80/20 train/test split with a fixed random_state.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and the matching import) only runs on older scikit-learn;
# a different dataset is needed to run on current releases.
boston = load_boston()
X = boston.data
y = boston.target

# NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the MNIST
# arrays bound to the same names by the earlier cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
# Grid-search an XGBoost regressor. The grid below has 3*2*3*3*3*3 = 486
# parameter combinations, each evaluated with 5-fold cross-validation.
model = xgb.XGBRegressor()
param_grid = {
    'n_estimators': [400, 700, 1000],
    'colsample_bytree': [0.7, 0.8],
    'max_depth': [15,20,25],
    'reg_alpha': [1.1, 1.2, 1.3],
    'reg_lambda': [1.1, 1.2, 1.3],
    'subsample': [0.7, 0.8, 0.9]
}

# scoring_fit is left at algorithm_pipeline's default ("neg_mean_squared_error").
# NOTE: `model` is rebound from the unfitted regressor to the fitted search object.
model, pred = algorithm_pipeline(X_train, X_test, y_train, y_test, model, 
                                 param_grid, cv=5)

Fitting 5 folds for each of 486 candidates, totalling 2430 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   11.3s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:   43.2s
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed:  5.7min
[Parallel(n_jobs=-1)]: Done 1454 tasks      | elapsed:  8.3min
[Parallel(n_jobs=-1)]: Done 1981 tasks      | elapsed: 11.6min
[Parallel(n_jobs=-1)]: Done 2430 out of 2430 | elapsed: 14.4min finished




In [24]:
# Report the best score found by the XGBoost search (negative mean squared
# error, the pipeline's default scoring) and the winning parameter combination.
print(model.best_score_)
print(model.best_params_)

-12.148007954095755
{'colsample_bytree': 0.8, 'max_depth': 20, 'n_estimators': 400, 'reg_alpha': 1.2, 'reg_lambda': 1.3, 'subsample': 0.8}
