In [1]:
# Get required libraries 
import numpy as np
import pylab as pl
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV

In [2]:
# Load the train and test datasets and shuffle their rows.
import time

# Fixed seed so the shuffled row order is the same after every kernel restart.
SEED = 42

# time.clock() was removed in Python 3.8. It is still used where it exists so
# that `start` stays on the same clock as any other time.clock() reading taken
# in this notebook; otherwise time.perf_counter() is used instead.
start = getattr(time, "clock", time.perf_counter)()
train = shuffle(pd.read_csv("train.csv"), random_state=SEED)
test = shuffle(pd.read_csv("test.csv"), random_state=SEED)

In [6]:
# Separate predictors and outcome values for the train and test sets,
# encode the labels as integers and standardise the features.
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

# Columns that are not model inputs: the target and the subject identifier.
NON_FEATURE_COLS = ['Activity', 'subject']

X_train = train.drop(columns=NON_FEATURE_COLS)
Y_train_label = train.Activity.values.astype(object)
X_test = test.drop(columns=NON_FEATURE_COLS)
Y_test_label = test.Activity.values.astype(object)

# Dimension of Train and Test set
print("Dimension of Train set",X_train.shape)
print("Dimension of Test set",X_test.shape,"\n")

# Transform the non-numerical labels into numerical labels.
# The encoder is fitted on the training labels ONLY and then reused for the
# test labels, so both sets share a single label -> integer mapping.
# A test label that never occurs in the training set now raises a ValueError
# instead of silently producing a different mapping.
encoder = preprocessing.LabelEncoder()
Y_train = encoder.fit_transform(Y_train_label)
Y_test = encoder.transform(Y_test_label)

# Total number of numeric features in the training set
# (public select_dtypes API instead of the private _get_numeric_data()).
num_cols = X_train.select_dtypes(include=[np.number]).columns
print("Number of numeric features:",num_cols.size)

names_of_predictors = list(X_train.columns.values)

# Scale the train and test feature sets. The scaler statistics come from the
# training data only and are then applied unchanged to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Dimension of Train set (7352, 561)
Dimension of Test set (2947, 561) 

Number of numeric features: 561


In [7]:
from sklearn.decomposition import PCA

# Number of principal components to keep. Create_model1 builds its input
# layer for 200 features by default, so keep the two values in sync.
N_COMPONENTS = 200

# PCA's default 'auto' solver may choose a randomized SVD; a fixed seed keeps
# the projection reproducible between runs.
pca = PCA(n_components=N_COMPONENTS, random_state=42)

# Project from the scaler output rather than from X_train_scaled itself, so
# re-running this cell never feeds already-projected data back into PCA.
# PCA is unsupervised, so no labels are passed to fit_transform.
X_train_scaled = pca.fit_transform(scaler.transform(X_train))
print(pca.explained_variance_ratio_.sum())
X_test_scaled = pca.transform(scaler.transform(X_test))

0.9936101573619309


In [8]:
# Sanity check: display the shape of the training matrix after the PCA cell.
X_train_scaled.shape

(7352, 200)

In [9]:
import tensorflow as tf
# Display the installed TensorFlow version so the run can be reproduced.
tf.__version__

  from ._conv import register_converters as _register_converters


'2.0.0'

### Tuning Epochs & Batch_size

In [10]:
def Create_model1(input_dim=200, hidden_units=(48, 24, 12), dropout_rate=0.1,
                  n_classes=6):
    """Build and compile a fully connected softmax classifier.

    Called with no arguments it builds the same network as before:
    Dense(48) -> Dense(24) -> Dense(12), each followed by Dropout(0.1),
    then a 6-way softmax output layer.

    Parameters
    ----------
    input_dim : int, default 200
        Number of input features fed to the first Dense layer.
    hidden_units : sequence of int, default (48, 24, 12)
        Width of each hidden ReLU layer, in order.
    dropout_rate : float, default 0.1
        Dropout rate applied after every hidden layer.
    n_classes : int, default 6
        Number of units in the softmax output layer.

    Returns
    -------
    tf.keras.models.Sequential
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss (integer class labels) and accuracy metric.
    """
    model = tf.keras.models.Sequential()

    # Only the first Dense layer is told the input width; the dict is emptied
    # after it has been used once.
    first_layer_kwargs = {'input_dim': input_dim}
    for units in hidden_units:
        model.add(tf.keras.layers.Dense(units, kernel_initializer='uniform',
                                        activation='relu', **first_layer_kwargs))
        model.add(tf.keras.layers.Dropout(dropout_rate))
        first_layer_kwargs = {}

    # If hidden_units is empty the output layer is the first layer and still
    # receives input_dim.
    model.add(tf.keras.layers.Dense(n_classes, kernel_initializer='uniform',
                                    activation='softmax', **first_layer_kwargs))

    # Compile model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])
    return model

In [11]:
import time

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# create model
model = KerasClassifier(build_fn=Create_model1, verbose=0)

# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)

# Time the search itself with a wall clock. time.clock() was removed in
# Python 3.8, and on Unix it reported CPU time of the current process only,
# which does not cover work done in parallel worker processes.
# The reported time now covers the grid search only, not everything since
# the data was loaded.
search_start = time.perf_counter()
grid_result = grid.fit(X_train_scaled, Y_train)
end = time.perf_counter()

# summarize results: best configuration first, then every grid point with the
# mean and standard deviation of its cross-validated score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

print('Running time: %s Seconds'%(end-search_start))

Using TensorFlow backend.


Best: 0.982862 using {'batch_size': 10, 'epochs': 100}
0.975109 (0.000575) with: {'batch_size': 10, 'epochs': 10}
0.977829 (0.002164) with: {'batch_size': 10, 'epochs': 50}
0.982862 (0.002665) with: {'batch_size': 10, 'epochs': 100}
0.974565 (0.006844) with: {'batch_size': 20, 'epochs': 10}
0.979597 (0.003176) with: {'batch_size': 20, 'epochs': 50}
0.981910 (0.003002) with: {'batch_size': 20, 'epochs': 100}
0.976333 (0.002332) with: {'batch_size': 40, 'epochs': 10}
0.979461 (0.002710) with: {'batch_size': 40, 'epochs': 50}
0.980277 (0.003416) with: {'batch_size': 40, 'epochs': 100}
0.975925 (0.002600) with: {'batch_size': 60, 'epochs': 10}
0.977557 (0.004171) with: {'batch_size': 60, 'epochs': 50}
0.982046 (0.006327) with: {'batch_size': 60, 'epochs': 100}
0.974429 (0.004293) with: {'batch_size': 80, 'epochs': 10}
0.979053 (0.001499) with: {'batch_size': 80, 'epochs': 50}
0.978509 (0.001711) with: {'batch_size': 80, 'epochs': 100}
0.970076 (0.008023) with: {'batch_size': 100, 'epochs':

In [9]:
# NOTE(review): this cell repeats the batch_size/epochs grid search of the
# preceding cell verbatim; consider keeping only one of the two.
import time

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# create model
model = KerasClassifier(build_fn=Create_model1, verbose=0)

# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)

# Time the search itself with a wall clock. time.clock() was removed in
# Python 3.8, and on Unix it reported CPU time of the current process only,
# which does not cover work done in parallel worker processes.
# The reported time now covers the grid search only, not everything since
# the data was loaded.
search_start = time.perf_counter()
grid_result = grid.fit(X_train_scaled, Y_train)
end = time.perf_counter()

# summarize results: best configuration first, then every grid point with the
# mean and standard deviation of its cross-validated score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

print('Running time: %s Seconds'%(end-search_start))

Using TensorFlow backend.


Best: 0.982454 using {'batch_size': 10, 'epochs': 100}
0.966948 (0.008833) with: {'batch_size': 10, 'epochs': 10}
0.980005 (0.003181) with: {'batch_size': 10, 'epochs': 50}
0.982454 (0.004934) with: {'batch_size': 10, 'epochs': 100}
0.974157 (0.004117) with: {'batch_size': 20, 'epochs': 10}
0.981910 (0.004034) with: {'batch_size': 20, 'epochs': 50}
0.982182 (0.004428) with: {'batch_size': 20, 'epochs': 100}
0.972252 (0.006229) with: {'batch_size': 40, 'epochs': 10}
0.980822 (0.004668) with: {'batch_size': 40, 'epochs': 50}
0.981910 (0.004502) with: {'batch_size': 40, 'epochs': 100}
0.965316 (0.008079) with: {'batch_size': 60, 'epochs': 10}
0.978645 (0.003673) with: {'batch_size': 60, 'epochs': 50}
0.982182 (0.004949) with: {'batch_size': 60, 'epochs': 100}
0.958379 (0.004514) with: {'batch_size': 80, 'epochs': 10}
0.980686 (0.004116) with: {'batch_size': 80, 'epochs': 50}
0.982182 (0.004859) with: {'batch_size': 80, 'epochs': 100}
0.941513 (0.006524) with: {'batch_size': 100, 'epochs':