# Import Necessary Libraries

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import math
import collections
import os
import json
import pickle

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

# Load the Train Test Split

In [26]:
X_train = np.load(os.path.join("..", "IPD", "x_train.npy"))
X_test = np.load(os.path.join("..", "IPD", "x_test.npy"))
Y_train = np.load(os.path.join("..", "IPD", "y_train.npy"))
Y_test = np.load(os.path.join("..", "IPD", "y_test.npy"))
print("Training samples : {} \nTesting Samples : {}".format(X_train.shape, X_test.shape))
print("Training Labels : {} \nTesting Labels : {}".format(Y_train.shape, Y_test.shape))

Training samples : (332, 250, 12) 
Testing Samples : (83, 250, 12)
Training Labels : (332,) 
Testing Labels : (83,)


In [27]:
_ , num_timeframes, num_features = X_train.shape

# Reshape data for the model
-----------------
Samples are a 2D matrix which need to be reshaped to 1D to feed into the model

In [28]:
X_train = X_train.reshape((-1,num_timeframes*num_features))
X_test = X_test.reshape((-1,num_timeframes*num_features))
print("Training samples : {} \nTesting Samples : {}".format(X_train.shape, X_test.shape))
print("Training Labels : {} \nTesting Labels : {}".format(Y_train.shape, Y_test.shape))

Training samples : (332, 3000) 
Testing Samples : (83, 3000)
Training Labels : (332,) 
Testing Labels : (83,)


# Grid search over Random Forest model

In [29]:
parameters = {'max_depth': [12, 14, 16],
              'max_features': ['sqrt','log2'],
              'n_estimators': [16,18,20]}

grid = GridSearchCV(RandomForestClassifier(bootstrap=False, oob_score=False), parameters, cv=5, verbose=2)
grid.fit(X_train, Y_train)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=18, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=18, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=18, total=   0.2s
[CV] max_depth=12, max_features=sqrt, n_estimators=20 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=20, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=20 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=20, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=20 ................
[CV] .

[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed:    8.7s finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=False, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rand

In [30]:
params = grid.best_params_
best_estimator = grid.best_estimator_
print(params)
print(best_estimator)

{'max_depth': 14, 'max_features': 'log2', 'n_estimators': 18}
RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=14, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=18,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)


In [31]:
clf = best_estimator
clf.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=14, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=18,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [32]:
Y_pred = clf.predict(X_test)
print(classification_report(Y_test, Y_pred))
print(confusion_matrix(Y_test,Y_pred))

              precision    recall  f1-score   support

           0       0.78      1.00      0.88        14
           1       0.86      0.86      0.86        14
           2       0.92      0.92      0.92        13
           3       1.00      0.88      0.93        16
           4       0.93      0.93      0.93        14
           5       0.90      0.75      0.82        12

    accuracy                           0.89        83
   macro avg       0.90      0.89      0.89        83
weighted avg       0.90      0.89      0.89        83

[[14  0  0  0  0  0]
 [ 2 12  0  0  0  0]
 [ 1  0 12  0  0  0]
 [ 0  2  0 14  0  0]
 [ 0  0  0  0 13  1]
 [ 1  0  1  0  1  9]]


In [33]:
pickle.dump(clf, open(os.path.join("..", "IPD", "RandomForest_model_half.pkl"), 'wb'))
print("Model Saved")

Model Saved


# Train on entire Data

In [39]:
X_train = np.load(os.path.join("..", "IPD", "x_train.npy"))
X_test = np.load(os.path.join("..", "IPD", "x_test.npy"))
Y_train = np.load(os.path.join("..", "IPD", "y_train.npy"))
Y_test = np.load(os.path.join("..", "IPD", "y_test.npy"))

X_train = np.concatenate((X_train,X_test), axis=0)
Y_train = np.concatenate((Y_train,Y_test), axis=0)

_ , num_timeframes, num_features = X_train.shape
X_train = X_train.reshape((-1,num_timeframes*num_features))
from scipy.sparse import coo_matrix
X_sparse = coo_matrix(X_train)

from sklearn.utils import shuffle
X_train, X_sparse, Y_train = shuffle(X_train, X_sparse, Y_train)
X_train = X_train.reshape((-1,num_timeframes,num_features))

In [40]:
X_train = X_train.reshape((-1,num_timeframes*num_features))
X_test = X_test.reshape((-1,num_timeframes*num_features))
print("Training samples : {} \nTesting Samples : {}".format(X_train.shape, X_test.shape))
print("Training Labels : {} \nTesting Labels : {}".format(Y_train.shape, Y_test.shape))

Training samples : (415, 3000) 
Testing Samples : (83, 3000)
Training Labels : (415,) 
Testing Labels : (83,)


In [41]:
parameters = {'max_depth': [12, 14, 16, 18],
              'max_features': ['sqrt','log2'],
              'n_estimators': [16,18,20,22]}

grid = GridSearchCV(RandomForestClassifier(bootstrap=False, oob_score=False), parameters, cv=10, verbose=2)
grid.fit(X_train, Y_train)

Fitting 10 folds for each of 32 candidates, totalling 320 fits
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] . max_depth=12, max_features=sqrt, n_estimators=16, total=   0.3s
[CV] max_depth=12, max_features=sqrt, n_estimators=16 ................
[CV] .

[CV] . max_depth=12, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.1s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.1s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.2s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.2s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.2s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] . max_depth=12, max_features=log2, n_estimators=20, total=   0.1s
[CV] max_depth=12, max_features=log2, n_estimators=20 ................
[CV] .

[CV] . max_depth=14, max_features=sqrt, n_estimators=22, total=   0.4s
[CV] max_depth=14, max_features=sqrt, n_estimators=22 ................
[CV] . max_depth=14, max_features=sqrt, n_estimators=22, total=   0.5s
[CV] max_depth=14, max_features=sqrt, n_estimators=22 ................
[CV] . max_depth=14, max_features=sqrt, n_estimators=22, total=   0.5s
[CV] max_depth=14, max_features=log2, n_estimators=16 ................
[CV] . max_depth=14, max_features=log2, n_estimators=16, total=   0.1s
[CV] max_depth=14, max_features=log2, n_estimators=16 ................
[CV] . max_depth=14, max_features=log2, n_estimators=16, total=   0.1s
[CV] max_depth=14, max_features=log2, n_estimators=16 ................
[CV] . max_depth=14, max_features=log2, n_estimators=16, total=   0.1s
[CV] max_depth=14, max_features=log2, n_estimators=16 ................
[CV] . max_depth=14, max_features=log2, n_estimators=16, total=   0.1s
[CV] max_depth=14, max_features=log2, n_estimators=16 ................
[CV] .

[CV] . max_depth=16, max_features=sqrt, n_estimators=18, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=18, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=18, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=18, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=18 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=18, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=20 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=20, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=20 ................
[CV] . max_depth=16, max_features=sqrt, n_estimators=20, total=   0.4s
[CV] max_depth=16, max_features=sqrt, n_estimators=20 ................
[CV] .

[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.1s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.1s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.2s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.2s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.2s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.2s
[CV] max_depth=16, max_features=log2, n_estimators=22 ................
[CV] . max_depth=16, max_features=log2, n_estimators=22, total=   0.2s
[CV] max_depth=18, max_features=sqrt, n_estimators=16 ................
[CV] .

[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] . max_depth=18, max_features=log2, n_estimators=18, total=   0.1s
[CV] max_depth=18, max_features=log2, n_estimators=18 ................
[CV] .

[Parallel(n_jobs=1)]: Done 320 out of 320 | elapsed:  1.4min finished


GridSearchCV(cv=10, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=False, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              ran

In [42]:
clf = grid.best_estimator_
clf.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=14, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=22,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

# Saving model

In [43]:
pickle.dump(clf, open(os.path.join("..", "models", "RandomForest_model.pkl"), 'wb'))
print("Model Saved")

Model Saved
