In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import time

In [2]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

## Get the vibration signal features data

In [3]:
features_data = pd.read_csv('C:/Users/intel/Desktop/Mtech_Proj/Digital twin_29 Jan 2021/Low_features_digitalTwin_fault.csv',
                            index_col=0)

In [4]:
features_data.head()

Unnamed: 0,x_Mean,x_Min,x_Kurtosis,x_Crest_Factor,x_Skewness,x_Form_Factor,fault_Class
0,-0.015542,-0.531908,1.559893,4.469807,-0.080251,-8.142677,C1
1,-0.014872,-0.609838,1.760466,4.377686,-0.269365,-8.633299,C1
2,-0.017018,-0.646336,1.809119,4.169048,-0.063207,-7.533026,C1
3,-0.011631,-0.547362,1.77447,4.277821,-0.079043,-11.263617,C1
4,-0.018077,-0.57301,1.413154,4.21109,-0.098778,-7.016276,C1


# Train Test Split

** Split your data into a training set and a testing set.**

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X = features_data.drop('fault_Class',axis=1)
y = features_data['fault_Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [7]:
model = XGBClassifier()

In [8]:
start = time.time()
model.fit(X_train,y_train,verbose=1)
stop = time.time()
print(f"Training time: {stop - start}s")


Training time: 1.291224718093872s


In [9]:
predictions = model.predict(X_test)

In [10]:
from sklearn.metrics import classification_report,confusion_matrix

In [11]:
print(confusion_matrix(y_test,predictions))

[[412   0  74   0   0]
 [  0 538   0   0   0]
 [109   0 423   0   0]
 [  0   0   0 525   0]
 [  0   0   0   0 559]]


In [12]:
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

          C1       0.79      0.85      0.82       486
          C2       1.00      1.00      1.00       538
          C3       0.85      0.80      0.82       532
          C4       1.00      1.00      1.00       525
          C5       1.00      1.00      1.00       559

    accuracy                           0.93      2640
   macro avg       0.93      0.93      0.93      2640
weighted avg       0.93      0.93      0.93      2640



We noticed that your model was pretty good! Let's see if we can tune the parameters to try to get even better.Let try to use GridSearch.

## Gridsearch Approach

** Import GridsearchCV from SciKit Learn.**

In [13]:
from sklearn.model_selection import GridSearchCV

**Create a dictionary called param_grid and fill out some parameters for C and gamma.**

In [14]:
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001]} 

** Create a GridSearchCV object and fit it to the training data.**

In [15]:
grid = GridSearchCV(XGBClassifier(),param_grid,refit=True,verbose=2)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=0.1, gamma=1 ..................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................................... C=0.1, gamma=1, total=   0.9s
[CV] C=0.1, gamma=1 ..................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


[CV] ................................... C=0.1, gamma=1, total=   0.9s
[CV] C=0.1, gamma=1 ..................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................... C=0.1, gamma=1, total=   0.9s
[CV] C=0.1, gamma=1 ..................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................... C=0.1, gamma=1, total=   1.7s
[CV] C=0.1, gamma=1 ..................................................
Parameters: { C } mi

[CV] ............................... C=0.1, gamma=0.001, total=   0.8s
[CV] C=0.1, gamma=0.001 ..............................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ............................... C=0.1, gamma=0.001, total=   1.3s
[CV] C=0.1, gamma=0.001 ..............................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ............................... C=0.1, gamma=0.001, total=   1.2s
[CV] C=1, gamma=1 ....................................................
Parameters: { C } mi

[CV] .................................. C=1, gamma=0.01, total=   1.3s
[CV] C=1, gamma=0.01 .................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] .................................. C=1, gamma=0.01, total=   1.2s
[CV] C=1, gamma=0.001 ................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................. C=1, gamma=0.001, total=   0.8s
[CV] C=1, gamma=0.001 ................................................
Parameters: { C } mi

[CV] .................................. C=10, gamma=0.1, total=   1.2s
[CV] C=10, gamma=0.01 ................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................. C=10, gamma=0.01, total=   1.1s
[CV] C=10, gamma=0.01 ................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................. C=10, gamma=0.01, total=   1.4s
[CV] C=10, gamma=0.01 ................................................
Parameters: { C } mi

[CV] ................................. C=100, gamma=0.1, total=   1.2s
[CV] C=100, gamma=0.1 ................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................. C=100, gamma=0.1, total=   1.2s
[CV] C=100, gamma=0.1 ................................................
Parameters: { C } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] ................................. C=100, gamma=0.1, total=   1.1s
[CV] C=100, gamma=0.1 ................................................
Parameters: { C } mi

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:  1.5min finished


GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameter

** Now take that grid model and create some predictions using the test set and create classification reports and confusion matrices for them. lets find if we were able to improve?**

In [16]:
grid_predictions = grid.predict(X_test)

In [17]:
print(confusion_matrix(y_test,grid_predictions))

[[405   0  81   0   0]
 [  0 538   0   0   0]
 [106   0 426   0   0]
 [  0   0   0 525   0]
 [  0   0   0   0 559]]


In [18]:
print(classification_report(y_test,grid_predictions))

              precision    recall  f1-score   support

          C1       0.79      0.83      0.81       486
          C2       1.00      1.00      1.00       538
          C3       0.84      0.80      0.82       532
          C4       1.00      1.00      1.00       525
          C5       1.00      1.00      1.00       559

    accuracy                           0.93      2640
   macro avg       0.93      0.93      0.93      2640
weighted avg       0.93      0.93      0.93      2640



In [22]:
# pwd

'C:\\Users\\intel\\Desktop\\Mtech_Proj\\Shallow_Methods_28Jul'