# CNC Mill Tool Wear
- https://www.kaggle.com/shasun/tool-wear-detection-in-cnc-mill
- Predict "Machining_Process"

## Setup

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [26]:
import os
path_parent = os.path.dirname(os.getcwd())
saveFolder = os.path.join(path_parent, 'Topic 02 - Classification', 'Data_CNC_Processed')
print(saveFolder)
#saveFolder = '..\Topic 02 - Classification\Data_CNC_Processed'
filename = 'pred_machining_process'
compression = 'gzip'
df = pd.read_pickle(f'{saveFolder}/{filename}.pkl',compression=compression)
df.head(3)

f:\Work\Coding\Class\ai-class-2564-1\Topic 02 - Classification\Data_CNC_Processed


Unnamed: 0,X1_ActualPosition,X1_ActualVelocity,X1_ActualAcceleration,X1_CommandPosition,X1_CommandVelocity,X1_CommandAcceleration,X1_CurrentFeedback,X1_DCBusVoltage,X1_OutputCurrent,X1_OutputVoltage,...,S1_OutputCurrent,S1_OutputVoltage,S1_OutputPower,S1_SystemInertia,M1_CURRENT_PROGRAM_NUMBER,M1_sequence_number,M1_CURRENT_FEEDRATE,Machining_Process,feedrate,clamp_pressure
0,198.0,0.0,0.0,198.0,0.0,0.0,0.18,0.0207,329.0,2.77,...,329.0,0.0,6.96e-07,12.0,1.0,0.0,50.0,7,6,4.0
1,198.0,-10.8,-350.0,198.0,-13.6,-358.0,-10.9,0.186,328.0,23.3,...,328.0,0.0,-5.27e-07,12.0,1.0,4.0,50.0,7,6,4.0
2,196.0,-17.8,-6.25,196.0,-17.9,-9.5e-05,-8.59,0.14,328.0,30.6,...,328.0,0.0,9.1e-07,12.0,1.0,7.0,50.0,7,6,4.0


In [27]:
colsX = np.array([item for item in df.columns if item not in 'Machining_Process'])
X = df[colsX].values
y = df['Machining_Process'].values

In [28]:
# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)


## Contruct pipeline

In [29]:
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(solver='liblinear', penalty='l1', C=0.1, multi_class='ovr')

var = VarianceThreshold(threshold=0)
l1 = SelectFromModel(estimator=lr, norm_order=1, max_features=20)
clf = RF(n_estimators=50)

# Construct pipeline object
pipe = Pipeline([
    ('var', var)
    ('scl', StandardScaler()),
    ('l1',l1),
    ('clf', clf)])

## Comparing training and validation accuracy


In [30]:
# WIthout cross validation
pipe.fit(X_train, y_train)
train_acc = pipe.score(X_train,y_train)
test_acc = pipe.score(X_test,y_test)

print(f"Training acc: {train_acc:4.3f}")
print(f"Test acc:     {test_acc:4.3f}")

Training acc: 1.000
Test acc:     0.694


In [31]:
# With cross validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(estimator=pipe,
                         X=X_train,
                         y=y_train,
                         cv=5,
                         n_jobs=1)

arrayPrint =   ', '.join([f'{el:3.2f}' for el in scores])
print(f"Score:", arrayPrint)
print("\nCrossed-validation accuracy")  
print(f"Mean:{np.mean(scores):6.3f}")
print(f"Std:{np.std(scores):6.3f}")

Score: 0.64, 0.62, 0.64, 0.71, 0.67

Crossed-validation accuracy
Mean: 0.656
Std: 0.030


In [33]:
# Get parameter names
for k, v in pipe.get_params().items():
    print(f"{k:25.25s}: {str(v)}")

memory                   : None
steps                    : [('scl', StandardScaler()), ('l1', SelectFromModel(estimator=LogisticRegression(C=0.1, multi_class='ovr',
                                             penalty='l1', solver='liblinear'),
                max_features=20)), ('clf', RandomForestClassifier(n_estimators=50))]
verbose                  : False
scl                      : StandardScaler()
l1                       : SelectFromModel(estimator=LogisticRegression(C=0.1, multi_class='ovr',
                                             penalty='l1', solver='liblinear'),
                max_features=20)
clf                      : RandomForestClassifier(n_estimators=50)
scl__copy                : True
scl__with_mean           : True
scl__with_std            : True
l1__estimator__C         : 0.1
l1__estimator__class_weig: None
l1__estimator__dual      : False
l1__estimator__fit_interc: True
l1__estimator__intercept_: 1
l1__estimator__l1_ratio  : None
l1__estimator__max_iter  : 100

In [34]:
set1 = {
    "l1__max_features": [20, 30, 40],
    "clf__n_estimators": [25, 50, 75],
    "clf__max_samples": [None],
    "clf__max_features": ["auto"],
    "clf__max_depth": [None],
      }
param_grid = [set1]

In [35]:
from sklearn.model_selection import GridSearchCV

gs = GridSearchCV(estimator=pipe, 
                  param_grid=param_grid, 
                  scoring='accuracy', 
                  cv=5,
                  n_jobs=-1)

In [36]:
gs.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scl', StandardScaler()),
                                       ('l1',
                                        SelectFromModel(estimator=LogisticRegression(C=0.1,
                                                                                     multi_class='ovr',
                                                                                     penalty='l1',
                                                                                     solver='liblinear'),
                                                        max_features=20)),
                                       ('clf',
                                        RandomForestClassifier(n_estimators=50))]),
             n_jobs=-1,
             param_grid=[{'clf__max_depth': [None],
                          'clf__max_features': ['auto'],
                          'clf__max_samples': [None],
                          'clf__n_estimators': [25, 50, 75],
              

In [37]:
df = pd.DataFrame(gs.cv_results_)
df = df.sort_values(by=['rank_test_score'])
display(df)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__max_depth,param_clf__max_features,param_clf__max_samples,param_clf__n_estimators,param_l1__max_features,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
7,47.205173,4.302207,0.13085,0.076058,,auto,,75,30,"{'clf__max_depth': None, 'clf__max_features': ...",0.700282,0.677401,0.663277,0.691243,0.689831,0.684407,0.012835,1
8,37.568402,2.397548,0.07956,0.010759,,auto,,75,40,"{'clf__max_depth': None, 'clf__max_features': ...",0.681921,0.664689,0.677119,0.666667,0.673446,0.672768,0.006413,2
4,64.367832,11.596524,0.294869,0.030789,,auto,,50,30,"{'clf__max_depth': None, 'clf__max_features': ...",0.692938,0.677684,0.667232,0.671469,0.632768,0.668418,0.019843,3
5,69.933111,12.916035,0.291666,0.076516,,auto,,50,40,"{'clf__max_depth': None, 'clf__max_features': ...",0.666102,0.663559,0.675706,0.669774,0.660169,0.667062,0.005344,4
1,55.469833,12.09397,0.169237,0.030392,,auto,,25,30,"{'clf__max_depth': None, 'clf__max_features': ...",0.655367,0.672034,0.674859,0.661299,0.663277,0.665367,0.007148,5
6,55.047105,8.622972,0.320888,0.083752,,auto,,75,20,"{'clf__max_depth': None, 'clf__max_features': ...",0.578814,0.641243,0.631073,0.692655,0.687288,0.646215,0.041571,6
3,66.774073,23.228603,0.288678,0.033719,,auto,,50,20,"{'clf__max_depth': None, 'clf__max_features': ...",0.631638,0.628531,0.623446,0.65113,0.668079,0.640565,0.01665,7
2,61.207644,15.62429,0.17904,0.024366,,auto,,25,40,"{'clf__max_depth': None, 'clf__max_features': ...",0.641243,0.639548,0.639548,0.64209,0.634746,0.639435,0.002543,8
0,60.455384,10.944786,0.169746,0.021199,,auto,,25,20,"{'clf__max_depth': None, 'clf__max_features': ...",0.614124,0.595763,0.516384,0.645763,0.658757,0.606158,0.050132,9


In [38]:
print(gs.best_score_)
print(gs.best_params_)

0.6844067796610169
{'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__max_samples': None, 'clf__n_estimators': 75, 'l1__max_features': 30}


In [39]:
gs.score(X_test,y_test)

0.7417611389401529