In [3]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2, SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [4]:
# Directory holding the contact-detection CSVs. Set the COBOT_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original location.
DATA_DIR = Path(
    os.environ.get(
        "COBOT_DATA_DIR",
        r"C:\aiprojects\machine-learning\cobot_contact_detection",
    )
)

# Training and test sets are read from sibling files in the same directory.
df_train = pd.read_csv(DATA_DIR / "contact_detection_train.csv")
df_test = pd.read_csv(DATA_DIR / "contact_detection_test.csv")

In [5]:
# Dimensions of the training frame as (rows, columns).
df_train.shape

(1543, 785)

In [6]:
# Preview the first rows of the training frame as loaded, before any column renaming.
df_train.head()

Unnamed: 0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10,...,Var776,Var777,Var778,Var779,Var780,Var781,Var782,Var783,Var784,Var785
0,Intentional_Link5,-0.118291,-14.3191,0.111912,18.1387,0.182278,2.56566,0.112718,-0.15555,0.035588,...,-0.000457,0.001637,0.000349,0.007492,0.005073,0.006561,-0.004119,-0.002986,-0.008046,0.004796
1,Intentional_Link5,0.147518,-14.7317,0.342014,17.2024,0.076992,2.55467,0.13286,0.010584,0.179948,...,-0.000511,0.001799,0.000356,0.002025,0.00362,0.010739,-0.000917,-0.004338,-0.001741,-0.003709
2,Intentional_Link5,-0.078618,-12.8195,0.342014,19.4162,0.562222,2.5446,-0.445753,-0.282243,0.261724,...,-0.00043,0.001394,0.000185,0.005963,0.002064,0.011668,-0.00423,-0.005112,0.002598,-0.000224
3,Intentional_Link5,-0.269047,-13.4543,0.18729,19.3011,0.53384,2.61784,-0.385329,-0.337184,0.239106,...,-0.000275,0.001255,0.000104,0.007092,0.008016,0.006376,-0.00994,-0.003381,0.007005,0.001893
4,Intentional_Link5,0.222896,-12.3315,0.647495,19.2257,0.486233,2.49242,-0.455824,-0.133923,0.27095,...,-0.000117,0.001059,-0.000477,0.008044,0.001806,0.003109,0.002315,-0.001202,-0.008632,0.001837


In [7]:
# Names the CSV currently uses for its columns: "Var1" through "Var785", in order.
current_column_names = [f"Var{position}" for position in range(1, 786)]

In [8]:
# We need to change the names of the columns in the dataframe.
# Current names are Var1, Var2, ... Var785.
# New names should describe the value each column represents.
#
# The first column is the class label. The remaining 784 columns are 28 consecutive
# groups ("S1" ... "S28") of 28 columns each; within a group the layout is:
#   Col  1 to  7 = Joint Torques
#   Col  8 to 14 = External Torques
#   Col 15 to 21 = Difference between desired and actual joint positions
#   Col 22 to 28 = Difference between desired and actual joint velocities
N_SAMPLE_GROUPS = 28
N_JOINTS = 7
SIGNAL_NAMES = (
    "Joint_Torque",
    "External_Torque",
    "Delta_Joint_Position",
    "Delta_Joint_Velocity",
)

# Loop order (group -> signal -> joint) matches the column order of the CSV.
new_column_names = ["Label"] + [
    f"S{group}_{signal}{joint}"
    for group in range(1, N_SAMPLE_GROUPS + 1)
    for signal in SIGNAL_NAMES
    for joint in range(1, N_JOINTS + 1)
]

In [9]:
# Map every current column name to its descriptive replacement, pairing the two
# lists by position. The length check guards against the lists drifting apart,
# which zip() would otherwise hide by silently truncating to the shorter one.
assert len(current_column_names) == len(new_column_names), (
    "current_column_names and new_column_names must have the same length"
)
new_column_dict = dict(zip(current_column_names, new_column_names))

new_column_dict

{'Var1': 'Label',
 'Var2': 'S1_Joint_Torque1',
 'Var3': 'S1_Joint_Torque2',
 'Var4': 'S1_Joint_Torque3',
 'Var5': 'S1_Joint_Torque4',
 'Var6': 'S1_Joint_Torque5',
 'Var7': 'S1_Joint_Torque6',
 'Var8': 'S1_Joint_Torque7',
 'Var9': 'S1_External_Torque1',
 'Var10': 'S1_External_Torque2',
 'Var11': 'S1_External_Torque3',
 'Var12': 'S1_External_Torque4',
 'Var13': 'S1_External_Torque5',
 'Var14': 'S1_External_Torque6',
 'Var15': 'S1_External_Torque7',
 'Var16': 'S1_Delta_Joint_Position1',
 'Var17': 'S1_Delta_Joint_Position2',
 'Var18': 'S1_Delta_Joint_Position3',
 'Var19': 'S1_Delta_Joint_Position4',
 'Var20': 'S1_Delta_Joint_Position5',
 'Var21': 'S1_Delta_Joint_Position6',
 'Var22': 'S1_Delta_Joint_Position7',
 'Var23': 'S1_Delta_Joint_Velocity1',
 'Var24': 'S1_Delta_Joint_Velocity2',
 'Var25': 'S1_Delta_Joint_Velocity3',
 'Var26': 'S1_Delta_Joint_Velocity4',
 'Var27': 'S1_Delta_Joint_Velocity5',
 'Var28': 'S1_Delta_Joint_Velocity6',
 'Var29': 'S1_Delta_Joint_Velocity7',
 'Var30': 'S2_Joi

In [10]:
# Apply the descriptive column names. rename() returns a new frame and leaves
# names that are not in the mapping untouched, so re-running this cell is harmless.
df_train = df_train.rename(columns=new_column_dict)
# Keep the test frame's columns in step with the training frame.
# Assumes the test CSV uses the same Var1..Var785 header - TODO confirm.
df_test = df_test.rename(columns=new_column_dict)
df_train.head()

Unnamed: 0,Label,S1_Joint_Torque1,S1_Joint_Torque2,S1_Joint_Torque3,S1_Joint_Torque4,S1_Joint_Torque5,S1_Joint_Torque6,S1_Joint_Torque7,S1_External_Torque1,S1_External_Torque2,...,S28_Delta_Joint_Position5,S28_Delta_Joint_Position6,S28_Delta_Joint_Position7,S28_Delta_Joint_Velocity1,S28_Delta_Joint_Velocity2,S28_Delta_Joint_Velocity3,S28_Delta_Joint_Velocity4,S28_Delta_Joint_Velocity5,S28_Delta_Joint_Velocity6,S28_Delta_Joint_Velocity7
0,Intentional_Link5,-0.118291,-14.3191,0.111912,18.1387,0.182278,2.56566,0.112718,-0.15555,0.035588,...,-0.000457,0.001637,0.000349,0.007492,0.005073,0.006561,-0.004119,-0.002986,-0.008046,0.004796
1,Intentional_Link5,0.147518,-14.7317,0.342014,17.2024,0.076992,2.55467,0.13286,0.010584,0.179948,...,-0.000511,0.001799,0.000356,0.002025,0.00362,0.010739,-0.000917,-0.004338,-0.001741,-0.003709
2,Intentional_Link5,-0.078618,-12.8195,0.342014,19.4162,0.562222,2.5446,-0.445753,-0.282243,0.261724,...,-0.00043,0.001394,0.000185,0.005963,0.002064,0.011668,-0.00423,-0.005112,0.002598,-0.000224
3,Intentional_Link5,-0.269047,-13.4543,0.18729,19.3011,0.53384,2.61784,-0.385329,-0.337184,0.239106,...,-0.000275,0.001255,0.000104,0.007092,0.008016,0.006376,-0.00994,-0.003381,0.007005,0.001893
4,Intentional_Link5,0.222896,-12.3315,0.647495,19.2257,0.486233,2.49242,-0.455824,-0.133923,0.27095,...,-0.000117,0.001059,-0.000477,0.008044,0.001806,0.003109,0.002315,-0.001202,-0.008632,0.001837


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df_train.describe()

Unnamed: 0,S1_Joint_Torque1,S1_Joint_Torque2,S1_Joint_Torque3,S1_Joint_Torque4,S1_Joint_Torque5,S1_Joint_Torque6,S1_Joint_Torque7,S1_External_Torque1,S1_External_Torque2,S1_External_Torque3,...,S28_Delta_Joint_Position5,S28_Delta_Joint_Position6,S28_Delta_Joint_Position7,S28_Delta_Joint_Velocity1,S28_Delta_Joint_Velocity2,S28_Delta_Joint_Velocity3,S28_Delta_Joint_Velocity4,S28_Delta_Joint_Velocity5,S28_Delta_Joint_Velocity6,S28_Delta_Joint_Velocity7
count,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,...,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0,1543.0
mean,-0.588014,-14.559518,-0.702089,18.100982,0.406691,2.374222,0.07089,-0.531252,-0.064745,-0.504229,...,-6.7e-05,0.000116,0.000356,0.026101,0.006104,0.002085,-0.000712,0.000726,0.00563,0.023975
std,1.473687,5.720703,1.658356,3.484278,0.496687,0.349668,0.337685,1.317212,1.494911,1.458791,...,0.000234,0.001793,0.001478,0.119024,0.188105,0.010797,0.029532,0.006582,0.173662,0.110203
min,-8.9336,-28.4625,-8.97714,7.59364,-2.56888,0.108383,-0.557448,-8.48649,-7.75996,-8.42944,...,-0.001671,-0.008088,-0.006182,-0.511981,-0.614235,-0.094425,-0.182505,-0.030147,-0.621818,-0.482474
25%,-0.459477,-19.6829,-0.618069,15.5123,0.210659,2.2205,-0.182081,-0.332871,-0.302403,-0.251405,...,-0.000254,-0.000534,-0.000102,-0.002933,-0.004555,-0.001481,-0.004501,-0.00241,-0.004352,-0.003428
50%,-0.153996,-13.7161,-0.193569,19.0789,0.305874,2.3148,0.112718,-0.146199,0.105308,-0.11503,...,-2.7e-05,-7e-06,1.9e-05,0.000295,0.00058,0.000475,-9.3e-05,-8.2e-05,0.000316,-0.000275
75%,0.032466,-10.21095,0.072239,20.5389,0.6199,2.52354,0.391954,-0.014101,0.319299,0.071525,...,9.1e-05,0.000808,0.000773,0.00387,0.006671,0.003241,0.003506,0.002346,0.006233,0.003667
max,1.20678,-0.362209,1.68296,31.3855,3.60818,5.04308,0.661119,0.95555,6.75593,1.52618,...,0.000696,0.008696,0.007742,0.529089,0.596661,0.102435,0.146417,0.045398,0.564136,0.526618


In [12]:
# Index range, column count, dtypes and memory usage of the renamed training frame.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1543 entries, 0 to 1542
Columns: 785 entries, Label to S28_Delta_Joint_Velocity7
dtypes: float64(784), object(1)
memory usage: 9.2+ MB


In [13]:
# Count NaN cells over the whole training frame: per-column counts, then their sum.
df_train.isna().sum().sum()
# The result is 0, i.e. there are no missing values to impute or drop.

0

In [14]:
# Feature matrix: every column of the training frame except the class label.
X = df_train.drop(columns='Label')
X.head()

Unnamed: 0,S1_Joint_Torque1,S1_Joint_Torque2,S1_Joint_Torque3,S1_Joint_Torque4,S1_Joint_Torque5,S1_Joint_Torque6,S1_Joint_Torque7,S1_External_Torque1,S1_External_Torque2,S1_External_Torque3,...,S28_Delta_Joint_Position5,S28_Delta_Joint_Position6,S28_Delta_Joint_Position7,S28_Delta_Joint_Velocity1,S28_Delta_Joint_Velocity2,S28_Delta_Joint_Velocity3,S28_Delta_Joint_Velocity4,S28_Delta_Joint_Velocity5,S28_Delta_Joint_Velocity6,S28_Delta_Joint_Velocity7
0,-0.118291,-14.3191,0.111912,18.1387,0.182278,2.56566,0.112718,-0.15555,0.035588,0.12862,...,-0.000457,0.001637,0.000349,0.007492,0.005073,0.006561,-0.004119,-0.002986,-0.008046,0.004796
1,0.147518,-14.7317,0.342014,17.2024,0.076992,2.55467,0.13286,0.010584,0.179948,0.261524,...,-0.000511,0.001799,0.000356,0.002025,0.00362,0.010739,-0.000917,-0.004338,-0.001741,-0.003709
2,-0.078618,-12.8195,0.342014,19.4162,0.562222,2.5446,-0.445753,-0.282243,0.261724,0.150801,...,-0.00043,0.001394,0.000185,0.005963,0.002064,0.011668,-0.00423,-0.005112,0.002598,-0.000224
3,-0.269047,-13.4543,0.18729,19.3011,0.53384,2.61784,-0.385329,-0.337184,0.239106,0.091486,...,-0.000275,0.001255,0.000104,0.007092,0.008016,0.006376,-0.00994,-0.003381,0.007005,0.001893
4,0.222896,-12.3315,0.647495,19.2257,0.486233,2.49242,-0.455824,-0.133923,0.27095,0.266215,...,-0.000117,0.001059,-0.000477,0.008044,0.001806,0.003109,0.002315,-0.001202,-0.008632,0.001837


In [15]:
# Target kept as a one-column DataFrame (not a Series) holding the class label strings.
y = df_train['Label'].to_frame()
y.head()

Unnamed: 0,Label
0,Intentional_Link5
1,Intentional_Link5
2,Intentional_Link5
3,Intentional_Link5
4,Intentional_Link5


In [16]:
# Keep the feature column labels so they can be reattached after scaling.
column_names = X.columns

In [17]:
# Rescale every feature column to the range [-1, 1] and wrap the result back into a
# DataFrame carrying the original column labels.
# NOTE(review): the scaler is fitted on all rows, and the train/test split only
# happens in a later cell, so the hold-out rows influence the scaling ranges.
# Consider splitting first and fitting the scaler on the training rows only.
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=column_names)
X_scaled.head()

Unnamed: 0,S1_Joint_Torque1,S1_Joint_Torque2,S1_Joint_Torque3,S1_Joint_Torque4,S1_Joint_Torque5,S1_Joint_Torque6,S1_Joint_Torque7,S1_External_Torque1,S1_External_Torque2,S1_External_Torque3,...,S28_Delta_Joint_Position5,S28_Delta_Joint_Position6,S28_Delta_Joint_Position7,S28_Delta_Joint_Velocity1,S28_Delta_Joint_Velocity2,S28_Delta_Joint_Velocity3,S28_Delta_Joint_Velocity4,S28_Delta_Joint_Velocity5,S28_Delta_Joint_Velocity6,S28_Delta_Joint_Velocity7
0,0.738655,0.006637,0.705247,-0.113557,-0.109234,-0.004082,0.099925,0.764648,0.074071,0.719242,...,0.025401,0.158832,-0.061848,-0.00204,0.022892,0.025971,0.08467,-0.280935,0.035069,-0.034242
1,0.79108,-0.022729,0.748418,-0.192265,-0.143323,-0.008536,0.132983,0.799839,0.093961,0.745941,...,-0.020205,0.178152,-0.060923,-0.012544,0.020492,0.068418,0.104141,-0.316713,0.045701,-0.051098
2,0.746479,0.113369,0.748418,-0.006168,0.013784,-0.012617,-0.816678,0.737812,0.105228,0.723698,...,0.048185,0.129938,-0.085511,-0.004978,0.017923,0.077856,0.083995,-0.337211,0.05302,-0.044191
3,0.708921,0.068188,0.719389,-0.015843,0.004594,0.017066,-0.717506,0.726175,0.102112,0.711782,...,0.179664,0.113347,-0.097172,-0.002808,0.027753,0.02409,0.049273,-0.291395,0.060451,-0.039994
4,0.805947,0.148102,0.805731,-0.022182,-0.01082,-0.033766,-0.833207,0.769229,0.106499,0.746884,...,0.313006,0.089993,-0.180543,-0.00098,0.017495,-0.009097,0.123794,-0.233692,0.034081,-0.040105


In [18]:
# One-hot encode the label column: one 0/1 integer column per distinct label value.
# NOTE(review): this indicator frame is later assigned to `y` and passed to the
# feature selectors and the classifier, which presumably turns the problem into a
# multi-output one. A single label column is the usual multiclass target - confirm
# this is intended.
encoded_labels = pd.get_dummies(y, dtype=int)
encoded_labels.head()

Unnamed: 0,Label_Collision_Link5,Label_Collision_Link6,Label_Intentional_Link5,Label_Intentional_Link6,Label_Noncontact
0,0,0,1,0,0
1,0,0,1,0,0
2,0,0,1,0,0
3,0,0,1,0,0
4,0,0,1,0,0


In [19]:
# Total of each label: column sums of the indicator frame give the row count per class.
print(encoded_labels.sum(axis=0))

Label_Collision_Link5      129
Label_Collision_Link6      132
Label_Intentional_Link5    225
Label_Intentional_Link6    218
Label_Noncontact           839
dtype: int64


In [20]:
# Feature selection
# NOTE: num_feats is reassigned to 100 at the top of the feature-selector cell
# before anything reads the value set here.
num_feats = 30
# Column labels of the feature frame as a plain list.
feature_names = list(X.columns)

In [21]:
# From here on X is the scaled feature frame and y is the one-hot label frame.
# Both names previously referred to the unscaled features and the raw label column,
# so cells above this one must not be re-run after it without restarting the kernel.
X = X_scaled
y = encoded_labels

In [22]:
num_feats = 100


def autoFeatureSelector(methods=None, features=None, labels=None, k=None, random_state=101):
    """Rank features by how many selection methods vote for them.

    Parameters
    ----------
    methods : iterable of str, optional
        Selection methods to run. Recognised names are ``'chi-square'``
        (``SelectKBest`` with ``chi2``) and ``'rf'`` (``SelectFromModel`` around a
        random forest). Names that are not recognised are ignored.
    features : pandas.DataFrame, optional
        Feature matrix. Defaults to the notebook-level ``X``.
    labels : array-like, optional
        Target passed to each selector's ``fit``. Defaults to the notebook-level ``y``.
    k : int, optional
        Selection budget handed to each method and the maximum number of feature
        names returned. Defaults to the notebook-level ``num_feats``.
    random_state : int or None, optional
        Seed for the random forest so that repeated runs select the same features.

    Returns
    -------
    pandas.Series
        Up to ``k`` feature names ordered by vote count (descending), ties broken by
        feature name (descending). The index starts at 1.

    Raises
    ------
    ValueError
        If ``methods`` contains none of the recognised method names.

    Notes
    -----
    NOTE(review): with the default arguments the selectors are fitted on the full
    ``X``/``y``, i.e. before the train/test split, so hold-out rows influence which
    features are chosen. Pass the training portion explicitly to avoid that.
    """
    methods = () if methods is None else methods
    features = X if features is None else features
    labels = y if labels is None else labels
    k = num_feats if k is None else k

    # CHI-SQUARED METHOD
    # Measures Chi-Squared stats (independence between categorical variables)
    def chi_squared_selector(data, target, n_keep):
        # chi2 needs non-negative inputs, so rescale to MinMaxScaler's default [0, 1].
        data_norm = MinMaxScaler().fit_transform(data)
        chi_selector = SelectKBest(chi2, k=n_keep)
        chi_selector.fit(data_norm, target)
        return chi_selector.get_support()

    # RF METHOD
    def embedded_rf_selector(data, target, n_keep):
        forest = RandomForestClassifier(n_estimators=100, random_state=random_state)
        rf_selector = SelectFromModel(forest, max_features=n_keep)
        rf_selector.fit(data, target)
        return rf_selector.get_support()

    # (method name, vote-table column, selector); order fixes the column order.
    available = (
        ('chi-square', 'Chi-2', chi_squared_selector),
        ('rf', 'Random Forest', embedded_rf_selector),
    )

    # Run only the requested methods, so a missing one no longer leaves its
    # support mask undefined when the vote table is built.
    votes = {
        column: selector(features, labels, k)
        for name, column, selector in available
        if name in methods
    }
    if not votes:
        raise ValueError(
            "methods must include at least one of: "
            + ", ".join(repr(name) for name, _, _ in available)
        )

    # Combine the per-method masks and count how many methods selected each feature.
    feature_selection_df = pd.DataFrame({'Feature': list(features.columns), **votes})
    feature_selection_df['Total'] = feature_selection_df.iloc[:, 1:].sum(axis=1)

    feature_selection_df = feature_selection_df.sort_values(['Total', 'Feature'], ascending=False)
    feature_selection_df.index = range(1, len(feature_selection_df) + 1)

    # Keep the k highest-ranked features.
    return feature_selection_df['Feature'].head(k)


best_features = autoFeatureSelector(methods=['chi-square', 'rf'])

print("\n\nTop Features of the datasets: ")
# Show every selected feature without changing the display option for later cells.
with pd.option_context('display.max_rows', None):
    print(best_features)



Top Features of the datasets: 
1            S8_External_Torque3
2            S7_External_Torque1
3            S6_External_Torque3
4            S5_External_Torque3
5            S4_External_Torque3
6            S3_External_Torque3
7           S27_External_Torque3
8              S26_Joint_Torque6
9           S26_External_Torque3
10             S25_Joint_Torque6
11          S25_External_Torque3
12          S24_External_Torque3
13          S24_External_Torque1
14             S23_Joint_Torque3
15          S23_External_Torque3
16          S23_External_Torque1
17          S22_External_Torque3
18          S21_External_Torque3
19          S21_External_Torque1
20          S20_External_Torque3
21          S20_External_Torque1
22          S19_External_Torque3
23          S19_External_Torque1
24             S18_Joint_Torque1
25          S18_External_Torque3
26          S16_External_Torque3
27          S14_External_Torque3
28          S12_External_Torque1
29          S10_External_Torque3
30        

In [23]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
# NOTE(review): X here still contains every scaled feature column - the
# `best_features` computed in the previous cell is not applied. Confirm whether
# the model is meant to be trained on the selected subset instead.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.20,random_state=101)

In [25]:
# Grid-search a decision tree over depth and split/leaf size limits.
# random_state pins the 'random' splitter so that Restart & Run All reproduces the
# same cross-validation scores and the same refitted model (same seed as the split).
dt_model = DecisionTreeClassifier(random_state=101)
dt_params = {'criterion': ['entropy'],
             'splitter': ['random'],
             'max_depth': [1,3,5],
             'min_samples_split': [2,6],
             'min_samples_leaf': [1,2]}

# refit=True retrains the best parameter combination on all of X_train, so dt_grid
# can be used directly for prediction afterwards.
dt_grid = GridSearchCV(estimator=dt_model, param_grid=dt_params, refit=True, verbose=3)
dt_grid.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=2, splitter=random;, score=0.526 total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=2, splitter=random;, score=0.583 total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=2, splitter=random;, score=0.571 total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=2, splitter=random;, score=0.551 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=2, splitter=random;, score=0.492 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=6, splitter=random;, score=0.530 total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=1, min_samples_leaf=1, min_samples_split=6, splitter=random;, score=0.583 total time=  

In [28]:
# Predict the hold-out rows with the refitted best estimator and report accuracy.
# NOTE(review): y_test holds one indicator column per class, so accuracy_score
# presumably counts a row as correct only when all of its columns match (subset
# accuracy) - confirm this is the metric intended.
y_predict = dt_grid.predict(X_test)
print(accuracy_score(y_test, y_predict))

0.6796116504854369


In [None]:
# Accuracy to be improved