In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score,r2_score
from sklearn.svm import  SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.simplefilter('ignore')
from sklearn.ensemble import VotingClassifier

In [2]:
df = pd.read_csv("./Assignment/Assign_21_Dec/predictive_maintenance.csv", index_col=0)
df.head()

Unnamed: 0_level_0,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [3]:
## Checking if the data has null values
df.isnull().sum().sum()

0

In [4]:
df = df.drop(['Product ID','Target'], axis=1)

In [5]:
X = df.drop('Failure Type', axis=1) 

ohe = OneHotEncoder(sparse_output=False, drop='first').set_output(transform='pandas')
ct = make_column_transformer((ohe, make_column_selector(dtype_include=object)),("passthrough", make_column_selector(dtype_exclude=object)),verbose_feature_names_out=False).set_output(transform='pandas')
X_ohe = ct.fit_transform(X)

y = df['Failure Type']
X_train, X_test, y_train, y_test = train_test_split(X_ohe,y, test_size=0.3, random_state=24, stratify=y)

In [6]:

lr = LogisticRegression(penalty = 'l2', solver = 'liblinear')
pipe = Pipeline([('LR',lr)])
pipe.fit(X_train,y_train)
y_pred = pipe.predict(X_test)
f1_score(y_test, y_pred, average='micro')

0.974

In [7]:

Cs = [0.001,0.5,1,3,5,10]
scores = []
for i in Cs:
    svm = SVC(kernel='linear', C=i)
    pipe = Pipeline([('SVC',svm)])
    pipe.fit(X_train,y_train)
    y_pred = pipe.predict(X_test)
    scores.append(f1_score(y_test, y_pred, average='micro'))

i_max = np.argmax(scores)
print("Best Cs : ", Cs[i_max])
print("Best score: ", scores[i_max])


Best Cs :  0.5
Best score:  0.983


In [8]:
Cs = [0.001,0.5,1,3,5,10]
Gs = np.linspace(0.001,4,10)
scores = []
for i in Cs:
    svm = SVC(kernel='rbf', C=i, gamma=0.01)
    pipe = Pipeline([('SVC',svm)])
    pipe.fit(X_train,y_train)
    y_pred = pipe.predict(X_test)
    scores.append(f1_score(y_test, y_pred, average='micro'))

i_max = np.argmax(scores)
print("Best Cs : ", Cs[i_max])
print("Best score: ", scores[i_max])

Best Cs :  1
Best score:  0.967


In [10]:
depth = [2,3,4,None]
min_sample_split = [2,5,10,50]
min_sample_leaf = [1,5,20]
score = []
for i in depth:
    for j in min_sample_split:
        for k in min_sample_leaf:
            dtc = DecisionTreeClassifier(random_state=24, max_depth=i, min_samples_split=j, min_samples_leaf=k)
            pipe = Pipeline([('DTC', dtc)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            score.append([i,j,k,f1_score(y_test, y_pred, average='micro')])

scores_df = pd.DataFrame(data=score, columns=['depth','min_split','min_leaf','f1_score'])
scores_df = scores_df.sort_values(by='f1_score', ascending=False)
scores_df.head()

Unnamed: 0,depth,min_split,min_leaf,f1_score
43,,10,5,0.979333
40,,5,5,0.979333
37,,2,5,0.979333
42,,10,1,0.975333
46,,50,5,0.974667


In [12]:
dtc = DecisionTreeClassifier(random_state=24)
svm_l = SVC(kernel='linear')
svm_r = SVC(kernel='rbf')
voting = VotingClassifier([('TREE',dtc),('SVML',svm_l),('SVMR', svm_r)], verbose=True)
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
print(f1_score(y_test,y_pred, average='micro'))

[Voting] ..................... (1 of 3) Processing TREE, total=   0.0s
[Voting] ..................... (2 of 3) Processing SVML, total=  10.0s
[Voting] ..................... (3 of 3) Processing SVMR, total=   0.1s
0.9786666666666667


In [22]:
df_pred = pd.read_csv("./Assignment/Assign_21_Dec/unlabelled_data.csv",index_col=0)
df_pred.head()

Unnamed: 0_level_0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10001,M,298.4,308.6,1554,42.8,0
10002,L,298.2,308.7,1408,46.3,3
10003,L,298.9,309.1,2870,4.6,143
10004,L,298.8,308.9,1450,41.3,208
10005,L,298.4,308.0,1471,48.0,215


In [23]:
ohe = OneHotEncoder(sparse_output=False, drop='first').set_output(transform='pandas')
ct = make_column_transformer((ohe, make_column_selector(dtype_include=object)),("passthrough", make_column_selector(dtype_exclude=object)),verbose_feature_names_out=False).set_output(transform='pandas')
df_pred = ct.fit_transform(df_pred)

In [24]:
df_pred.head()

Unnamed: 0_level_0,Type_L,Type_M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10001,0.0,1.0,298.4,308.6,1554,42.8,0
10002,1.0,0.0,298.2,308.7,1408,46.3,3
10003,1.0,0.0,298.9,309.1,2870,4.6,143
10004,1.0,0.0,298.8,308.9,1450,41.3,208
10005,1.0,0.0,298.4,308.0,1471,48.0,215


In [25]:
svm = SVC(kernel='linear', C=0.5)
pipe = Pipeline([('SVC',svm)])
pipe.fit(X_ohe,y)
y_pred = pipe.predict(df_pred)
y_pred

array(['No Failure', 'No Failure', 'Power Failure', 'No Failure',
       'No Failure', 'Overstrain Failure', 'Overstrain Failure',
       'No Failure', 'No Failure', 'No Failure', 'No Failure'],
      dtype=object)

In [26]:
df_pred['Failure Type'] = y_pred
df_pred

Unnamed: 0_level_0,Type_L,Type_M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Failure Type
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10001,0.0,1.0,298.4,308.6,1554,42.8,0,No Failure
10002,1.0,0.0,298.2,308.7,1408,46.3,3,No Failure
10003,1.0,0.0,298.9,309.1,2870,4.6,143,Power Failure
10004,1.0,0.0,298.8,308.9,1450,41.3,208,No Failure
10005,1.0,0.0,298.4,308.0,1471,48.0,215,No Failure
10006,1.0,0.0,298.2,308.2,1278,60.7,216,Overstrain Failure
10007,1.0,0.0,298.3,308.1,1412,52.3,218,Overstrain Failure
10008,0.0,0.0,298.2,308.4,1398,51.8,76,No Failure
10009,0.0,0.0,298.2,308.4,1680,34.6,82,No Failure
10010,1.0,0.0,298.2,308.1,1450,50.8,26,No Failure
