In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, average_precision_score, auc
import time

In [2]:
# Load the per-machine feature tables (fan, pump, slider, valve).
dfF = pd.read_csv('../Features/df_fan_feature.csv')
dfP = pd.read_csv('../Features/df_pump_feature.csv')
dfS = pd.read_csv('../Features/df_slider_feature.csv')
dfV = pd.read_csv('../Features/df_valve_feature.csv')

# Load the matching per-machine target tables.
TF = pd.read_csv('../Features/df_fan_target.csv')
TP = pd.read_csv('../Features/df_pump_target.csv')
TS = pd.read_csv('../Features/df_slider_target.csv')
TV = pd.read_csv('../Features/df_valve_target.csv')

# Stack the tables in fan -> pump -> slider -> valve order with a fresh
# 0..n-1 index. pd.concat replaces the chained DataFrame.append calls:
# append was deprecated in pandas 1.4 and removed in pandas 2.0, and each
# chained call also copied the accumulated frame again.
df_all = pd.concat([dfF, dfP, dfS, dfV], ignore_index=True)
t_all = pd.concat([TF, TP, TS, TV], ignore_index=True)

# Persist the combined tables next to the notebook (no index column).
df_all.to_csv("df_all_features.csv", index=False)
t_all.to_csv("df_all_targets.csv", index=False)

## All 4 Machines

In [4]:
# Features: the combined feature table, used as-is (no copy is made).
X = df_all
# Target: flatten the combined target table into a 1-D array, the shape
# scikit-learn estimators expect for y.
y = np.ravel(t_all.values)

In [13]:
# (rows, columns) of the combined feature table.
X.shape

(54057, 29)

In [14]:
# Scratch check: 70% of the 54057 rows reported by X.shape, i.e. the
# approximate training-set size left by a test_size=0.3 split.
54057*0.7

37839.899999999994

In [19]:
# Stage 1: keep 70% of the rows for training, hold out the remaining 30%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Stage 2: carve the 30% hold-out into test (67%) and validation (33%),
# i.e. roughly 20% / 10% of the full dataset. The same seed keeps both
# stages reproducible.
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.33, random_state=42)

In [24]:
# Time construction, fitting and scoring together. process_time() reports
# CPU time of the process, not wall-clock time.
start = time.process_time()
########## Tuned Random Forest #######
model = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    # A fresh estimator is built and fit once here, so warm_start does not
    # change the result; kept to preserve the original configuration.
    warm_start=True,
    max_features='sqrt',
    # Must be a real boolean. The previous string 'True' only worked by
    # being truthy and is rejected by parameter validation in recent
    # scikit-learn releases.
    oob_score=True,
    random_state=42,
)

model.fit(X_train, y_train)

# Accuracy on the data the forest was fit on vs. the held-out test split.
print(f'Random Forest Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'Random Forest Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%')

print(time.process_time() - start)

Random Forest Model's accuracy on training set is 99.99%
Random Forest Model's accuracy on test set is 93.04%
34.09375


In [25]:
start = time.process_time()
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(time.process_time() - start)

              precision    recall  f1-score   support

           0       0.96      0.65      0.78      2011
           1       0.93      0.99      0.96      8855

    accuracy                           0.93     10866
   macro avg       0.95      0.82      0.87     10866
weighted avg       0.93      0.93      0.92     10866

0.265625


- 0 = Abnormal
- 1 = Normal

In [26]:
# Per-class precision/recall/F1 on the validation split, which the model
# has not seen during fitting or test scoring.
y_pred_val = model.predict(X_val)
val_report = classification_report(y_val, y_pred_val)
print(val_report)

              precision    recall  f1-score   support

           0       0.97      0.66      0.78       997
           1       0.93      1.00      0.96      4355

    accuracy                           0.93      5352
   macro avg       0.95      0.83      0.87      5352
weighted avg       0.94      0.93      0.93      5352



## Machine type prediction

In [27]:
# Constant integer label vectors, one per machine type (labels 1..4).
# The four lengths sum to 54057, the row count of X.
# NOTE(review): the lengths are presumably the row counts of the fan, pump,
# slider and valve feature tables, in the order they were stacked -- confirm
# against the loaded frames before reusing with different data.
f = np.full(16650, 1, dtype=int)
p = np.full(12615, 2, dtype=int)
s = np.full(12282, 3, dtype=int)
v = np.full(12510, 4, dtype=int)

In [30]:
# Join the per-machine label vectors end to end, in f, p, s, v order, into
# a single 1-D label array.
label_parts = [f, p, s, v]
mach = np.concatenate(label_parts)

In [31]:
# Rebind the target to the machine-type labels. From here on y no longer
# holds the values read from the target tables; X is unchanged.
y = mach

In [32]:
# Stage 1: keep 70% of the rows for training, hold out the remaining 30%.
# y is now the machine-type label array.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Stage 2: carve the 30% hold-out into test (67%) and validation (33%),
# i.e. roughly 20% / 10% of the full dataset.
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.33, random_state=42)

In [33]:
# Time construction, fitting and scoring together. process_time() reports
# CPU time of the process, not wall-clock time.
start = time.process_time()
########## Tuned Random Forest #######
model = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    # A fresh estimator is built and fit once here, so warm_start does not
    # change the result; kept to preserve the original configuration.
    warm_start=True,
    max_features='sqrt',
    # Must be a real boolean. The previous string 'True' only worked by
    # being truthy and is rejected by parameter validation in recent
    # scikit-learn releases.
    oob_score=True,
    random_state=42,
)

model.fit(X_train, y_train)

# Accuracy on the data the forest was fit on vs. the held-out test split.
print(f'Random Forest Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'Random Forest Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%')

print(time.process_time() - start)

Random Forest Model's accuracy on training set is 100.00%
Random Forest Model's accuracy on test set is 94.07%
28.265625


In [34]:
start = time.process_time()
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(time.process_time() - start)

              precision    recall  f1-score   support

           1       0.94      0.93      0.93      3381
           2       0.90      0.90      0.90      2509
           3       0.93      0.94      0.94      2490
           4       1.00      1.00      1.00      2486

    accuracy                           0.94     10866
   macro avg       0.94      0.94      0.94     10866
weighted avg       0.94      0.94      0.94     10866

0.1875


In [35]:
# Per-class precision/recall/F1 on the validation split, which the model
# has not seen during fitting or test scoring.
y_pred_val = model.predict(X_val)
val_report = classification_report(y_val, y_pred_val)
print(val_report)

              precision    recall  f1-score   support

           1       0.95      0.94      0.95      1664
           2       0.91      0.91      0.91      1240
           3       0.94      0.94      0.94      1254
           4       1.00      0.99      1.00      1194

    accuracy                           0.95      5352
   macro avg       0.95      0.95      0.95      5352
weighted avg       0.95      0.95      0.95      5352

