In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, average_precision_score, auc
import time

In [2]:
# Load the per-machine feature tables (fan, pump, slider, valve) and their
# matching target tables.
dfF = pd.read_csv('../Features/df_fan_feature.csv')
dfP = pd.read_csv('../Features/df_pump_feature.csv')
dfS = pd.read_csv('../Features/df_slider_feature.csv')
dfV = pd.read_csv('../Features/df_valve_feature.csv')
TF = pd.read_csv('../Features/df_fan_target.csv')
TP = pd.read_csv('../Features/df_pump_target.csv')
TS = pd.read_csv('../Features/df_slider_target.csv')
TV = pd.read_csv('../Features/df_valve_target.csv')

# Stack the four machines in a fixed order (fan, pump, slider, valve) with a
# fresh 0..n-1 index. pd.concat replaces the chained DataFrame.append calls:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# a single concat also avoids copying the growing frame once per append.
df_all = pd.concat([dfF, dfP, dfS, dfV], ignore_index=True)
t_all = pd.concat([TF, TP, TS, TV], ignore_index=True)

# Persist the combined tables next to the notebook.
df_all.to_csv("df_all_features.csv", index=False)
t_all.to_csv("df_all_targets.csv", index=False)


## Abnormal vs. normal classification (all 4 machines)

In [3]:
# Feature matrix: the combined per-machine feature table, used as-is.
X = df_all
# Labels: the combined target table flattened to a 1-D array.
y = np.ravel(t_all.values)

In [4]:
# Two-stage split with one fixed seed: 20% of all rows become the test set,
# then 5% of the remaining rows are set aside as a validation set.
split_seed = 69
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.2, random_state=split_seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.05, random_state=split_seed
)

In [5]:
# Fit the random forest on the training rows and report accuracy on the
# training and test sets, followed by the CPU time (time.process_time, not
# wall-clock time) spent on fitting and scoring.
start = time.process_time()
########## Tuned Random Forest #######
model = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    # The estimator is created fresh and fitted once in this cell, so
    # warm_start only matters if fit() is called again on this same object.
    warm_start=True,
    max_features='sqrt',
    # Must be a real boolean. The previous string 'True' only worked on older
    # scikit-learn because any non-empty string is truthy (so 'False' would
    # also have enabled it); scikit-learn's parameter validation rejects it.
    oob_score=True,
    random_state=42,
)

model.fit(X_train, y_train)

print(f"Random Forest Model's accuracy on training set is {100*model.score(X_train, y_train):.2f}%")
print(f"Random Forest Model's accuracy on test set is {100*model.score(X_test, y_test):.2f}%")

print(time.process_time() - start)

Random Forest Model's accuracy on training set is 99.99%
Random Forest Model's accuracy on test set is 93.53%
32.234375


In [6]:
start = time.process_time()
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(time.process_time() - start)

              precision    recall  f1-score   support

           0       0.97      0.66      0.79      1936
           1       0.93      0.99      0.96      8876

    accuracy                           0.94     10812
   macro avg       0.95      0.83      0.87     10812
weighted avg       0.94      0.94      0.93     10812

0.171875


Class labels in the report above:

- `0` = abnormal
- `1` = normal

In [7]:
# Per-class report for the same model on the validation rows.
y_pred_val = model.predict(X_val)
val_report = classification_report(y_val, y_pred_val)
print(val_report)

              precision    recall  f1-score   support

           0       0.96      0.68      0.80       414
           1       0.93      0.99      0.96      1749

    accuracy                           0.93      2163
   macro avg       0.95      0.84      0.88      2163
weighted avg       0.94      0.93      0.93      2163



## Machine type prediction

In [8]:
# Integer machine-type labels, one per feature row:
# 1 = fan, 2 = pump, 3 = slider, 4 = valve.
# The lengths come from the loaded feature tables rather than hard-coded row
# counts, so the labels cannot drift out of sync with the rows of df_all
# (which stacks dfF, dfP, dfS, dfV in that order) if the CSVs change.
f = np.full(len(dfF), 1, dtype=int)
p = np.full(len(dfP), 2, dtype=int)
s = np.full(len(dfS), 3, dtype=int)
v = np.full(len(dfV), 4, dtype=int)

In [9]:
# Join the four per-machine label arrays end to end, in the order f, p, s, v.
mach = np.concatenate([f, p, s, v], axis=0)

In [11]:
# Re-point y at the machine-type labels. This overwrites the y assigned
# earlier from t_all; X is not reassigned, so the same features are reused.
y = mach

In [12]:
# Two-stage split with one fixed seed: 20% of all rows become the test set,
# then 5% of the remaining rows are set aside as a validation set.
split_seed = 69
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.2, random_state=split_seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.05, random_state=split_seed
)

In [13]:
# Fit a random forest on the current training rows and report accuracy on the
# training and test sets, followed by the CPU time (time.process_time, not
# wall-clock time) spent on fitting and scoring.
start = time.process_time()
########## Tuned Random Forest #######
model = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    # The estimator is created fresh and fitted once in this cell, so
    # warm_start only matters if fit() is called again on this same object.
    warm_start=True,
    max_features='sqrt',
    # Must be a real boolean. The previous string 'True' only worked on older
    # scikit-learn because any non-empty string is truthy (so 'False' would
    # also have enabled it); scikit-learn's parameter validation rejects it.
    oob_score=True,
    random_state=42,
)

model.fit(X_train, y_train)

print(f"Random Forest Model's accuracy on training set is {100*model.score(X_train, y_train):.2f}%")
print(f"Random Forest Model's accuracy on test set is {100*model.score(X_test, y_test):.2f}%")

print(time.process_time() - start)

Random Forest Model's accuracy on training set is 100.00%
Random Forest Model's accuracy on test set is 94.36%
28.015625


In [14]:
# Predict on the test set and print the per-class report, then the CPU time
# (time.process_time) consumed by prediction plus building/printing the report.
start = time.process_time()
y_pred = model.predict(X_test)
test_report = classification_report(y_test, y_pred)
print(test_report)
elapsed = time.process_time() - start
print(elapsed)

              precision    recall  f1-score   support

           1       0.94      0.94      0.94      3335
           2       0.90      0.91      0.91      2538
           3       0.94      0.93      0.93      2444
           4       1.00      1.00      1.00      2495

    accuracy                           0.94     10812
   macro avg       0.94      0.94      0.94     10812
weighted avg       0.94      0.94      0.94     10812

0.21875


In [15]:
# Per-class report for the same model on the validation rows.
y_pred_val = model.predict(X_val)
val_report = classification_report(y_val, y_pred_val)
print(val_report)

              precision    recall  f1-score   support

           1       0.93      0.94      0.93       637
           2       0.89      0.88      0.88       516
           3       0.91      0.91      0.91       496
           4       0.99      0.99      0.99       514

    accuracy                           0.93      2163
   macro avg       0.93      0.93      0.93      2163
weighted avg       0.93      0.93      0.93      2163

