In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import matthews_corrcoef, f1_score, precision_score, recall_score, roc_auc_score

In [2]:
train_data = pd.read_csv("/home/b.cassoli@PTW.Maschinenbau.TU-Darmstadt.de/projects/bosch-production-line/raw/product_8463_train.csv")
train_data_labels = train_data.Response
train_data.drop(columns=["Id", "Response"], inplace=True)

test_data = pd.read_csv("/home/b.cassoli@PTW.Maschinenbau.TU-Darmstadt.de/projects/bosch-production-line/raw/product_8463_test.csv")
test_data_labels = test_data.Response
test_data.drop(columns=["Id", "Response"], inplace=True)


In [3]:
# Random Forest
model = RandomForestClassifier()
model.fit(X=train_data, y=train_data_labels)

y_pred = model.predict(train_data)
y_true = train_data_labels

print("TRAIN")
print(f"MCC: {matthews_corrcoef(y_true,y_pred):.2f}, F1: {f1_score(y_true,y_pred):.2f} Precision: {precision_score(y_true,y_pred):.2f} Recall: {recall_score(y_true,y_pred):.2f}")

y_pred = model.predict(test_data)
y_true = test_data_labels

print("TEST")
print(f"MCC: {matthews_corrcoef(y_true,y_pred):.3f}, F1: {f1_score(y_true,y_pred):.3f} Precision: {precision_score(y_true,y_pred):.3f} Recall: {recall_score(y_true,y_pred):.3f}, ROC AUC: {roc_auc_score(y_true,y_pred):.3f}")

TRAIN
MCC: 0.87, F1: 0.87 Precision: 0.96 Recall: 0.79
TEST
MCC: 0.499, F1: 0.400 Precision: 1.000 Recall: 0.250, ROC AUC: 0.625


In [6]:
# MLP
scaler = StandardScaler()
scaler.fit(train_data)

ros = RandomOverSampler(random_state=0)
X_resampled, y_resampled = ros.fit_resample(scaler.transform(train_data), train_data_labels)

clf = MLPClassifier(random_state=1, max_iter=300).fit(X_resampled, y_resampled)

y_pred = clf.predict(X_resampled)
y_true = y_resampled

print("TRAIN")
print(f"MCC: {matthews_corrcoef(y_true,y_pred):.2f}, F1: {f1_score(y_true,y_pred):.2f} Precision: {precision_score(y_true,y_pred):.2f} Recall: {recall_score(y_true,y_pred):.2f}")

y_pred = clf.predict(scaler.transform(test_data))
y_true = test_data_labels

print("TEST")
print(f"MCC: {matthews_corrcoef(y_true,y_pred):.3f}, F1: {f1_score(y_true,y_pred):.3f} Precision: {precision_score(y_true,y_pred):.3f} Recall: {recall_score(y_true,y_pred):3f}, ROC AUC: {roc_auc_score(y_true,y_pred):.3f}")

TRAIN
MCC: 1.00, F1: 1.00 Precision: 1.00 Recall: 1.00
TEST
MCC: 0.249, F1: 0.118 Precision: 1.000 Recall: 0.062500, ROC AUC: 0.531
