In [1]:
import pandas as pd
import numpy as np
from numpy import mean, std
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.covariance import EllipticEnvelope
from sklearn.metrics import classification_report
import itertools
from sklearn.neural_network import MLPClassifier

import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import cohen_kappa_score, make_scorer
kappa_scorer = make_scorer(cohen_kappa_score)

In [2]:
# Load data from Url
url = 'https://drive.google.com/uc?id=1ImrEW5T3cOdecopkVNd279XJ8Pifl9Ke'
df = pd.read_csv(url)
# One-hot encode the 'climate_class' categorical feature
df_encoded = pd.get_dummies(df, columns=['climate_class'], prefix=['climate_class'], drop_first=False)

In [3]:
df_encoded

Unnamed: 0,final_duration,final_cost,adjustment_cost,final_change_cost,delay_class,climate_class_C,climate_class_HD,climate_class_MR,climate_class_SA,climate_class_SMR,climate_class_VHD,climate_class_VHH
0,2837,7.178160e+11,4.632660e+11,0.000000e+00,2,False,False,False,False,False,True,False
1,1765,2.525490e+11,4.755340e+11,0.000000e+00,2,False,False,False,False,False,True,False
2,3311,9.573671e+10,3.285553e+10,0.000000e+00,2,False,False,False,False,False,True,False
3,1757,3.828944e+10,3.713023e+10,0.000000e+00,2,False,False,False,False,False,True,False
4,2577,1.042549e+10,3.080719e+09,0.000000e+00,2,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...
238,796,7.308602e+09,0.000000e+00,7.308602e+09,2,True,False,False,False,False,False,False
239,1456,7.437125e+09,3.319195e+09,7.519285e+09,2,False,True,False,False,False,False,False
240,1087,5.139000e+09,1.862507e+09,0.000000e+00,2,True,False,False,False,False,False,False
241,819,5.897179e+09,0.000000e+00,5.943477e+09,2,True,False,False,False,False,False,False


In [4]:
# Define the condition
condition = df_encoded['delay_class'] < 2

# Filter rows that meet the condition
filtered_df = df_encoded[condition]

# Duplicate the filtered rows (you can change the number of times to duplicate)
num_duplicates = 2  # Change this to the desired number of duplicates
duplicated_df = pd.concat([filtered_df] * num_duplicates, ignore_index=True)

# Concatenate the original DataFrame with the duplicated DataFrame
filtered_df = pd.concat([df_encoded, duplicated_df], ignore_index=True)

In [5]:
# Separate features and target
X = filtered_df[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost','climate_class_C', 'climate_class_HD', 'climate_class_MR', 'climate_class_SA', 'climate_class_SMR', 'climate_class_VHD', 'climate_class_VHH']]
y = filtered_df['delay_class']

# Normalize numerical features
scaler = StandardScaler()
X[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost']] = scaler.fit_transform(X[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost']] = scaler.fit_transform(X[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost']])


In [6]:
# Show data sample
filtered_df

Unnamed: 0,final_duration,final_cost,adjustment_cost,final_change_cost,delay_class,climate_class_C,climate_class_HD,climate_class_MR,climate_class_SA,climate_class_SMR,climate_class_VHD,climate_class_VHH
0,2837,7.178160e+11,4.632660e+11,0.000000e+00,2,False,False,False,False,False,True,False
1,1765,2.525490e+11,4.755340e+11,0.000000e+00,2,False,False,False,False,False,True,False
2,3311,9.573671e+10,3.285553e+10,0.000000e+00,2,False,False,False,False,False,True,False
3,1757,3.828944e+10,3.713023e+10,0.000000e+00,2,False,False,False,False,False,True,False
4,2577,1.042549e+10,3.080719e+09,0.000000e+00,2,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...
434,849,1.506803e+10,5.336830e+09,1.775818e+10,0,True,False,False,False,False,False,False
435,1042,1.715261e+10,6.346088e+09,0.000000e+00,0,True,False,False,False,False,False,False
436,161,1.572668e+10,1.888369e+08,1.590530e+10,0,False,False,False,False,False,True,False
437,597,2.706334e+09,0.000000e+00,2.677253e+09,1,False,False,False,False,False,True,False


In [7]:
X.shape

(439, 11)

In [8]:
# Apply outlier detection
outlier_detector = EllipticEnvelope()
outliers = outlier_detector.fit_predict(X[['final_duration', 'final_cost', 'adjustment_cost', 'final_change_cost', 'climate_class_C', 'climate_class_HD', 'climate_class_MR', 'climate_class_SA', 'climate_class_SMR', 'climate_class_VHD', 'climate_class_VHH']])
X = X[outliers == 1]
y = y[outliers == 1]

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)

In [10]:
X.shape

(396, 11)

# SVC

In [11]:
from sklearn.svm import SVC
model = SVC()
kernel = ['linear','poly', 'rbf', 'sigmoid']
C = [ 1.0, 2.0, 3.0, 5.0, 10.0, 0.1, 0.01, 100]
grid = dict(kernel=kernel,C=C)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.724174 using {'C': 100, 'kernel': 'rbf'}


In [11]:
model_svm = SVC(C= 100, kernel='rbf')

model=model_svm
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.7329 (0.0301)
f1_macro: 0.7299 (0.0302)
precision_macro: 0.7439 (0.0294)
recall_macro: 0.7335 (0.0299)
kappa_scorer: 0.5995 (0.0450)


# KNN

In [14]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
n_neighbors = range(1, 21)
weights = ['uniform', 'distance']
algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']
grid = dict(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro', error_score=0)

grid_result = grid_search.fit(X, y)
print("Best F1 Score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best F1 Score: 0.863607 using {'algorithm': 'auto', 'n_neighbors': 1, 'weights': 'uniform'}


In [12]:
model_knn = KNeighborsClassifier(algorithm= 'auto', n_neighbors= 1, weights= 'uniform')
model = model_knn
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8662 (0.0305)
f1_macro: 0.8590 (0.0337)
precision_macro: 0.8833 (0.0319)
recall_macro: 0.8663 (0.0304)
kappa_scorer: 0.7993 (0.0456)


# RandomForest

In [16]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=1)
n_estimators = [50, 100, 120, 150, 200, 300]
max_features = ['sqrt', 'log2', 0.5]
max_depth = [2,6,8,10,12,14,16]
grid = dict(n_estimators=n_estimators,
            max_features=max_features,max_depth=max_depth)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.900766 using {'max_depth': 14, 'max_features': 'sqrt', 'n_estimators': 50}


In [13]:
model_rf = RandomForestClassifier(random_state=1, max_depth=14 , max_features='sqrt', n_estimators=50)

from sklearn.metrics import accuracy_score
history = model_rf.fit(X_train,y_train)
y_hat = model_rf.predict(X_test)

cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model_rf, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_rf, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_rf, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_rf, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_rf, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.9101 (0.0370)
f1_macro: 0.9070 (0.0389)
precision_macro: 0.9225 (0.0304)
recall_macro: 0.9102 (0.0367)
kappa_scorer: 0.8652 (0.0554)


# DT

In [15]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(random_state=1)
max_features = ['sqrt', 'log2', 0.5]
max_depth = [2,6,8,10,12,14,16]
grid = dict(max_features=max_features,max_depth=max_depth)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.872435 using {'max_depth': 14, 'max_features': 0.5}


In [14]:
model_dt = DecisionTreeClassifier(random_state=1, max_depth=16 , max_features=0.5)
from sklearn.metrics import accuracy_score
history = model_dt.fit(X_train,y_train)
y_hat = model_dt.predict(X_test)

cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model_dt, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_dt, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_dt, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_dt, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model_dt, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8824 (0.0400)
f1_macro: 0.8768 (0.0431)
precision_macro: 0.8961 (0.0381)
recall_macro: 0.8826 (0.0398)
kappa_scorer: 0.8236 (0.0600)


# NB

In [17]:
model =  GaussianNB()
var_smoothing= np.logspace(0,-9, num=5)
grid = dict(var_smoothing=var_smoothing)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.418914 using {'var_smoothing': 0.005623413251903491}


In [15]:
model_nb = GaussianNB(var_smoothing= 1.0)

model=model_nb
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.4575 (0.0498)
f1_macro: 0.4158 (0.0507)
precision_macro: 0.4609 (0.0702)
recall_macro: 0.4625 (0.0500)
kappa_scorer: 0.1907 (0.0744)


# Ann Shallow

In [46]:
# Tuning hyperparameters of shallow ANNs using grid search algorithm
model = MLPClassifier(max_iter=1000,  random_state=1)
hidden_layer_sizes= [(7),(8),(9),(10)]
activation= ['tanh', 'relu','identity','logistic']
solver= ['sgd', 'adam','lbfgs']
alpha= [0.0001, 0.1, 0.5, 1, 0.7]
learning_rate= ['constant','adaptive','invscaling']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.799191 using {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': 10, 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [16]:
from sklearn.neural_network import MLPClassifier

model_ann = MLPClassifier(max_iter=1000,random_state=1, activation= 'logistic', alpha= 0.0001, hidden_layer_sizes= (10), learning_rate= 'constant', solver= 'lbfgs' )

model=model_ann
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8061 (0.0497)
f1_macro: 0.7990 (0.0514)
precision_macro: 0.8187 (0.0519)
recall_macro: 0.8062 (0.0500)
kappa_scorer: 0.7091 (0.0748)


# Layer 2

In [47]:
# Finding the best architecture for proposed Deep-MLP-NN and Tuning its hyperparameters using grid search algorithm
layer_2= []
for a in range(7,10):
    for b in range(7,10):
        layer_2.append((a,b))
#2layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_2
activation= ['tanh', 'relu']
solver= ['sgd', 'adam','lbfgs']
alpha= [0.1, 1, 0.7]
learning_rate= ['constant','adaptive','invscaling']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.853434 using {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (9, 7), 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [17]:
model_mlp_2layer = MLPClassifier(max_iter=2000,random_state=1, activation= 'tanh', alpha= 0.1, hidden_layer_sizes= (9, 7), learning_rate= 'constant', solver= 'lbfgs' )

model=model_mlp_2layer
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8399 (0.0377)
f1_macro: 0.8331 (0.0410)
precision_macro: 0.8483 (0.0365)
recall_macro: 0.8400 (0.0377)
kappa_scorer: 0.7598 (0.0565)


# Layer 3

In [50]:
layer_3= []
for a in range(7,10):
    for b in range(7,10):
        for c in range(7,10):
            layer_3.append((a,b,c))
#3layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_3
activation= ['tanh', 'relu']
solver= ['adam','lbfgs']
alpha= [0.1, 0.7]
learning_rate= ['constant','adaptive']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.864859 using {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (8, 8, 9), 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [18]:
model_mlp_3layer = MLPClassifier(max_iter=2000,random_state=1, activation= 'tanh', alpha= 0.1, hidden_layer_sizes= (8, 8, 9), learning_rate= 'constant', solver= 'lbfgs' )

model=model_mlp_3layer
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8562 (0.0411)
f1_macro: 0.8469 (0.0463)
precision_macro: 0.8700 (0.0429)
recall_macro: 0.8563 (0.0411)
kappa_scorer: 0.7842 (0.0616)


# Layer 4

In [52]:
layer_4= []
for a in range(7,10):
    for b in range(7,10):
        for c in range(7,10):
            for d in range(7,10):
                layer_4.append((a,b,c,d))
#4layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_4
activation= ['tanh', 'relu']
solver= ['adam','lbfgs']
alpha= [0.1, 0.7]
learning_rate= ['constant','adaptive']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.870767 using {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (7, 9, 8, 9), 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [19]:
model_mlp_4layer = MLPClassifier(max_iter = 2000,
                                 random_state = 1, 
                                 activation = 'tanh', 
                                 alpha = 0.1, 
                                 hidden_layer_sizes = (7, 9, 8, 9), 
                                 learning_rate = 'constant', 
                                 solver = 'lbfgs'
                                )

model=model_mlp_4layer
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8632 (0.0405)
f1_macro: 0.8563 (0.0437)
precision_macro: 0.8799 (0.0383)
recall_macro: 0.8635 (0.0401)
kappa_scorer: 0.7947 (0.0607)


# Layer 5

In [55]:
layer_5= []
for a in range(7,10):
    for b in range(7,10):
        for c in range(7,10):
            for d in range(7,10):
                 for e in range(7,10):
                        layer_5.append((a,b,c,d,e))
#5layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_5
activation= ['tanh']
solver= ['adam','lbfgs']
alpha= [0.1]
learning_rate= ['constant']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.867297 using {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (9, 8, 9, 9, 7), 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [20]:
model_mlp_5layer = MLPClassifier(max_iter=2000,
                                 random_state=1, 
                                 activation= 'tanh', 
                                 alpha= 0.1, 
                                 hidden_layer_sizes= (9, 8, 9, 9, 7), 
                                 learning_rate= 'constant', 
                                 solver= 'lbfgs' 
                                )

model=model_mlp_5layer
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8425 (0.0414)
f1_macro: 0.8323 (0.0458)
precision_macro: 0.8609 (0.0426)
recall_macro: 0.8426 (0.0412)
kappa_scorer: 0.7637 (0.0621)


# Layer 6

In [57]:
layer_6= []
for a in range(7,10):
    for b in range(7,10):
        for c in range(7,10):
            for d in range(7,10):
                 for e in range(7,10):
                        for f in range(7,10):
                            layer_6.append((a,b,c,d,e,f))
#6layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_6
activation= ['tanh']
solver= ['lbfgs']
alpha= [0.1]
learning_rate= ['constant']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(X, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.868773 using {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (8, 7, 7, 8, 8, 9), 'learning_rate': 'constant', 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [21]:
model_mlp_6layer = MLPClassifier(max_iter=2000,
                                 random_state=1, 
                                 activation= 'tanh', 
                                 alpha= 0.1, 
                                 hidden_layer_sizes= (8, 7, 7, 8, 8, 9), 
                                 learning_rate= 'constant', 
                                 solver= 'lbfgs' 
                                )

model=model_mlp_6layer
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.8536 (0.0405)
f1_macro: 0.8429 (0.0456)
precision_macro: 0.8752 (0.0372)
recall_macro: 0.8535 (0.0406)
kappa_scorer: 0.7803 (0.0608)


# Stacking

In [22]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import StackingClassifier
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostClassifier
import xgboost as xgb

In [23]:
# Select models with higher accuracy.
base_models = [
    #("svm", model_svm),
    ("knn", model_knn),
    ("rf", model_rf),
    ("dt", model_dt),
    #("nb", model_nb),
    #("ann", model_ann),
    #("mlp_2layer", model_mlp_2layer),
    #("mlp_3layer", model_mlp_3layer),
    ("mlp_4layer", model_mlp_4layer),
    #("mlp_5layer", model_mlp_5layer),
    #("mlp_6layer", model_mlp_6layer),
]

# Define your meta-learner (you can choose any classifier)
meta_learner = RandomForestClassifier(random_state=1)

# Create the stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner)
model = stacking_model
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.9571 (0.0305)
f1_macro: 0.9572 (0.0304)
precision_macro: 0.9592 (0.0296)
recall_macro: 0.9572 (0.0306)
kappa_scorer: 0.9356 (0.0458)


In [24]:
# Select models with higher accuracy.
base_models = [
    #("svm", model_svm),
    ("knn", model_knn),
    ("rf", model_rf),
    ("dt", model_dt),
    #("nb", model_nb),
    #("ann", model_ann),
    #("mlp_2layer", model_mlp_2layer),
    #("mlp_3layer", model_mlp_3layer),
    ("mlp_4layer", model_mlp_4layer),
    #("mlp_5layer", model_mlp_5layer),
    #("mlp_6layer", model_mlp_6layer),
]

# Define your meta-learner (you can choose any classifier)
meta_learner = xgb.XGBClassifier(random_state=1)

# Create the stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner)
model = stacking_model
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.9465 (0.0314)
f1_macro: 0.9464 (0.0315)
precision_macro: 0.9499 (0.0293)
recall_macro: 0.9467 (0.0312)
kappa_scorer: 0.9197 (0.0471)


In [26]:
# Select models with higher accuracy.
base_models = [
    #("svm", model_svm),
    ("knn", model_knn),
    ("rf", model_rf),
    ("dt", model_dt),
    #("nb", model_nb),
    #("ann", model_ann),
    #("mlp_2layer", model_mlp_2layer),
    #("mlp_3layer", model_mlp_3layer),
    ("mlp_4layer", model_mlp_4layer),
    #("mlp_5layer", model_mlp_5layer),
    #("mlp_6layer", model_mlp_6layer),
]

# Define your meta-learner (you can choose any classifier)
meta_learner = CatBoostClassifier(random_state=1)

# Create the stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner)
model = stacking_model
cv= RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='f1_macro', cv=cv, n_jobs=-1)
print('f1_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='precision_macro', cv=cv, n_jobs=-1)
print('precision_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring='recall_macro', cv=cv, n_jobs=-1)
print('recall_macro: %.4f (%.4f)' % (mean(scores), std(scores)))
scores = cross_val_score(model, X, y, scoring=kappa_scorer, cv=cv, n_jobs=-1)
print('kappa_scorer: %.4f (%.4f)' % (mean(scores), std(scores)))

Accuracy: 0.9540 (0.0247)
f1_macro: 0.9539 (0.0248)
precision_macro: 0.9564 (0.0239)
recall_macro: 0.9542 (0.0247)
kappa_scorer: 0.9311 (0.0371)
