In [1]:
import warnings
from decimal import Decimal
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn import preprocessing
# ignore warnings generated due to usage of old version of tensorflow
warnings.simplefilter("ignore")

In [2]:
df_comb = pd.read_csv("Dataset/dis_sym_dataset_comb.csv")
X = df_comb.iloc[:, 1:]
Y = df_comb.iloc[:, 0:1]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.10)

In [25]:
accuracy_list = []
cross_accuracy_list = []
model_list = []
base_learners = []
knn = KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)
svm = SVC(probability=True)
rf = RandomForestClassifier(n_estimators=10, criterion='entropy')
mlp = MLPClassifier(hidden_layer_sizes=(32, 32, 32), activation='relu', solver='adam', max_iter=50)
dt = DecisionTreeClassifier()
base_learners.append(knn)
base_learners.append(svm)
base_learners.append(rf)
base_learners.append(mlp)
base_learners.append(dt)
meta_learner = LogisticRegression()

## TESTING

In [32]:
base_learners = [
                 ('KNN', KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)),
                 ('SVM', SVC(probability=True)),
                 ('RF', RandomForestClassifier(n_estimators=10, criterion='entropy')),
                 ('MLP', MLPClassifier(hidden_layer_sizes=(32, 32, 32), activation='relu', solver='adam', max_iter=50)),
                 ('DT', DecisionTreeClassifier())
                ]

# Initialize Stacking Classifier with the Meta Learner
clf = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression(), cv = 5)

# Extract score
clf = clf.fit(X,Y)


0.9049773755656109

## BASE LEARNERS: KNN+SVM+RF

In [33]:
base_learners2 = [
                 ('KNN', KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)),
                 ('SVM', SVC(probability=True)),
                 ('RF', RandomForestClassifier(n_estimators=10, criterion='entropy')),
                  ]
clf2 = StackingClassifier(estimators=base_learners2, final_estimator=LogisticRegression(), cv = 5)
clf2 = clf2.fit(X,Y)
clf_pred2 = clf2.predict(x_test)
acc_clf2= round(Decimal(accuracy_score(y_test, clf_pred2) * 100), 2)
print(f"Accuracy: {acc_clf2}%")

Accuracy: 89.82%


In [35]:
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
scores = cross_val_score(clf2, X, Y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')

In [36]:
scores1 = round(Decimal(scores.mean() * 100), 2)
print(f"Cross Validation Accuracy (scores1): {scores1}%")

Cross Validation Accuracy (scores1): 84.32%


## BASE_LEARNERS: KNN+SVM+RF+MLP+DT

In [40]:
base_learners3 = [
                 ('KNN', KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)),
                 ('SVM', SVC(probability=True)),
                 ('RF', RandomForestClassifier(n_estimators=10, criterion='entropy')),
                 ('MLP', MLPClassifier(hidden_layer_sizes=(32, 32, 32), activation='relu', solver='adam', max_iter=50)),
                 ('DT', DecisionTreeClassifier())
                  ]
clf3 = StackingClassifier(estimators=base_learners3, final_estimator=LogisticRegression(), cv = 5)
clf3 = clf3.fit(X,Y)
clf_pred3 = clf3.predict(x_test)
acc_clf3= round(Decimal(accuracy_score(y_test, clf_pred3) * 100), 2)
print(f"Accuracy: {acc_clf3}%")
scores3 = cross_val_score(clf3, X, Y, scoring='accuracy', cv=5, n_jobs=-1, error_score='raise')
score3 = round(Decimal(scores3.mean() * 100), 2)
print(f"Cross Validation Accuracy (score3): {score3}%")

Accuracy: 90.27%
Cross Validation Accuracy (score3): 87.09%


## BASE_LEARNERS: KNN+SVM+RF+MLP+LR

In [39]:
base_learners4 = [
                 ('KNN', KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)),
                 ('SVM', SVC(probability=True)),
                 ('RF', RandomForestClassifier(n_estimators=10, criterion='entropy')),
                 ('MLP', MLPClassifier(hidden_layer_sizes=(32, 32, 32), activation='relu', solver='adam', max_iter=50)),
                 ('DT', LogisticRegression())
                  ]
clf4 = StackingClassifier(estimators=base_learners4, final_estimator=LogisticRegression(), cv = 5)
clf4 = clf4.fit(X,Y)
clf_pred4 = clf4.predict(x_test)
acc_clf4= round(Decimal(accuracy_score(y_test, clf_pred4) * 100), 2)
print(f"Accuracy: {acc_clf4}%")
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
scores4 = cross_val_score(clf4, X, Y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
score4 = round(Decimal(scores4.mean() * 100), 2)
print(f"Cross Validation Accuracy (score4): {score4}%")

Accuracy: 90.38%
Cross Validation Accuracy (score4): 86.89%


## BASE_LEARNERS:  KNN + SVM + MLP

In [3]:
base_learners5 = [
                 ('KNN', KNeighborsClassifier(n_neighbors=7, weights='distance', n_jobs=4)),
                 ('SVM', SVC(probability=True)),
                 ('MLP', MLPClassifier(hidden_layer_sizes=(32, 32, 32), activation='relu', solver='adam', max_iter=50)),
                  ]
clf5 = StackingClassifier(estimators=base_learners5, final_estimator=LogisticRegression(), cv = 5)
clf5 = clf5.fit(X,Y)
clf_pred5 = clf5.predict(x_test)
acc_clf5= round(Decimal(accuracy_score(y_test, clf_pred5) * 100), 2)
print(f"Accuracy: {acc_clf5}%")
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
scores5 = cross_val_score(clf5, X, Y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
score5 = round(Decimal(scores5.mean() * 100), 2)
print(f"Cross Validation Accuracy (score5): {score5}%")

Accuracy: 90.27%
Cross Validation Accuracy (score5): 86.29%
