In [1]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import time
from imblearn.under_sampling import RandomUnderSampler
from scipy import stats as st
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

In [2]:
# Mount Google Drive (Colab-only) so the CSV files read later from
# /content/drive/MyDrive/... are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
class Dataset():
    """Loads the full dataset and the feature lists kept by four selection methods.

    Attributes:
        X: every column of the dataset except the target column 'K2Q31A'.
        y: the target column 'K2Q31A'.
        chi, fisher, inf, cor: Series of feature names flagged 'Y' in the
            chi-square, Fisher's score, information-gain and correlation
            tables respectively.
    """

    def __init__(self):

        # whole dataset
        df = pd.read_csv('/content/drive/MyDrive/ADHD/file/na.csv')
        self.y = df['K2Q31A']
        self.X = df.drop(columns='K2Q31A')

        # chi square: keep the names of rows marked 'Y' in the 'Dr Sheikhy' column
        chi_table = pd.read_csv('/content/drive/MyDrive/adhd_tune/Adhd-detection-ML-algorithms/chi.csv')
        self.chi = chi_table.loc[chi_table['Dr Sheikhy'] == 'Y']['feature name']

        # fisher's score (note: this table spells the column 'Feature Name')
        fisher_table = pd.read_csv('/content/drive/MyDrive/adhd_tune/Adhd-detection-ML-algorithms/fisher.csv')
        self.fisher = fisher_table.loc[fisher_table['Dr Sheikhy'] == 'Y']['Feature Name']

        # information gain
        inf_table = pd.read_csv('/content/drive/MyDrive/adhd_tune/Adhd-detection-ML-algorithms/inf-gain.csv')
        self.inf = inf_table.loc[inf_table['Dr Sheikhy'] == 'Y']['Feature Name']

        # correlation: this table is addressed by position -- column 0 is
        # returned and column 14 holds the 'Y' flag. Rows whose flag is not
        # 'Y' are blanked by `where` and then removed by `dropna`.
        cor_table = pd.read_csv('/content/drive/MyDrive/adhd_tune/Adhd-detection-ML-algorithms/cor.csv')
        flagged = cor_table.iloc[:, [0, 14]].where(cor_table.iloc[:, 14] == 'Y')
        self.cor = flagged.dropna().iloc[:, 0]

    @staticmethod
    def _common(*feature_series) -> list:
        """Return the values present in every given Series, as a list.

        The list is built from a set, so its order is arbitrary.
        """
        return list(set.intersection(*(set(series.tolist()) for series in feature_series)))

    def return_dataset(self) -> 'tuple[pd.DataFrame, pd.Series]':
        """Return the feature frame and the target column as ``(X, y)``."""
        return self.X, self.y

    def return_chi(self) -> pd.Series:
        """Return the features kept by the chi-square table."""
        return self.chi

    def return_fisher(self) -> pd.Series:
        """Return the features kept by the Fisher's score table."""
        return self.fisher

    def return_inf(self) -> pd.Series:
        """Return the features kept by the information-gain table."""
        return self.inf

    def return_cor(self) -> pd.Series:
        """Return the features kept by the correlation table."""
        return self.cor

    # intersections of 2 sets
    def return_intersection_chi_fisher(self) -> list:
        return self._common(self.chi, self.fisher)

    def return_intersection_chi_inf(self) -> list:
        return self._common(self.chi, self.inf)

    def return_intersection_chi_cor(self) -> list:
        return self._common(self.chi, self.cor)

    def return_intersection_fisher_inf(self) -> list:
        return self._common(self.fisher, self.inf)

    def return_intersection_fisher_cor(self) -> list:
        return self._common(self.fisher, self.cor)

    def return_intersection_inf_cor(self) -> list:
        return self._common(self.inf, self.cor)

    # intersections of 3 sets
    def return_intersection_chi_fisher_inf(self) -> list:
        return self._common(self.chi, self.fisher, self.inf)

    def return_intersection_chi_fisher_cor(self) -> list:
        return self._common(self.chi, self.fisher, self.cor)

    def return_intersection_chi_inf_cor(self) -> list:
        return self._common(self.chi, self.inf, self.cor)

    def return_intersection_fisher_inf_cor(self) -> list:
        return self._common(self.fisher, self.inf, self.cor)




In [4]:
class EnsembleModel():
  '''
  Majority-vote ensemble over a list of classifiers.

  Every classifier is fitted on the same training data; the ensemble
  prediction for a sample is the most frequent label among the individual
  predictions. Pass an odd number of classifiers so that a two-class vote
  cannot tie.
  '''

  def __init__(self,models: list ):
    '''
    models: classifiers exposing ``fit(X, y)`` (returning the fitted
            estimator) and ``predict(X)``.
    '''
    self.models=models
    self.fitted_models=[]

  def fit(self,X_train: pd.DataFrame ,y_train: pd.DataFrame):
    '''
    Fit every wrapped classifier on the given training data.

    The list of fitted models is rebuilt on each call. Appending instead
    would make the list grow with every refit, so each classifier would be
    asked to predict (and counted in the vote) once per previous fit.

    Returns the ensemble itself so calls can be chained.
    '''
    self.fitted_models = [model.fit(X_train, y_train) for model in self.models]
    return self

  def predict(self,X_test,show_result=False):
    '''
    Predict by majority vote of the fitted classifiers.

    X_test:      samples to classify.
    show_result: when True, also return the individual predictions.

    Returns the 1-D array of voted labels; with ``show_result=True`` returns
    ``(voted_labels, per_model)`` where ``per_model`` has one row per sample
    and one column per classifier, in the order the classifiers were given.

    Raises RuntimeError when called before ``fit``.
    '''
    if not self.fitted_models:
      raise RuntimeError('EnsembleModel.predict called before fit')

    # rows = classifiers, columns = samples
    pred_array = np.array([model.predict(X_test) for model in self.fitted_models])

    # most frequent label per sample (column-wise mode), flattened to 1-D
    y_pred = st.mode(pred_array, axis=0, keepdims=True)[0].reshape(-1,)

    if show_result:
      # transpose (not reshape) so that row i holds the votes for sample i
      return y_pred, pred_array.T

    return y_pred

In [10]:
class Model():
    """Benchmarks several classifiers on one feature subset of the dataset.

    The constructor keeps only the selected columns, balances the classes by
    random undersampling and instantiates the classifiers. `fit_algorithm`
    then repeats a random 80/20 train/test split `no_iters` times for one
    classifier and reports the mean / max / std of each metric.
    """

    # Directory (with trailing slash) that result files are appended to.
    # Override on the class or on an instance to write somewhere else.
    results_dir = '/content/drive/MyDrive/elec/filesss/'

    def __init__(self, no_iters: int, selection: list, x: pd.DataFrame, y: pd.DataFrame, filename: str):
        """
        Args:
            no_iters: number of repeated train/test runs per classifier.
            selection: names of the columns of `x` to keep.
            x: feature frame.
            y: target labels aligned with `x`.
            filename: name the result file is derived from; everything up to
                and including "return_" is stripped and '.text' is appended.
        """
        self.no_iters = no_iters

        # keep only the columns listed in `selection`
        self.x = x.loc[:, x.columns.isin(selection)]
        self.y = y

        self.filename = str(self._create_filename(filename) + '.text').replace("'", "")

        # classifiers
        self.xgboost_classifier = XGBClassifier(max_depth=5,
                                                learning_rate=0.005,
                                                n_estimators=100,
                                                objective='binary:logistic',
                                                random_state=42)

        # `base_estimator='deprecated'` was dropped from this call: it is only
        # the default placeholder for the old keyword and is rejected by
        # scikit-learn releases that removed `base_estimator`.
        self.adaboost_classifier = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=2),
                                                      n_estimators=100,
                                                      learning_rate=0.05,
                                                      algorithm='SAMME.R',
                                                      random_state=42)

        self.svm_classifier = SVC()

        self.random_forest_classifier = RandomForestClassifier()

        self.gradient_boost_classifier = GradientBoostingClassifier()

        self.decision_tree_classifier = DecisionTreeClassifier()

        self.logistic_regression = LogisticRegression(random_state=0, max_iter=300)

        # the ensemble votes over the classifier objects created above
        self.ensemble_classifier = EnsembleModel([self.xgboost_classifier,
                                                  self.gradient_boost_classifier,
                                                  self.adaboost_classifier,
                                                  self.svm_classifier,
                                                  self.logistic_regression])

        # undersampling of the majority class, done once on the whole subset
        self.undersample = RandomUnderSampler(sampling_strategy='majority', random_state=9)
        self.X_us, self.y_us = self.undersample.fit_resample(self.x, self.y)

    def _append_items(self, target: list, name, acc, f1_score, recall, precision, time, no_iters):
        """Append the result fields to `target` in report order and return it.

        Order: [name, acc, f1_score, recall, precision, time, no_iters].
        """
        target.extend([name, acc, f1_score, recall, precision, time, no_iters])
        return target

    def _train(self, classifier, random_state=0):
        """Run one random 80/20 split: impute, fit, predict and score.

        Args:
            classifier: object exposing `fit(X, y)` and `predict(X)`.
            random_state: kept for interface compatibility. It is not
                forwarded to `train_test_split`: `fit_algorithm` calls this
                method repeatedly and reports the spread over runs, which
                needs a different split on every call.

        Returns:
            (accuracy in percent, f1, recall, precision, seconds spent in
            fit + predict).
        """
        X_train, X_test, y_train, y_test = train_test_split(self.X_us, self.y_us, test_size=0.2)

        # impute missing values with medians learned on the training part only
        imputer = SimpleImputer(strategy='median')
        imputer.fit(X_train)
        X_train = imputer.transform(X_train)
        X_test = imputer.transform(X_test)

        # recode the labels: 1 stays 1 and 2 becomes 0
        y_train = -(y_train - 2)
        y_test = -(y_test - 2)

        start_time = time.time()
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        elapsed = time.time() - start_time

        accuracy = accuracy_score(y_test, y_pred) * 100
        f1 = f1_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)

        return accuracy, f1, recall, precision, elapsed

    def _format_result(self, result) -> str:
        """Build the text report for one `result` list (see `display_result`)."""
        lines = [f'algorithm name: {result[0]}',
                 f'number of iterations: {str(self.no_iters)}']
        metrics = (('accuracy', result[1]), ('f1 score', result[2]),
                   ('recall', result[3]), ('precision', result[4]))
        for label, values in metrics:
            lines.append(f'average {label}: {values.mean():.4f}')
            lines.append(f'max {label}: {values.max():.4f}')
            lines.append(f'std {label}: {values.std():.4f}')
        lines.append(f'average time: {result[5].mean():.4f}')
        # every line ends with a newline; two extra newlines separate reports
        return '\n'.join(lines) + '\n\n\n'

    def display_result(self, result, print_result=True):
        """Append the report for `result` to the result file, optionally printing it.

        Args:
            result: [name, acc, f1_score, recall, precision, time, no_iters]
                where the metric entries are numpy arrays with one value per
                run.
            print_result: also print the report when True.
        """
        result_string = self._format_result(result)
        filename = self.results_dir + str(self.filename)
        # the `with` block closes the file; no explicit close() is needed
        with open(filename, 'a') as file:
            if print_result:
                print(result_string)
            file.write(result_string)

    def fit_algorithm(self, classifier):
        """Evaluate `classifier` over `no_iters` random splits and report the result."""
        scores = []
        times = []
        f1_scores = []
        recall_scores = []
        precision_scores = []

        for _ in range(self.no_iters):
            accuracy, f1, recall, precision, time0 = self._train(classifier)

            scores.append(accuracy)
            f1_scores.append(f1)
            recall_scores.append(recall)
            precision_scores.append(precision)
            times.append(time0)

        # XGBoost and the ensemble are reported by class name; any other
        # classifier is reported through its own repr.
        if classifier is self.xgboost_classifier:
            name = XGBClassifier.__name__
        elif classifier is self.ensemble_classifier:
            name = EnsembleModel.__name__
        else:
            name = classifier

        target = self._append_items(target=[], name=name, acc=np.array(scores),
                                    f1_score=np.array(f1_scores), recall=np.array(recall_scores),
                                    precision=np.array(precision_scores), time=np.array(times),
                                    no_iters=self.no_iters)
        self.display_result(target)

    def _create_filename(self, filename):
        """Return the part of `filename` after the first "return_".

        When "return_" does not occur, the name is returned unchanged, so the
        constructor can always append the '.text' extension.
        """
        marker = "return_"
        index = filename.find(marker)
        if index == -1:
            return filename
        return filename[index + len(marker):]



In [14]:
# Number of repeated train/test runs per classifier; passed to Model as `no_iters`.
no_iterarion =30

In [110]:
# Scratch check: `__name__` of a function is its identifier as a string.
# The Model constructor calls below pass such a name as `filename`.
def data():
    print(5)
data.__name__

'data'

In [15]:
dataset = Dataset()
X, y = dataset.return_dataset()

# Feature-subset selectors to benchmark. Each one is listed once: the
# original list contained `return_intersection_chi_inf` twice, which ran the
# same benchmark twice and appended both runs to the same result file.
selectors = [
    dataset.return_intersection_chi_inf,
    dataset.return_intersection_chi_fisher_inf,
    dataset.return_intersection_chi_fisher,
    dataset.return_intersection_fisher_inf,
    dataset.return_intersection_chi_cor,
    dataset.return_intersection_fisher_cor,
    dataset.return_intersection_inf_cor,
]

# One Model per feature subset; the selector's method name is passed as
# `filename`, from which Model derives the name of its result file.
models = [
    Model(no_iters=no_iterarion, selection=selector(), x=X, y=y, filename=selector.__name__)
    for selector in selectors
]



In [95]:
# classifiers =   [
#                 model.xgboost_classifier,
#                 model.adaboost_classifier,
#                 model.gradient_boost_classifier,
#                 model.random_forest_classifier,
#                 model.svm_classifier,
#                 model.ensemble_classifier
#                 ]

In [8]:
# Benchmark every stand-alone classifier on every feature subset.
# `ensemble_classifier` is deliberately not in this list; it is evaluated
# in its own cell further down.
for model in models:
  classifiers = [
      getattr(model, attribute)
      for attribute in (
          'xgboost_classifier',
          'adaboost_classifier',
          'gradient_boost_classifier',
          'random_forest_classifier',
          'svm_classifier',
      )
  ]
  for classifier in classifiers:
    model.fit_algorithm(classifier=classifier)

algorithm name: XGBClassifier
number of iterations: 100
average accuracy: 86.0709
max accuracy: 88.1664
std accuracy: 0.7466
average f1 score: 0.8580
max f1 score: 0.8839
std f1 score: 0.0087
average recall: 0.8436
max recall: 0.8709
std recall: 0.0118
average precision: 0.8729
max precision: 0.8973
std precision: 0.0113
average time: 0.6269



algorithm name: AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=2),
                   learning_rate=0.05, n_estimators=100, random_state=42)
number of iterations: 100
average accuracy: 85.7692
max accuracy: 87.2876
std accuracy: 0.7297
average f1 score: 0.8530
max f1 score: 0.8704
std f1 score: 0.0081
average recall: 0.8267
max recall: 0.8513
std recall: 0.0110
average precision: 0.8811
max precision: 0.9049
std precision: 0.0108
average time: 0.7491



algorithm name: GradientBoostingClassifier()
number of iterations: 100
average accuracy: 85.8219
max accuracy: 87.3462
std accuracy: 0.7614
average f1 score: 0.8555
max f1 score: 0

In [16]:
# Evaluate the majority-vote ensemble of each Model; fit_algorithm prints the
# report and appends it to that Model's result file.
for model in models:
  model.fit_algorithm(classifier=model.ensemble_classifier)

algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 85.9754
max accuracy: 87.4048
std accuracy: 0.7626
average f1 score: 0.8559
max f1 score: 0.8708
std f1 score: 0.0080
average recall: 0.8379
max recall: 0.8677
std recall: 0.0100
average precision: 0.8748
max precision: 0.8952
std precision: 0.0103
average time: 8.6753



algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 85.3193
max accuracy: 87.4048
std accuracy: 0.7641
average f1 score: 0.8482
max f1 score: 0.8718
std f1 score: 0.0084
average recall: 0.8169
max recall: 0.8359
std recall: 0.0116
average precision: 0.8821
max precision: 0.9126
std precision: 0.0099
average time: 8.2377



algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 85.5770
max accuracy: 87.1119
std accuracy: 0.6296
average f1 score: 0.8494
max f1 score: 0.8698
std f1 score: 0.0075
average recall: 0.8176
max recall: 0.8390
std recall: 0.0103
average precision: 0.8838
max precision: 0.9087
std prec

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 85.2392
max accuracy: 87.0533
std accuracy: 0.7018
average f1 score: 0.8458
max f1 score: 0.8661
std f1 score: 0.0083
average recall: 0.8122
max recall: 0.8355
std recall: 0.0119
average precision: 0.8824
max precision: 0.9039
std precision: 0.0112
average time: 13.7843



algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 86.1043
max accuracy: 88.4007
std accuracy: 0.6811
average f1 score: 0.8599
max f1 score: 0.8835
std f1 score: 0.0072
average recall: 0.8499
max recall: 0.8722
std recall: 0.0092
average precision: 0.8702
max precision: 0.8951
std precision: 0.0107
average time: 6.5739



algorithm name: EnsembleModel
number of iterations: 30
average accuracy: 86.4245
max accuracy: 87.7563
std accuracy: 0.7610
average f1 score: 0.8641
max f1 score: 0.8764
std f1 score: 0.0076
average recall: 0.8581
max recall: 0.8781
std recall: 0.0105
average precision: 0.8704
max precision: 0.8952
std pre