In [1]:
from sklearn.utils import shuffle
import sklearn 
from sklearn import svm, tree, linear_model, neighbors, naive_bayes, ensemble, discriminant_analysis, gaussian_process
from xgboost import XGBClassifier
from sklearn import feature_selection
from sklearn import model_selection
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import pandas as pd
import numpy as np
import random

In [4]:
# Seed both random number generators used by this notebook. Python's `random`
# module and NumPy keep independent global state; every train/test split below is
# drawn with np.random.choice, so seeding `random` alone leaves the splits
# non-reproducible across kernel restarts.
random.seed(0)
np.random.seed(0)

In [5]:
import matplotlib.pyplot as plt

### Objective: create a test data set
We have 285 patient recordings in total. We will use ~10 percent of the data for testing, i.e. 28 patients.
These 28 patients are unique and will not be present in the training data. The 28 test patients are distributed as follows:

* 28 = 14 Asthma + 14 Non_Asthma 
* 14 Asthma = 7 Males + 7 Females
* 14 Non_Asthma = 7 Males + 7 Females

* Total Female = 7 + 7 = 14
* Total Male = 7 + 7 = 14

In [6]:
def train_test_set(Asthmatic_Female, Asthmatic_Male, Healthy_Female, Healthy_Male, before_after,
                   n_per_group=7, rng=None):
    """Draw a gender- and class-balanced test set of recording names; the rest is training.

    ``n_per_group`` names are sampled without replacement from each of the four
    groups (asthmatic male, asthmatic female, healthy male, healthy female, in
    that order), giving ``4 * n_per_group`` test names. Every remaining name from
    the four groups, plus all of ``before_after``, forms the training set.

    Parameters
    ----------
    Asthmatic_Female, Asthmatic_Male, Healthy_Female, Healthy_Male : sequence
        Recording names of each group. Each group must hold at least
        ``n_per_group`` entries.
    before_after : sequence
        Names that must only ever be used for training; appended to the
        training set unchanged.
    n_per_group : int, default 7
        Number of test names drawn from each group.
    rng : optional
        Object exposing ``choice(a, replace=..., size=...)`` such as
        ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``.
        When omitted, NumPy's global random state is used, so call
        ``np.random.seed(...)`` beforehand for reproducible splits.

    Returns
    -------
    (train_set, test_set) : tuple of np.ndarray
        ``train_set`` is the sorted, de-duplicated remainder of the four groups
        (``np.setdiff1d`` sorts) followed by ``before_after``; ``test_set`` is in
        sampling order.

    Raises
    ------
    ValueError
        Raised by NumPy when a group has fewer than ``n_per_group`` entries.
    """
    # A previous version called random.seed(0) here. That seeds Python's `random`
    # module, which np.random.choice never consults, so it did not make the split
    # reproducible; it only reset the global Python RNG on every call. Seeding is
    # now the caller's job (see `rng`).
    sampler = np.random if rng is None else rng

    # Sampling order is fixed so that a seeded generator yields the same split.
    groups = (Asthmatic_Male, Asthmatic_Female, Healthy_Male, Healthy_Female)
    test_set = np.concatenate([
        sampler.choice(np.array(group), replace=False, size=n_per_group)
        for group in groups
    ])

    everyone = np.concatenate([np.array(group) for group in groups])
    train_set = np.setdiff1d(everyone, test_set)

    # An empty list becomes a float64 array; skip it instead of concatenating it
    # with an array of strings.
    if len(before_after):
        train_set = np.concatenate((train_set, np.array(before_after)))
    return train_set, test_set

#### Gender Ratio in Train Set F/m = 116/141 = 0.8226950354609929
#### Non_Asthma/Asthma = 92/165 = 0.5575

#### Healthy Female: Healthy Male in Train set

In [14]:
# Healthy female-to-male ratio that remains for training once 7 names of each
# gender have been held out for the test set.
# NOTE(review): in the cells shown here Healthy_Female / Healthy_Male are only
# assigned as locals inside train_test_csv, so this cell appears to rely on
# leftover kernel state — TODO confirm and define them at notebook scope.
(len(Healthy_Female)-7)/(len(Healthy_Male)-7)

0.9166666666666666

In [16]:
# Asthmatic female-to-male ratio that remains for training once 7 names of each
# gender have been held out for the test set.
# NOTE(review): in the cells shown here Asthmatic_Female / Asthmatic_Male are only
# assigned as locals inside train_test_csv, so this cell appears to rely on
# leftover kernel state — TODO confirm and define them at notebook scope.
(len(Asthmatic_Female)-7)/(len(Asthmatic_Male)-7)

0.7375

### Loading CSV

In [9]:
# Load one feature table per recorded sound type from ./MFCCs_stasts_Csv.
# Later cells read columns 0-71 of these tables as features, column 72 as the
# class label, and column 73 (column label '73') as the recording file name.
# NOTE(review): the directory name suggests the features are MFCC statistics —
# confirm against the notebook that produced these CSVs.
Cough_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Cough.csv")
Wheeze_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Wheeze.csv")
Aaa_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Aaa.csv")
Yee_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Yee.csv")
Inhale_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Inhale.csv")
Exhale_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Exhale.csv")
Ooo_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Ooo.csv")
Sss_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Sss.csv")
Eee_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Eee.csv")
Uuu_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Uuu.csv")
Zzz_dataframe = pd.read_csv("./MFCCs_stasts_Csv/Zzz.csv")

In [10]:
# train_test Csv
def train_test_csv(sound_dataframe):
    """Split one sound's feature table into train and test frames by recording name.

    Column 73 of ``sound_dataframe`` holds the recording file name. Its first
    four characters are parsed as an integer ID (presumably the patient ID —
    verify against the file-naming scheme). File names whose ID occurs exactly
    once are sorted into four groups by substring: ``'sthma'`` / ``'_C_'`` for
    asthmatic / healthy and ``'_M_'`` / ``'_F_'`` for male / female. File names
    whose ID occurs more than once are passed to ``train_test_set`` as
    ``before_after``. ``train_test_set`` then decides which names go to the test
    set and which to the training set.

    File names matching neither ``'sthma'`` nor ``'_C_'``, or neither gender tag,
    belong to no group and therefore end up in neither output frame.

    Parameters
    ----------
    sound_dataframe : pandas.DataFrame
        Feature table with the recording file name at position 73, which must
        also be the column labelled ``'73'``.

    Returns
    -------
    (train_dataframe, test_dataframe) : tuple of pandas.DataFrame
        Rows of ``sound_dataframe`` belonging to the training / test file names,
        grouped in the order the names are returned by ``train_test_set`` and
        keeping their original index labels.
    """
    from collections import Counter

    Total = np.unique(sound_dataframe.iloc[:, 73])

    # Count the recordings per ID once instead of rebuilding an integer array
    # for every file name.
    recordings_per_id = Counter(int(file[:4]) for file in Total)

    unique = []
    before_after = []
    for file in Total:
        if recordings_per_id[int(file[:4])] == 1:
            unique.append(file)
        else:
            before_after.append(file)

    Asthmatic_Female, Asthmatic_Male = [], []
    Healthy_Female, Healthy_Male = [], []
    for file in unique:
        is_male = "_M_" in file
        is_female = "_F_" in file
        # Two independent checks (not if/elif), exactly as before: a name carrying
        # both class tags would be added to both classes.
        if 'sthma' in file:
            if is_male:
                Asthmatic_Male.append(file)
            if is_female:
                Asthmatic_Female.append(file)
        if "_C_" in file:
            if is_male:
                Healthy_Male.append(file)
            if is_female:
                Healthy_Female.append(file)

    Train, Test = train_test_set(Asthmatic_Female, Asthmatic_Male, Healthy_Female, Healthy_Male, before_after)

    test_dataframe = _rows_for_files(sound_dataframe, Test)
    train_dataframe = _rows_for_files(sound_dataframe, Train)
    return train_dataframe, test_dataframe


def _rows_for_files(sound_dataframe, file_names):
    """Return the rows of ``sound_dataframe`` whose '73' column matches ``file_names``, in that order."""
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; collect the pieces and concatenate once instead.
    pieces = [sound_dataframe[sound_dataframe['73'] == file] for file in file_names]
    if not pieces:
        # pd.concat raises on an empty list; the old append loop gave an empty frame.
        return pd.DataFrame()
    return pd.concat(pieces)

#### Wheeze

In [47]:
# Create ten train/test splits of the Wheeze table and save them to ./Set_CSV.
# These files carry no sound tag in their name (train0.csv ... train9.csv), so
# MLA_selection only finds them when called with sound=''.
for i in range(0,10):
    train_csv, test_csv = train_test_csv(Wheeze_dataframe)
    train_csv.to_csv('./Set_CSV/train'+str(i)+'.csv', index=False)
    test_csv.to_csv('./Set_CSV/test'+str(i)+'.csv', index=False)

In [14]:
#Machine Learning Algorithm (MLA) Selection and Initialization
def MLA_selection(sound_dataframe, sound, n_splits=10):
    """Benchmark a fixed panel of classifiers on the saved train/test splits of one sound.

    For each split ``i`` in ``range(n_splits)`` the files
    ``./Set_CSV/train<sound><i>.csv`` and ``./Set_CSV/test<sound><i>.csv`` are
    read. Columns 0-71 are used as features and column 72 as the label. Features
    are standardised with a ``StandardScaler`` fitted on the training part only.
    Every classifier is fitted with its default parameters on each split and
    scored with plain accuracy on both the training and the test part.

    Parameters
    ----------
    sound_dataframe : pandas.DataFrame
        Not used by this function; kept so existing calls keep working.
    sound : str
        Tag embedded in the split file names (``''`` for untagged files).
    n_splits : int, default 10
        Number of saved splits to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per classifier with its name, parameters, per-split train/test
        accuracies, their means and the standard deviation of the test accuracy,
        sorted by mean test accuracy (best first). The index holds each
        classifier's position in the panel.
    """
    MLA = [
        #Ensemble Methods
        ensemble.AdaBoostClassifier(),
        ensemble.BaggingClassifier(),
        ensemble.ExtraTreesClassifier(),
        ensemble.GradientBoostingClassifier(),
        ensemble.RandomForestClassifier(),

        #Gaussian Processes
        gaussian_process.GaussianProcessClassifier(),

        #GLM
        linear_model.LogisticRegressionCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.SGDClassifier(),
        linear_model.Perceptron(),

        #Naive Bayes
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),

        #Nearest Neighbor
        neighbors.KNeighborsClassifier(),

        #SVM
        svm.SVC(probability=True),
        svm.NuSVC(probability=True),
        svm.LinearSVC(),

        #Trees
        tree.DecisionTreeClassifier(),
        tree.ExtraTreeClassifier(),

        #Discriminant Analysis
        discriminant_analysis.LinearDiscriminantAnalysis(),
        discriminant_analysis.QuadraticDiscriminantAnalysis(),

        XGBClassifier()
        ]

    MLA_columns = ['MLA Name', 'MLA Parameters','MLA Train Accuracy','MLA Train Accuracy Mean', 'MLA Test Accuracy', 'MLA Test Accuracy Mean','MLA Test Accuracy Std' ]

    # Reading and scaling do not depend on the classifier, so do them once
    # instead of once per classifier.
    splits = _load_scaled_splits(sound, n_splits)

    # Collect one record per classifier and build the frame in a single step;
    # assigning a list to a single cell through .loc is fragile.
    records = []
    for alg in MLA:
        record = {
            'MLA Name': alg.__class__.__name__,
            'MLA Parameters': str(alg.get_params()),
        }
        test = []
        train = []
        for X_train, y_train, X_test, y_test in splits:
            alg.fit(X_train, y_train)
            test.append(metrics.accuracy_score(y_test, alg.predict(X_test)))
            train.append(metrics.accuracy_score(y_train, alg.predict(X_train)))

        record['MLA Train Accuracy'] = train
        record['MLA Train Accuracy Mean'] = np.mean(train)
        record['MLA Test Accuracy'] = test
        record['MLA Test Accuracy Mean'] = np.mean(test)
        record['MLA Test Accuracy Std'] = np.std(test)
        records.append(record)

    MLA_compare = pd.DataFrame(records, columns=MLA_columns)
    return MLA_compare.sort_values(by=['MLA Test Accuracy Mean'], ascending=False)


def _load_scaled_splits(sound, n_splits):
    """Read every saved split of ``sound`` and standardise its features; returns (X_train, y_train, X_test, y_test) tuples."""
    n_features = 72  # columns 0..71 are features, column 72 is the label
    splits = []
    for i in range(n_splits):
        train_csv = pd.read_csv('./Set_CSV/train'+sound+str(i)+'.csv')
        test_csv = pd.read_csv('./Set_CSV/test'+sound+str(i)+'.csv')

        # Fit the scaler on the training part only so that no test statistics
        # leak into training.
        scaler = StandardScaler()
        X_train = pd.DataFrame(scaler.fit_transform(train_csv.iloc[:, :n_features]))
        X_test = pd.DataFrame(scaler.transform(test_csv.iloc[:, :n_features]))

        splits.append((X_train, train_csv.iloc[:, n_features], X_test, test_csv.iloc[:, n_features]))
    return splits

In [52]:
# MLA_selection requires the `sound` tag used in the split file names. The Wheeze
# splits above were saved without a tag (train0.csv ...), hence the empty string.
MLA_Wheeze = MLA_selection(Wheeze_dataframe, '')



In [53]:
MLA_Wheeze

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.991941391941392, 0.9935437589670014, 0.9875...",0.99228,"[0.8830645161290323, 0.8401826484018264, 0.872...",0.843015,0.0356814
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.8870967741935484, 0.8401826484018264, 0.9, ...",0.840443,0.0461645
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9963369963369964, 0.994261119081779, 0.9985...",0.996439,"[0.8104838709677419, 0.821917808219178, 0.888,...",0.814428,0.0574147
2,ExtraTreesClassifier,"{'bootstrap': False, 'class_weight': None, 'cr...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.8467741935483871, 0.7990867579908676, 0.844...",0.811775,0.0473367
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': 'warn',...","[0.882051282051282, 0.9103299856527977, 0.8892...",0.885064,"[0.8104838709677419, 0.7534246575342466, 0.82,...",0.806276,0.0636931
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9985347985347985, 0.9985652797704447, 0.997...",0.997455,"[0.8266129032258065, 0.8447488584474886, 0.852...",0.804336,0.0638674
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.936996336996337, 0.9461979913916786, 0.9464...",0.950383,"[0.8306451612903226, 0.776255707762557, 0.864,...",0.803735,0.0495058
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.8923076923076924, 0.9074605451936872, 0.888...",0.895104,"[0.8266129032258065, 0.7488584474885844, 0.796...",0.802352,0.0538487
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.8945054945054945, 0.9074605451936872, 0.894...",0.903819,"[0.8225806451612904, 0.7168949771689498, 0.804...",0.801532,0.0555916
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9714285714285714, 0.975609756097561, 0.9691...",0.97424,"[0.8387096774193549, 0.7899543378995434, 0.784...",0.799876,0.0591807


In [54]:
# MLA_Wheeze.to_excel('Wheeze_with_ratio.xlsx')

### COUGH

In [37]:
# Making Sets
def make_and_save_sets(sound_dataframe, sound):
    """Generate ten train/test splits of ``sound_dataframe`` and write them to ./Set_CSV.

    Each split comes from a fresh call to ``train_test_csv``. Split ``k`` is saved
    as ``train<sound><k>.csv`` and ``test<sound><k>.csv`` without the index
    column; existing files with those names are overwritten.
    """
    for split_no in map(str, range(10)):
        train_part, test_part = train_test_csv(sound_dataframe)
        suffix = sound + split_no + '.csv'
        train_part.to_csv('./Set_CSV/train' + suffix, index=False)
        test_part.to_csv('./Set_CSV/test' + suffix, index=False)

In [38]:
make_and_save_sets(Cough_dataframe, 'cough')

In [40]:
MLA_Cough = MLA_selection(Cough_dataframe, 'cough')



In [41]:
MLA_Cough

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.8419243986254296, 0.7639344262295082, 0.718...",0.722194,0.0584537
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9447731755424064, 0.9448631495438318, 0.952...",0.949875,"[0.872852233676976, 0.7442622950819672, 0.7010...",0.71883,0.0685792
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.8994082840236687, 0.9131297104323681, 0.913...",0.913949,"[0.8247422680412371, 0.7737704918032787, 0.718...",0.718684,0.0596891
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9254437869822485, 0.9194763982546609, 0.913...",0.920461,"[0.865979381443299, 0.7377049180327869, 0.6975...",0.717109,0.0675938
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.8272189349112427, 0.8254660848869496, 0.828...",0.831155,"[0.8041237113402062, 0.740983606557377, 0.6941...",0.707968,0.0565371
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9960552268244576, 0.9956366521221738, 0.995...",0.995977,"[0.7938144329896907, 0.760655737704918, 0.6323...",0.701206,0.0549314
12,GaussianNB,"{'priors': None, 'var_smoothing': 1e-09}","[0.7187376725838265, 0.7231257437524792, 0.740...",0.729337,"[0.7319587628865979, 0.8065573770491803, 0.642...",0.698866,0.0615273
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9932938856015779, 0.9952399841332804, 0.993...",0.994483,"[0.8075601374570447, 0.7540983606557377, 0.659...",0.698762,0.0571057
2,ExtraTreesClassifier,"{'bootstrap': False, 'class_weight': None, 'cr...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7835051546391752, 0.7836065573770492, 0.666...",0.698362,0.0593545
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.7080867850098619, 0.7052756842522808, 0.721...",0.718211,"[0.7766323024054983, 0.8327868852459016, 0.615...",0.694485,0.0867879


In [42]:
MLA_Cough.to_excel("Cough.xlsx") 

### Aaa

In [46]:
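# NOTE: the 'Aaa' splits are not regenerated here, so the next cell depends on
# ./Set_CSV/trainAaa*.csv and ./Set_CSV/testAaa*.csv already existing on disk.
# Uncomment to (re)create them on a fresh checkout:
# make_and_save_sets(Aaa_dataframe, 'Aaa')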

In [47]:
MLA_Aaa = MLA_selection(Aaa_dataframe, 'Aaa')



In [48]:
MLA_Aaa

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9607535321821036, 0.9661150512214342, 0.964...",0.961409,"[0.6690140845070423, 0.7074829931972789, 0.724...",0.693712,0.0602202
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7112676056338029, 0.7210884353741497, 0.655...",0.689589,0.0547546
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9434850863422292, 0.9385342789598109, 0.940...",0.941558,"[0.7183098591549296, 0.7074829931972789, 0.613...",0.68214,0.0542329
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9937205651491365, 0.9952718676122931, 0.988...",0.9937,"[0.6690140845070423, 0.7142857142857143, 0.627...",0.67926,0.0461592
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9474097331240189, 0.9511426319936959, 0.949...",0.949197,"[0.7112676056338029, 0.7074829931972789, 0.579...",0.67591,0.0589994
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.6664050235478807, 0.6674546887312844, 0.686...",0.671707,"[0.6549295774647887, 0.7482993197278912, 0.641...",0.66936,0.044443
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9976452119309263, 0.9976359338061466, 0.994...",0.99551,"[0.6197183098591549, 0.6598639455782312, 0.703...",0.66218,0.045476
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.7723704866562009, 0.7738376674546887, 0.772...",0.774264,"[0.7183098591549296, 0.6802721088435374, 0.613...",0.662048,0.0373621
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.7653061224489796, 0.7691095350669819, 0.767...",0.769936,"[0.704225352112676, 0.6666666666666666, 0.6, 0...",0.659875,0.0459721
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.8563579277864992, 0.8447596532702916, 0.845...",0.84609,"[0.6549295774647887, 0.6802721088435374, 0.586...",0.658085,0.0462682


In [49]:
MLA_Aaa.to_excel("Aaa.xlsx") 

### Eee

In [50]:
make_and_save_sets(Eee_dataframe, 'Eee')

In [51]:
MLA_Eee = MLA_selection(Eee_dataframe, 'Eee')



In [52]:
MLA_Eee

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9427385892116182, 0.942643391521197, 0.9402...",0.940273,"[0.5971223021582733, 0.6666666666666666, 0.712...",0.671689,0.0710029
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9435684647302904, 0.9501246882793017, 0.948...",0.945602,"[0.60431654676259, 0.6595744680851063, 0.71223...",0.670353,0.0699395
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.60431654676259, 0.5815602836879432, 0.69784...",0.666813,0.0537799
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.5827338129496403, 0.75177304964539, 0.69784...",0.6655,0.0599564
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9950207468879668, 0.9908561928512053, 0.993...",0.995253,"[0.5971223021582733, 0.7730496453900709, 0.654...",0.664723,0.0511305
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...","[0.975103734439834, 0.9808811305070657, 0.9784...",0.977256,"[0.5755395683453237, 0.5673758865248227, 0.719...",0.660685,0.069368
2,ExtraTreesClassifier,"{'bootstrap': False, 'class_weight': None, 'cr...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6474820143884892, 0.5673758865248227, 0.726...",0.656595,0.0542618
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.975103734439834, 0.9675810473815462, 0.9634...",0.96935,"[0.60431654676259, 0.7588652482269503, 0.65467...",0.650664,0.0728481
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.995850622406639, 0.999168744804655, 0.99917...",0.997334,"[0.5899280575539568, 0.7375886524822695, 0.640...",0.65028,0.0518052
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.8580912863070539, 0.8528678304239401, 0.859...",0.8619,"[0.5899280575539568, 0.7446808510638298, 0.683...",0.642053,0.0553015


In [53]:
MLA_Eee.to_excel("Eee.xlsx") 

### Yee

In [54]:
make_and_save_sets(Yee_dataframe, 'Yee')
MLA_Yee = MLA_selection(Yee_dataframe, 'Yee')



In [55]:
MLA_Yee

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9935012185215272, 0.9925925925925926, 0.997...",0.995245,"[0.7205882352941176, 0.6710526315789473, 0.732...",0.691461,0.0654568
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9699431356620634, 0.9703703703703703, 0.972...",0.971057,"[0.6470588235294118, 0.6578947368421053, 0.616...",0.674658,0.0722019
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9512591389114541, 0.9572016460905349, 0.951...",0.95392,"[0.6397058823529411, 0.7302631578947368, 0.691...",0.67115,0.0733604
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9496344435418359, 0.9588477366255144, 0.950...",0.952685,"[0.6397058823529411, 0.7302631578947368, 0.691...",0.671082,0.0711374
19,LinearDiscriminantAnalysis,"{'n_components': None, 'priors': None, 'shrink...","[0.7822908204711616, 0.7695473251028807, 0.782...",0.776805,"[0.6617647058823529, 0.7302631578947368, 0.609...",0.670027,0.0717499
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': 'warn',...","[0.7701056051990252, 0.7506172839506173, 0.754...",0.766516,"[0.6764705882352942, 0.743421052631579, 0.6232...",0.668781,0.0676164
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.7944760357432982, 0.7786008230452675, 0.782...",0.784271,"[0.6764705882352942, 0.7171052631578947, 0.589...",0.668722,0.0761962
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.776604386677498, 0.7637860082304527, 0.7772...",0.775508,"[0.6691176470588235, 0.7302631578947368, 0.609...",0.668128,0.0700755
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6838235294117647, 0.631578947368421, 0.6369...",0.667158,0.0769623
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.7043054427294883, 0.7119341563786008, 0.712...",0.702012,"[0.6617647058823529, 0.6381578947368421, 0.506...",0.659991,0.0884033


In [56]:
MLA_Yee.to_excel("Yee.xlsx") 

### Ooo

In [57]:
make_and_save_sets(Ooo_dataframe, 'Ooo')
MLA_Ooo = MLA_selection(Ooo_dataframe, 'Ooo')



In [58]:
MLA_Ooo

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9929133858267717, 0.9928286852589642, 0.991...",0.993577,"[0.6962962962962963, 0.66, 0.6896551724137931,...",0.657233,0.0508682
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9496062992125984, 0.949800796812749, 0.9444...",0.952814,"[0.674074074074074, 0.5866666666666667, 0.6758...",0.652539,0.0449993
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9952755905511811, 0.999203187250996, 0.9968...",0.996829,"[0.7185185185185186, 0.66, 0.6620689655172414,...",0.652404,0.0595082
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9401574803149606, 0.9378486055776892, 0.939...",0.940124,"[0.6888888888888889, 0.5866666666666667, 0.682...",0.650496,0.0433521
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9653543307086614, 0.9641434262948207, 0.965...",0.964634,"[0.725925925925926, 0.6933333333333334, 0.6206...",0.646475,0.0656254
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6888888888888889, 0.7333333333333333, 0.606...",0.645584,0.0479154
2,ExtraTreesClassifier,"{'bootstrap': False, 'class_weight': None, 'cr...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6666666666666666, 0.64, 0.6551724137931034,...",0.63712,0.0306426
17,DecisionTreeClassifier,"{'class_weight': None, 'criterion': 'gini', 'm...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6370370370370371, 0.5533333333333333, 0.682...",0.634715,0.0450541
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...","[0.9771653543307086, 0.9776892430278884, 0.975...",0.974383,"[0.6666666666666666, 0.6333333333333333, 0.648...",0.633334,0.0373928
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.6708661417322834, 0.6908366533864542, 0.692...",0.683448,"[0.725925925925926, 0.5266666666666666, 0.5655...",0.61808,0.066771


In [59]:
MLA_Ooo.to_excel("Ooo.xlsx") 

### Uuu

In [60]:
make_and_save_sets(Uuu_dataframe, 'Uuu')
MLA_Uuu = MLA_selection(Uuu_dataframe, 'Uuu')



In [61]:
MLA_Uuu

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7832167832167832, 0.7338129496402878, 0.709...",0.708973,0.0703449
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9742710120068611, 0.9692307692307692, 0.980...",0.971865,"[0.8041958041958042, 0.7410071942446043, 0.689...",0.708429,0.079347
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9459691252144082, 0.9478632478632478, 0.960...",0.950505,"[0.6923076923076923, 0.8201438848920863, 0.648...",0.674084,0.0810944
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9416809605488851, 0.9393162393162393, 0.955...",0.946131,"[0.6853146853146853, 0.8129496402877698, 0.648...",0.671975,0.0780242
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9931389365351629, 0.994017094017094, 0.9939...",0.994083,"[0.7552447552447552, 0.5827338129496403, 0.722...",0.668128,0.0889664
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9974271012006861, 0.9957264957264957, 0.994...",0.996483,"[0.7132867132867133, 0.7194244604316546, 0.695...",0.664417,0.0647026
2,ExtraTreesClassifier,"{'bootstrap': False, 'class_weight': None, 'cr...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7062937062937062, 0.7769784172661871, 0.655...",0.661263,0.0743895
5,GaussianProcessClassifier,"{'copy_X_train': True, 'kernel': None, 'max_it...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7272727272727273, 0.7697841726618705, 0.648...",0.658332,0.0726471
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.8542024013722127, 0.8735042735042735, 0.851...",0.866684,"[0.8041958041958042, 0.6762589928057554, 0.628...",0.648434,0.0975919
13,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...","[0.9742710120068611, 0.9700854700854701, 0.974...",0.973064,"[0.6993006993006993, 0.7769784172661871, 0.628...",0.646005,0.0665947


In [62]:
MLA_Uuu.to_excel("Uuu.xlsx") 

### Sss

In [63]:
make_and_save_sets(Sss_dataframe, 'Sss')
MLA_Sss = MLA_selection(Sss_dataframe, 'Sss')



In [64]:
MLA_Sss

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
12,GaussianNB,"{'priors': None, 'var_smoothing': 1e-09}","[0.7285342584562012, 0.7137931034482758, 0.694...",0.716362,"[0.7034482758620689, 0.7898550724637681, 0.718...",0.675889,0.0728414
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.8013876843018214, 0.781896551724138, 0.7984...",0.801997,"[0.6482758620689655, 0.8333333333333334, 0.661...",0.666441,0.0795955
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.810928013876843, 0.7913793103448276, 0.8053...",0.810584,"[0.6620689655172414, 0.7898550724637681, 0.654...",0.665516,0.0666521
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9306157849089333, 0.9224137931034483, 0.915...",0.925733,"[0.6275862068965518, 0.7318840579710145, 0.711...",0.662893,0.0720208
19,LinearDiscriminantAnalysis,"{'n_components': None, 'priors': None, 'shrink...","[0.7970511708586296, 0.7939655172413793, 0.810...",0.803807,"[0.6482758620689655, 0.8115942028985508, 0.661...",0.659523,0.0793861
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': 'warn',...","[0.7762359063313097, 0.753448275862069, 0.7698...",0.776424,"[0.6275862068965518, 0.8333333333333334, 0.647...",0.659452,0.0848458
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9288811795316565, 0.9370689655172414, 0.923...",0.923464,"[0.6275862068965518, 0.7318840579710145, 0.711...",0.657382,0.0766674
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.8725065047701648, 0.881896551724138, 0.8788...",0.872864,"[0.593103448275862, 0.5942028985507246, 0.7042...",0.654742,0.0542937
9,SGDClassifier,"{'alpha': 0.0001, 'average': False, 'class_wei...","[0.7068516912402428, 0.7474137931034482, 0.723...",0.727954,"[0.5862068965517241, 0.7753623188405797, 0.711...",0.645217,0.0741773
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9921942758022549, 0.9922413793103448, 0.997...",0.99428,"[0.6068965517241379, 0.6594202898550725, 0.640...",0.640891,0.0583084


In [65]:
# Export the Sss results (this cell previously wrote MLA_Uuu into Sss.xlsx).
MLA_Sss.to_excel("Sss.xlsx")

### Zzz

In [66]:
make_and_save_sets(Zzz_dataframe, 'Zzz')
MLA_Zzz = MLA_selection(Zzz_dataframe, 'Zzz')



In [67]:
MLA_Zzz

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.6689655172413793, 0.6923076923076923, 0.614...",0.66415,0.071367
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.9284313725490196, 0.923679060665362, 0.9395...",0.92943,"[0.6689655172413793, 0.6153846153846154, 0.607...",0.657537,0.0622628
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9813725490196078, 0.9765166340508806, 0.978...",0.977981,"[0.6413793103448275, 0.6363636363636364, 0.585...",0.656988,0.0622442
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9568627450980393, 0.9471624266144814, 0.958...",0.952042,"[0.6689655172413793, 0.6293706293706294, 0.571...",0.65673,0.0688875
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.7696078431372549, 0.7896281800391389, 0.767...",0.773911,"[0.6827586206896552, 0.6223776223776224, 0.664...",0.648471,0.0641702
9,SGDClassifier,"{'alpha': 0.0001, 'average': False, 'class_wei...","[0.6892156862745098, 0.6986301369863014, 0.691...",0.700305,"[0.6413793103448275, 0.5944055944055944, 0.635...",0.645204,0.0574458
19,LinearDiscriminantAnalysis,"{'n_components': None, 'priors': None, 'shrink...","[0.7745098039215687, 0.7896281800391389, 0.775...",0.778028,"[0.6482758620689655, 0.6363636363636364, 0.65,...",0.641636,0.0518535
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': 'warn',...","[0.7235294117647059, 0.7377690802348337, 0.773...",0.758548,"[0.6620689655172414, 0.6013986013986014, 0.664...",0.638766,0.0672249
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.7696078431372549, 0.7915851272015656, 0.784...",0.777923,"[0.6758620689655173, 0.6153846153846154, 0.671...",0.638157,0.0573746
1,BaggingClassifier,"{'base_estimator': None, 'bootstrap': True, 'b...","[0.9911764705882353, 0.9980430528375733, 0.995...",0.994029,"[0.6275862068965518, 0.6363636363636364, 0.642...",0.63357,0.0630976


In [68]:
# Export the Zzz results (this cell previously wrote MLA_Uuu into Zzz.xlsx).
MLA_Zzz.to_excel("Zzz.xlsx")

### Inhale

In [69]:
make_and_save_sets(Inhale_dataframe, 'Inhale')
MLA_Inhale = MLA_selection(Inhale_dataframe, 'Inhale')



In [70]:
MLA_Inhale

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.7300613496932515, 0.7297043641482872, 0.741...",0.734145,"[0.8174603174603174, 0.8125, 0.682539682539682...",0.773497,0.0590541
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.8008494572911751, 0.8038479587048334, 0.801...",0.80431,"[0.7738095238095238, 0.8, 0.7380952380952381, ...",0.760934,0.0361152
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9428975932043416, 0.9310183012670108, 0.938...",0.936177,"[0.753968253968254, 0.8125, 0.7380952380952381...",0.760354,0.0455145
14,SVC,"{'C': 1.0, 'cache_size': 200, 'class_weight': ...","[0.924964605946201, 0.9174096668230878, 0.9235...",0.920819,"[0.7738095238095238, 0.8041666666666667, 0.714...",0.758556,0.0395528
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.7942425672487022, 0.7986860628812764, 0.799...",0.799532,"[0.7777777777777778, 0.8041666666666667, 0.742...",0.758291,0.0423958
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9164700330344502, 0.9084936649460347, 0.911...",0.909319,"[0.7698412698412699, 0.8125, 0.726190476190476...",0.756998,0.0383302
19,LinearDiscriminantAnalysis,"{'n_components': None, 'priors': None, 'shrink...","[0.7975460122699386, 0.7996245893946504, 0.798...",0.801194,"[0.7738095238095238, 0.8083333333333333, 0.718...",0.754865,0.0443947
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7380952380952381, 0.825, 0.7182539682539683...",0.747696,0.0478508
0,AdaBoostClassifier,"{'algorithm': 'SAMME.R', 'base_estimator': Non...","[0.85181689476168, 0.8423275457531675, 0.84709...",0.848394,"[0.7103174603174603, 0.7833333333333333, 0.734...",0.74348,0.0421778
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9957527135441245, 0.995776630689817, 0.9929...",0.994844,"[0.753968253968254, 0.7958333333333333, 0.6825...",0.739909,0.0494944


In [71]:
# Export the Inhale results (this cell previously wrote MLA_Uuu into Inhale.xlsx).
MLA_Inhale.to_excel("Inhale.xlsx")

### Exhale

In [72]:
make_and_save_sets(Exhale_dataframe, 'Exhale')
MLA_Exhale = MLA_selection(Exhale_dataframe, 'Exhale')



In [73]:
MLA_Exhale

Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Train Accuracy Mean,MLA Test Accuracy,MLA Test Accuracy Mean,MLA Test Accuracy Std
6,LogisticRegressionCV,"{'Cs': 10, 'class_weight': None, 'cv': 'warn',...","[0.8072174738841406, 0.813780260707635, 0.8161...",0.816663,"[0.7682119205298014, 0.8076923076923077, 0.738...",0.757408,0.0399525
3,GradientBoostingClassifier,"{'criterion': 'friedman_mse', 'init': None, 'l...","[0.9425451092117759, 0.952048417132216, 0.9399...",0.94368,"[0.7649006622516556, 0.7846153846153846, 0.696...",0.752077,0.04887
16,LinearSVC,"{'C': 1.0, 'class_weight': None, 'dual': True,...","[0.8238366571699905, 0.8147113594040968, 0.830...",0.826998,"[0.7317880794701986, 0.7961538461538461, 0.719...",0.747641,0.0330973
8,RidgeClassifierCV,"{'alphas': array([ 0.1, 1. , 10. ]), 'class_w...","[0.8167141500474834, 0.8133147113594041, 0.821...",0.821133,"[0.7350993377483444, 0.8, 0.7269230769230769, ...",0.746122,0.0415793
4,RandomForestClassifier,"{'bootstrap': True, 'class_weight': None, 'cri...","[0.9952516619183286, 0.9962756052141527, 0.995...",0.995996,"[0.7814569536423841, 0.7961538461538461, 0.738...",0.745659,0.0505735
15,NuSVC,"{'cache_size': 200, 'class_weight': None, 'coe...","[0.9031339031339032, 0.914804469273743, 0.9138...",0.910859,"[0.7086092715231788, 0.8153846153846154, 0.734...",0.745198,0.0454495
21,XGBClassifier,"{'objective': 'binary:logistic', 'use_label_en...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.0,"[0.7847682119205298, 0.7769230769230769, 0.711...",0.74481,0.0422862
11,BernoulliNB,"{'alpha': 1.0, 'binarize': 0.0, 'class_prior':...","[0.751661918328585, 0.7462756052141527, 0.7583...",0.757365,"[0.7847682119205298, 0.8576923076923076, 0.738...",0.740362,0.0707076
19,LinearDiscriminantAnalysis,"{'n_components': None, 'priors': None, 'shrink...","[0.8214624881291548, 0.813780260707635, 0.8244...",0.823003,"[0.7251655629139073, 0.8038461538461539, 0.707...",0.738225,0.040957
9,SGDClassifier,"{'alpha': 0.0001, 'average': False, 'class_wei...","[0.7844254510921178, 0.7337057728119181, 0.743...",0.748754,"[0.7947019867549668, 0.7384615384615385, 0.753...",0.735834,0.0260729


In [74]:
# Export the Exhale results (this cell previously wrote MLA_Uuu into Exhale.xlsx).
MLA_Exhale.to_excel("Exhale.xlsx")

### Without Maintaining Gender Ratio

In [17]:
def train_test_set(Asthmatic_Female, Asthmatic_Male, Healthy_Female, Healthy_Male, before_after):
    """Draw a class-balanced test set of recording names, ignoring gender.

    Fourteen names are sampled without replacement from all asthmatic names and
    fourteen from all healthy names. The remaining names of the four groups
    (sorted and de-duplicated by ``np.setdiff1d``) followed by ``before_after``
    form the training set.

    Returns ``(train_set, test_set)`` as NumPy arrays. This definition replaces
    the gender-balanced ``train_test_set`` defined earlier in the notebook.
    """
    asthma_pool = np.concatenate((np.array(Asthmatic_Male), np.array(Asthmatic_Female)))
    healthy_pool = np.concatenate((np.array(Healthy_Male), np.array(Healthy_Female)))

    # Asthmatic names are drawn first, then healthy ones, from the global NumPy RNG.
    asthma_pick = np.random.choice(asthma_pool, replace=False, size=14)
    healthy_pick = np.random.choice(healthy_pool, replace=False, size=14)
    test_set = np.concatenate((asthma_pick, healthy_pick))

    remaining = np.setdiff1d(np.concatenate((asthma_pool, healthy_pool)), test_set)
    train_set = np.concatenate((remaining, np.array(before_after)))
    return train_set, test_set

In [20]:
# MLA_selection only reads split files from disk, so redefining train_test_set
# above changes nothing until the splits are regenerated. Create a separate,
# tagged set of splits with the gender-agnostic sampler, then benchmark on those.
make_and_save_sets(Wheeze_dataframe, 'Wheeze_without')
MLA_Wheeze_without = MLA_selection(Wheeze_dataframe, 'Wheeze_without')



In [36]:
MLA_Wheeze_without.to_excel('./Wheeze_without_ratio.xlsx')