In [5]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
import timeit
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import pickle
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import warnings
import os


def training(X_train, Y_train):
        #We are going to create our machine learning models with the classifiers
        #Classifiers
        #This is a list of models and each of them is going to be a classifier
        models=[]
        models.append(("Tree",DecisionTreeClassifier()))
        models.append(("KNN",KNeighborsClassifier()))
        models.append(("LDiscrimination",LinearDiscriminantAnalysis()))
        models.append(("NB",GaussianNB()))
        models.append(("SVM",SVC(gamma="auto")))
        models.append(("LRegression",LogisticRegression(solver="liblinear",multi_class="ovr")))
        models.append(("RandomForest",RandomForestClassifier()))
        models.append(("GradientBoosting",GradientBoostingClassifier()))
        models.append(("AdaBoost",AdaBoostClassifier()))
        models.append(("XGBoost",XGBClassifier()))
        models.append(("NNet",MLPClassifier(random_state=1, max_iter=300)))
        #models.append(("OneRule",StackingClassifier()))
        #This list will accumulate the results
        results=[]
        names = []
        times=[]

        for name, model in models:
                #Normally you divide the training data in 10 blocks (or n blocks) and you use 9 for training and one
                #for testing, then you change the blocks 10 times and you choose form the 10 models that you have 
                #created the best one. This reduces overfitting
                start=timeit.default_timer()
                cv_fold= StratifiedKFold(n_splits=10,random_state=1,shuffle=True)
                cv_results= cross_val_score(model, X_train,Y_train,cv=cv_fold, scoring="accuracy")
                model.fit(X_train,Y_train)
                stop=timeit.default_timer()
                results.append(cv_results)
                names.append(name)
                times.append(stop-start)
                print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
                print("Time: ",stop-start)
        return results, names, times,models

#Set up for the dierctories
os.mkdir("models")
os.mkdir("results")
os.mkdir("times")


iris = datasets.load_iris()
X = iris.data
Y = iris.target
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, shuffle=True)
rIris,nIris,tIris,mIris=training(X_train,Y_train)
with open(r'classifiersNames.txt', 'w') as fp:
        for name in nIris:
                fp.write("%s\n" % name)

for i,model in enumerate(mIris):
       pickle.dump(model, open('models/Iris_model_'+nIris[i], 'wb'))
       pickle.dump(rIris[i], open('results/Iris_results_'+nIris[i], 'wb'))
       pickle.dump(tIris[i], open('times/Iris_times_'+nIris[i], 'wb'))


Tree: 0.975000 (0.038188)
Time:  0.02744751493446529
KNN: 0.966667 (0.040825)
Time:  0.045788997784256935
LDiscrimination: 0.991667 (0.025000)
Time:  0.027688296046108007
NB: 0.975000 (0.038188)
Time:  0.02403884194791317
SVM: 0.975000 (0.038188)
Time:  0.02959879720583558
LRegression: 0.958333 (0.055902)
Time:  0.03229916701093316




RandomForest: 0.975000 (0.038188)
Time:  1.6336350950878114
GradientBoosting: 0.966667 (0.055277)
Time:  2.124810194130987




AdaBoost: 0.975000 (0.038188)
Time:  0.7635212840978056
XGBoost: 0.950000 (0.055277)
Time:  0.5702299061231315




NNet: 0.975000 (0.038188)
Time:  1.2610962509643286




In [17]:
for model in mIris:
    predictions=model[1].predict(X_test)
    print(str(model[0]) + ": " + str(sum(predictions == Y_test)/sum(Y_test)))

Tree: 0.7647058823529411
KNN: 0.8235294117647058
LDiscrimination: 0.8235294117647058
NB: 0.7941176470588235
SVM: 0.8529411764705882
LRegression: 0.7941176470588235
RandomForest: 0.7941176470588235
GradientBoosting: 0.7941176470588235
AdaBoost: 0.7647058823529411
XGBoost: 0.7941176470588235
NNet: 0.8529411764705882
