In [33]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error,r2_score,explained_variance_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
#%matplotlib inline

In [74]:
class RegressionModels():


    def scores(self,y_test,y_pred, model):
        print("Variance Score : " , explained_variance_score(y_test, y_pred))
        print("R2 Score : " ,r2_score(y_test,y_pred))
        print("Root Mean Square : ",math.sqrt(mean_squared_error(y_test, y_pred)))
        print("Best Parameters : ", model.best_params_ )

    def svmRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------SVM Regression Starts------------\n\n ")
        clf = SVR()
        svm_grid = GridSearchCV(clf, params, verbose=False, cv=3,return_train_score=True)
        svm_grid.fit(X_train,y_train)
        svm_predict = svm_grid.predict(X_test)
        print("Best Parameters : ", svm_grid.best_params_ )
        self.scores(Y_test, svm_predict, svm_grid)
        print("\n-----------SVM Regression ends------------\n\n ")

    def decisionTreeRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Decision Tree Regression Starts------------\n\n")
        DTregressor = DecisionTreeRegressor(random_state=0)
        DT_grid = GridSearchCV(DTregressor, params, verbose=False, cv=3,return_train_score=True)
        DT_grid.fit(X_train,y_train)
        DT_predict = DT_grid.predict(X_test)
        self.scores(Y_test, DT_predict, DT_grid)
        print("\n-----------Decision Tree Regression Ends------------\n\n")

    def randomForestRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Random Forest Regression Starts------------\n\n")
        RFRegressor = RandomForestRegressor(random_state=0)
        RF_grid = GridSearchCV(RFRegressor, params, verbose=False, cv=3, return_train_score=True)
        RF_grid.fit(X_train,y_train)
        RF_predict = RF_grid.predict(X_test)
        self.scores(Y_test, RF_predict, RF_grid)
        print("\n-----------Random Forest Regression Ends------------\n\n")
  
    def adaBoostRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------AdaBoost Regression Starts------------\n\n")
        AdaRegressor = AdaBoostRegressor(random_state=0)
        adaBoost_grid = GridSearchCV(AdaRegressor, params, verbose=False, cv=3,return_train_score=True)
        adaBoost_grid.fit(X_train,y_train)
        adaBoost_predict = adaBoost_grid.predict(X_test)
        self.scores(Y_test, adaBoost_predict, adaBoost_grid)
        print("\n-----------AdaBoost Regression Ends------------\n\n")

    def gaussianProcessRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------GaussianProcess Regression Starts------------\n\n")
        GPRRegressor = GaussianProcessRegressor(random_state=0)
        GPR_grid = GridSearchCV(GPRRegressor, params, verbose=False, cv=3,return_train_score=True)
        GPR_grid.fit(X_train,y_train)
        GPR_predict = GPR_grid.predict(X_test)
        self.scores(Y_test, GPR_predict, GPR_grid)
        print("\n-----------GaussianProcess Regression Ends------------\n\n")

    def LinearRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Linear Regression Starts------------\n\n")
        LinearRegressor = LinearRegression()
        linearRegression_grid = GridSearchCV(LinearRegressor, params, verbose=False, cv=3,return_train_score=True)
        linearRegression_grid.fit(X_train,y_train)
        linearRegression_predict = linearRegression_grid.predict(X_test)
        self.scores(Y_test, linearRegression_predict, linearRegression_grid)
        print("\n-----------Linear Regression Ends------------\n\n")

    def mlpRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Neural Network Regression Starts------------\n\n")
        MLPRegressor_obj = MLPRegressor(random_state=0)
        MLPRegressor_grid = GridSearchCV(MLPRegressor_obj, params, verbose=False, cv=3,return_train_score=True)
        MLPRegressor_grid.fit(X_train,y_train)
        MLPRegressor_predict = MLPRegressor_grid.predict(X_test)
        self.scores(Y_test, MLPRegressor_predict, MLPRegressor_grid)
        print("\n-----------Neural Network Regression Ends------------\n\n")
    

    def train_split(self,X,y,test_size=0.2,random_state=0):
        X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
        return X_train,X_test,y_train,y_test
 
    def train_all__models(self, X, y):
        svm_regression_params =  { 'C' : np.logspace(0, 3, 4), 'gamma' : np.logspace(-2, 1, 4)}
        dt_params = {'max_depth' : np.arange(1, 10, 10),'min_samples_split': np.arange(0.1, 1.0, 10)}
        rd_params = {'n_estimators' : np.arange(10,100,10),'max_depth' : np.arange(1,6,2)}
        ada_params = {'n_estimators' : np.arange(10,100,10)}
        gpr_params = {'n_restarts_optimizer' : np.arange(1,10,1)}
        linear_params = {'n_jobs' : np.arange(1,5,1)}
        mlp_params = {'hidden_layer_sizes': np.arange(30,150,20),'learning_rate': ['constant','invscaling','adaptive'],'max_iter': np.arange(20,200,50)}
        X_train,X_test,y_train,y_test = self.train_split(X,y)
        self.svmRegression(X_train,X_test,y_train,y_test, svm_regression_params)
        self.decisionTreeRegression(X_train,X_test,y_train,y_test, dt_params)
        self.randomForestRegression(X_train,X_test,y_train,y_test, rd_params)
        self.adaBoostRegression(X_train,X_test,y_train,y_test, ada_params)
        self.gaussianProcessRegression(X_train,X_test,y_train,y_test, gpr_params)
        self.LinearRegression(X_train,X_test,y_train,y_test, linear_params)
        self.mlpRegression(X_train,X_test,y_train,y_test, mlp_params)
    
    def speech_data(self):
        col = ['Subject_id','local_jitter','absolute_jitter','rap_jitter','ppq5_jitter','ddp_jitter','local_shimmer',
                       'db_shimmer','apq3_shimmer','apq5_shimmer','apq11_shimmer','dda_shimmer','AC','NTH','HTN','Median_pitch',
                       'Mean_pitch','Standard_deviation','Minimum_pitch','Maximum_pitch','Number_of_pulses','Number_of_periods',
                       'Mean_period','Standard_deviation_of_period','Fraction_of_locally_unvoiced_frames','Number_of_voice_breaks',
                       'Degree_of_voice_breaks','UPDRS','class_info']
        data = pd.read_csv("../Datasets/Parkinson_Multiple_Sound_Recording/Prakinson_Multiple_sound_recording_train_data.txt")
        data.columns=col

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
        X = data.drop(['class_info'],axis=1)
        X = a.fit_transform(X)
        y = data['class_info']
        self.train_all__models(X, y.values.ravel())
    
    def concrete_data(self):
        col = ['Cement','Blast Furnace Slag','Fly Ash','Water','Superplasticizer','Coarse Aggregate','Fine Aggregate',
                       'Age','Concrete compressive strength']
        data  = pd.read_excel("../Datasets/concrete_data/Concrete_compressive_strength_Data.xls",skiprows=1)
        data.columns=col

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
        X = data.drop(['Concrete compressive strength'],axis=1)
        X = a.fit_transform(X)
        y = data['Concrete compressive strength']
        self.train_all__models(X, y.values.ravel())
        
    def student_data(self):
    
        data  = pd.read_csv("../Datasets/Student_performance/student1/student-mat.csv",delimiter=";")

        categorical_columns = ['school','sex','famsize','address','famsize','Pstatus','Mjob','Fjob','reason','guardian','schoolsup',
                               'famsup','paid','activities','nursery','higher','internet','romantic']
        for i in categorical_columns:
            data[i] = pd.Categorical(data[i]).codes

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
        X = data.drop(['G2','G3'],axis=1)
        X = a.fit_transform(X)
        y = data['G1']
        self.train_all__models(X, y.values.ravel())

In [2]:
col = ['Subject_id','local_jitter','absolute_jitter','rap_jitter','ppq5_jitter','ddp_jitter','local_shimmer','db_shimmer','apq3_shimmer','apq5_shimmer','apq11_shimmer','dda_shimmer','AC','NTH','HTN','Median_pitch','Mean_pitch','Standard_deviation','Minimum_pitch','Maximum_pitch','Number_of_pulses','Number_of_periods','Mean_period','Standard_deviation_of_period','Fraction_of_locally_unvoiced_frames','Number_of_voice_breaks','Degree_of_voice_breaks','UPDRS','class_info']
print(len(col))

29


In [73]:
def train_all__models(self, X, y):
    svm_regression_params =  { 'C' : np.logspace(0, 3, 4), 'gamma' : np.logspace(-2, 1, 4)}
    dt_params = {'max_depth' : np.arange(1, 10, 10),'min_samples_split': np.arange(0.1, 1.0, 10)}
    rd_params = {'n_estimators' : np.arange(10,100,10),'max_depth' : np.arange(1,6,2)}
    ada_params = {'n_estimators' : np.arange(10,100,10)}
    gpr_params = {'n_restarts_optimizer' : np.arange(1,10,1)}
    linear_params = {'n_jobs' : np.arange(1,5,1)}
    mlp_params = {'hidden_layer_sizes': np.arange(30,150,20),'learning_rate': ['constant','invscaling','adaptive'],'max_iter': np.arange(20,200,50)}
    X_train,X_test,y_train,y_test = self.train_split(X,y)



def speech_data(self):
    col = ['Subject_id','local_jitter','absolute_jitter','rap_jitter','ppq5_jitter','ddp_jitter','local_shimmer',
                   'db_shimmer','apq3_shimmer','apq5_shimmer','apq11_shimmer','dda_shimmer','AC','NTH','HTN','Median_pitch',
                   'Mean_pitch','Standard_deviation','Minimum_pitch','Maximum_pitch','Number_of_pulses','Number_of_periods',
                   'Mean_period','Standard_deviation_of_period','Fraction_of_locally_unvoiced_frames','Number_of_voice_breaks',
                   'Degree_of_voice_breaks','UPDRS','class_info']
    data = pd.read_csv("../Datasets/Parkinson_Multiple_Sound_Recording/Prakinson_Multiple_sound_recording_train_data.txt")
    data.columns=col

    data.columns = data.columns.str.lstrip()
    a = StandardScaler()
    X = data.drop(['class_info'],axis=1)
    X = a.fit_transform(X)
    y = data['class_info']
    self.train_all__models(X, y.values.ravel())

def concrete_data(self):
    col = ['Cement','Blast Furnace Slag','Fly Ash','Water','Superplasticizer','Coarse Aggregate','Fine Aggregate',
                   'Age','Concrete compressive strength']
    data  = pd.read_excel("../Datasets/concrete_data/Concrete_compressive_strength_Data.xls",skiprows=1)
    data.columns=col

    data.columns = data.columns.str.lstrip()
    a = StandardScaler()
    X = data.drop(['Concrete compressive strength'],axis=1)
    X = a.fit_transform(X)
    y = data['Concrete compressive strength']
    self.train_all__models(X, y.values.ravel())
    
def student_data(self):
    
    data  = pd.read_csv("../Datasets/Student_performance/student1/student-mat.csv",delimiter=";")
    
    categorical_columns = ['school','sex','famsize','address','famsize','Pstatus','Mjob','Fjob','reason','guardian','schoolsup',
                           'famsup','paid','activities','nursery','higher','internet','romantic']
    for i in categorical_columns:
        data[i] = pd.Categorical(data[i]).codes

    data.columns = data.columns.str.lstrip()
    a = StandardScaler()
    X = data.drop(['G2','G3'],axis=1)
    X = a.fit_transform(X)
    y1 = data['G1']
    y2 = data['G2']
    
    self.train_all__models(X, y2.values.ravel())
    
    


In [70]:
data  = pd.read_csv("../Datasets/Student_performance/student1/student-mat.csv",delimiter=";")

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


In [None]:
regressionModels = RegressionModels()
#regressionModels.speech_data()
#regressionModels.concrete_data()
regressionModels.student_data()


-----------SVM Regression Starts------------

 
Best Parameters :  {'C': 10.0, 'gamma': 0.01}
Variance Score :  0.9901079475787757
R2 Score :  0.9900998809706156
Root Mean Square :  0.35475128335196515
Best Parameters :  {'C': 10.0, 'gamma': 0.01}

-----------SVM Regression ends------------

 
-----------Decision Tree Regression Starts------------


Variance Score :  0.7662233284401789
R2 Score :  0.7661943902322993
Root Mean Square :  1.7239760600579943
Best Parameters :  {'max_depth': 1, 'min_samples_split': 0.1}

-----------Decision Tree Regression Ends------------


-----------Random Forest Regression Starts------------


Variance Score :  0.9999306157367287
R2 Score :  0.9999289352932027
Root Mean Square :  0.030055970197912472
Best Parameters :  {'max_depth': 5, 'n_estimators': 20}

-----------Random Forest Regression Ends------------


-----------AdaBoost Regression Starts------------


Variance Score :  0.9873915101810066
R2 Score :  0.9858993187018266
Root Mean Square :  0.423









In [39]:
data.head(n=5)


Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [None]:
def sound_data_preprocessing():
    col = ['Subject_id','local_jitter','absolute_jitter','rap_jitter','ppq5_jitter','ddp_jitter','local_shimmer','db_shimmer','apq3_shimmer','apq5_shimmer','apq11_shimmer','dda_shimmer','AC','NTH','HTN','Median_pitch','Mean_pitch','Standard_deviation','Minimum_pitch','Maximum_pitch','Number_of_pulses','Number_of_periods','Mean_period','Standard_deviation_of_period','Fraction_of_locally_unvoiced_frames','Number_of_voice_breaks','Degree_of_voice_breaks','UPDRS','class_info']
    data = pd.read_csv("adult.data")

In [None]:
def Adult_data_preprocessing():
    col = ['age','workclass','fnlwgt','education','education-num','marital-status','occupation','relationship','race','sex',
       'capital-gain','capital-loss','hours-per-week','native-country','salary']
    data = pd.read_csv("adult.data")
    categorical_columns = ['workclass','education','marital-status','occupation','relationship','race','sex','native-country','salary']
    for i in categorical_columns:
    data[i] = pd.Categorical(data[i]).codes
    data.columns = data.columns.str.lstrip()
    a = StandardScaler()
    X = data.drop(['salary'],axis=1)
    X = a.fit_transform(X)
    y = data['salary']
    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.3)

In [26]:
entrypoint = entryPoint()
temp = entrypoint.Yeast_Category()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Yeast_Category')])
Final_report = pd.concat([Final_report,temp])


Knn Grid Search Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


SVM Classification Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Decisiontree Classifier Starting...
randomForest Classifier Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


AdaBoost Classifier Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Logistic Regression classification Starting...






  'precision', 'predicted', average, warn_for)


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...




  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [3]:
entrypoint = entryPoint()

#GIRISH
temp = entrypoint.diabetic_retinopaty()
best_report = entrypoint.getbest(temp,'Diabetic Retinopathy')
Final_report = temp
temp = entrypoint.Breast_Cancer_Wisconsin()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Breast_Cancer_Wisconsin')])
Final_report = pd.concat([Final_report,temp])


##Gursimran SIngh
temp = entrypoint.thoratic()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Thoracic Surgery Data')])
Final_report = pd.concat([Final_report,temp])

temp = entrypoint.seismicbumps()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Seismic-Bumps')])
Final_report = pd.concat([Final_report,temp])

temp = entrypoint.steel_plates_faults()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Steel_Plates_Faults')])
Final_report = pd.concat([Final_report,temp])

##Aravind
temp = entrypoint.Adults_Salary()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Adult_Salary')])
Final_report = pd.concat([Final_report,temp])

temp = entrypoint.Yeast_Category()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Yeast_Category')])
Final_report = pd.concat([Final_report,temp])

##Darshan
temp = entrypoint.creditCardDataset()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'Credit_Card')])
Final_report = pd.concat([Final_report,temp])

temp = entrypoint.australianCredit()
best_report = pd.concat([best_report,entryplloint.getbest(temp,'Australia_Credit')])
Final_report = pd.concat([Final_report,temp])

temp = entrypoint.germanCredit()
best_report = pd.concat([best_report,entrypoint.getbest(temp,'German_Credit')])
Final_report = pd.concat([Final_report,temp])


print('Best Report')
best_report
print("  ")
print('Final Report')
Final_report



Before removing outliers , rows -  1146
After removing outliers , rows - 1080
Number of records deleted -  66
Normalization done
Knn Grid Search Starting...
SVM Classification Starting...
Decisiontree Classifier Starting...
randomForest Classifier Starting...
AdaBoost Classifier Starting...
Logistic Regression classification Starting...






GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...






Before removing outliers , rows -  568
After removing outliers , rows - 494
Number of records deleted -  74
Normalization done
Knn Grid Search Starting...
SVM Classification Starting...
Decisiontree Classifier Starting...
randomForest Classifier Starting...
AdaBoost Classifier Starting...
Logistic Regression classification Starting...






GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...






Normalization done
Knn Grid Search Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


SVM Classification Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Decisiontree Classifier Starting...
randomForest Classifier Starting...
AdaBoost Classifier Starting...
Logistic Regression classification Starting...






GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...




  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Normalization done
Knn Grid Search Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


SVM Classification Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Decisiontree Classifier Starting...
randomForest Classifier Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


AdaBoost Classifier Starting...
Logistic Regression classification Starting...






  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Normalization done
Knn Grid Search Starting...
SVM Classification Starting...
Decisiontree Classifier Starting...
randomForest Classifier Starting...
AdaBoost Classifier Starting...


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Logistic Regression classification Starting...












GaussianNaiveBaive Classifier Starting... 
NeuralNetworks Classifier Starting...






FileNotFoundError: [Errno 2] File b'../Datasets/Adult_Salary_Data/adult.data' does not exist: b'../Datasets/Adult_Salary_Data/adult.data'

In [27]:
best_report

Unnamed: 0,Train Accuracy,Test Accuracy,Precision,Recall,F1 score
Diabetic Retinopathy - KNN,0.739418,0.638889,0.638665,0.638889,0.638478
Breast_Cancer_Wisconsin - KNN,0.985507,0.986577,0.986826,0.986577,0.98648
Thoracic Surgery Data - Random Forest,0.869301,0.822695,0.74327,0.822695,0.766915
Seismic-Bumps - SVM,0.948009,0.929124,0.906655,0.929124,0.915487
Steel_Plates_Faults - KNN,0.840206,0.763293,0.768286,0.763293,0.761135
Adult_Salary - Decision Tree,0.911811,0.866196,0.860622,0.866196,0.860221
Yeast_Category - Decision Tree,0.993256,0.620225,0.607276,0.620225,0.603029


In [20]:
Final_report


Unnamed: 0,Unnamed: 1,Train Accuracy,Test Accuracy,Precision,Recall,F1 score
Diabetic Retinopathy,Logistic Regression,0.685185,0.58642,0.587011,0.58642,0.586514
Diabetic Retinopathy,KNN,0.739418,0.638889,0.638665,0.638889,0.638478
Diabetic Retinopathy,SVM,0.732804,0.611111,0.615868,0.611111,0.602806
Diabetic Retinopathy,Decision Tree,0.828042,0.604938,0.609105,0.604938,0.603671
Diabetic Retinopathy,Random Forest,0.730159,0.58642,0.593794,0.58642,0.582708
Diabetic Retinopathy,Adaboost,0.638889,0.58642,0.591306,0.58642,0.584428
Diabetic Retinopathy,GaussionNB,0.544974,0.611111,0.67584,0.611111,0.563047
Diabetic Retinopathy,Nueral Network,0.650794,0.614198,0.626056,0.614198,0.608928
Breast_Cancer_Wisconsin,Logistic Regression,0.976812,0.959732,0.960263,0.959732,0.959122
Breast_Cancer_Wisconsin,KNN,0.985507,0.986577,0.986826,0.986577,0.98648


In [19]:
best_report



Unnamed: 0,Train Accuracy,Test Accuracy,Precision,Recall,F1 score
Diabetic Retinopathy - KNN,0.739418,0.638889,0.638665,0.638889,0.638478
Breast_Cancer_Wisconsin - KNN,0.985507,0.986577,0.986826,0.986577,0.98648
Thoracic Surgery Data - Random Forest,0.844985,0.858156,0.833179,0.858156,0.809913
Seismic-Bumps - Logistic Regression,0.936394,0.935567,0.908555,0.935567,0.90686
Steel_Plates_Faults - KNN,0.856406,0.758148,0.763924,0.758148,0.755617
