# Classification

In [1]:
import openml
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
import time

### Iris Dataset

In [2]:
iris = openml.datasets.get_dataset(61)
iris_x, iris_y, iris_cat_ind, iris_att_names = iris.get_data(
    target = iris.default_target_attribute, 
    dataset_format="dataframe"
)
print(iris_att_names, iris_cat_ind)
print(iris_y.dtype)
print(iris_x.isnull().sum(), iris_y.isnull().sum())

['sepallength', 'sepalwidth', 'petallength', 'petalwidth'] [False, False, False, False]
category
sepallength    0
sepalwidth     0
petallength    0
petalwidth     0
dtype: int64 0


In [3]:
iris_x_nor = (iris_x-iris_x.min())/(iris_x.max()-iris_x.min())
iris_y_num = LabelEncoder().fit_transform(iris_y)

In [4]:
iris_x_train, iris_x_test, iris_y_train, iris_y_test =  train_test_split(
    iris_x_nor, 
    iris_y_num, 
    test_size = 0.3, 
    random_state = 42
)

### Wine Dataset

In [5]:
wine = openml.datasets.get_dataset(187)
wine_x, wine_y, wine_cat_ind, wine_att_names = wine.get_data(
    target = wine.default_target_attribute, 
    dataset_format="dataframe"
)
print(wine_att_names, wine_cat_ind)
print(wine_y.dtype)
print(wine_x.isnull().sum(), wine_y.isnull().sum())

['Alcohol', 'Malic_acid', 'Ash', 'Alcalinity_of_ash', 'Magnesium', 'Total_phenols', 'Flavanoids', 'Nonflavanoid_phenols', 'Proanthocyanins', 'Color_intensity', 'Hue', 'OD280%2FOD315_of_diluted_wines', 'Proline'] [False, False, False, False, False, False, False, False, False, False, False, False, False]
category
Alcohol                           0
Malic_acid                        0
Ash                               0
Alcalinity_of_ash                 0
Magnesium                         0
Total_phenols                     0
Flavanoids                        0
Nonflavanoid_phenols              0
Proanthocyanins                   0
Color_intensity                   0
Hue                               0
OD280%2FOD315_of_diluted_wines    0
Proline                           0
dtype: int64 0


In [6]:
wine_x_nor = (wine_x-wine_x.min())/(wine_x.max()-wine_x.min())
wine_y_num = LabelEncoder().fit_transform(wine_y)

In [7]:
wine_x_train, wine_x_test, wine_y_train, wine_y_test =  train_test_split(
    wine_x_nor, 
    wine_y_num, 
    test_size = 0.3, 
    random_state = 42
)

### Haberman Dataset

In [8]:
hman = openml.datasets.get_dataset(43)
hman_x, hman_y, hman_cat_ind, hman_att_names = hman.get_data(
    target = hman.default_target_attribute, 
    dataset_format="dataframe")
print(hman_att_names, hman_cat_ind)
print(hman_y.dtype)
print(hman_x.isnull().sum(), hman_y.isnull().sum())

['Age_of_patient_at_time_of_operation', 'Patients_year_of_operation', 'Number_of_positive_axillary_nodes_detected'] [False, True, False]
category
Age_of_patient_at_time_of_operation           0
Patients_year_of_operation                    0
Number_of_positive_axillary_nodes_detected    0
dtype: int64 0


In [9]:
col = ['Age_of_patient_at_time_of_operation', 'Number_of_positive_axillary_nodes_detected']
hman_x_nor = (hman_x[col]-hman_x[col].min())/(hman_x[col].max()-hman_x[col].min())
hman_x_nor['Patients_year_of_operation'] = LabelEncoder().fit_transform(hman_x['Patients_year_of_operation'])

In [10]:
hman_y_num = LabelEncoder().fit_transform(hman_y)

In [11]:
hman_x_train, hman_x_test, hman_y_train, hman_y_test =  train_test_split(
    hman_x_nor, 
    hman_y_num, 
    test_size = 0.3, 
    random_state = 42
)

### Glass Dataset

In [12]:
glass = openml.datasets.get_dataset(41)
glass_x, glass_y, glass_cat_ind, glass_att_names = glass.get_data(
    target = glass.default_target_attribute, 
    dataset_format="dataframe")
print(glass_att_names, glass_cat_ind)
print(glass_y.dtype)
print(glass_x.isnull().sum(), glass_y.isnull().sum())

['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe'] [False, False, False, False, False, False, False, False, False]
category
RI    0
Na    0
Mg    0
Al    0
Si    0
K     0
Ca    0
Ba    0
Fe    0
dtype: int64 0


In [13]:
glass_x_nor = (glass_x-glass_x.min())/(glass_x.max()-glass_x.min())
glass_y_num = LabelEncoder().fit_transform(glass_y)

In [14]:
glass_x_train, glass_x_test, glass_y_train, glass_y_test =  train_test_split(
    glass_x_nor, 
    glass_y_num, 
    test_size = 0.3, 
    random_state = 42
)

### Satellite Image Dataset

In [15]:
simg = openml.datasets.get_dataset(294)
simg_x, simg_y, simg_cat_ind, simg_att_names = simg.get_data(
    target = simg.default_target_attribute, 
    dataset_format="dataframe")
print(simg_att_names, simg_cat_ind)
print(simg_y.dtype)
print(simg_x.isnull().sum())
print(simg_y.isnull().sum())

['attr1', 'attr2', 'attr3', 'attr4', 'attr5', 'attr6', 'attr7', 'attr8', 'attr9', 'attr10', 'attr11', 'attr12', 'attr13', 'attr14', 'attr15', 'attr16', 'attr17', 'attr18', 'attr19', 'attr20', 'attr21', 'attr22', 'attr23', 'attr24', 'attr25', 'attr26', 'attr27', 'attr28', 'attr29', 'attr30', 'attr31', 'attr32', 'attr33', 'attr34', 'attr35', 'attr36'] [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]
uint8
attr1     0
attr2     0
attr3     0
attr4     0
attr5     0
attr6     0
attr7     0
attr8     0
attr9     0
attr10    0
attr11    0
attr12    0
attr13    0
attr14    0
attr15    0
attr16    0
attr17    0
attr18    0
attr19    0
attr20    0
attr21    0
attr22    0
attr23    0
attr24    0
attr25    0
attr26    0
attr27    0
attr28    0
attr29    0
attr30    0
attr31    0
attr32    0
attr3

In [16]:
simg_x_nor = (simg_x-simg_x.min())/(simg_x.max()-simg_x.min())
simg_y_num = LabelEncoder().fit_transform(simg_y)

In [17]:
simg_x_train, simg_x_test, simg_y_train, simg_y_test =  train_test_split(
    simg_x_nor, 
    simg_y_num, 
    test_size = 0.3, 
    random_state = 42
)

### Libras Move Dataset

In [18]:
lm = openml.datasets.get_dataset(299)
lm_x, lm_y, lm_cat_ind, lm_att_names = lm.get_data(
    target = lm.default_target_attribute, 
    dataset_format="dataframe")
print(lm_att_names, lm_cat_ind)
print(lm_y.dtype)
print(lm_x.isnull().sum().sum())
print(lm_y.isnull().sum())

['xcoord1', 'ycoord1', 'xcoord2', 'ycoord2', 'xcoord3', 'ycoord3', 'xcoord4', 'ycoord4', 'xcoord5', 'ycoord5', 'xcoord6', 'ycoord6', 'xcoord7', 'ycoord7', 'xcoord8', 'ycoord8', 'xcoord9', 'ycoord9', 'xcoord10', 'ycoord10', 'xcoord11', 'ycoord11', 'xcoord12', 'ycoord12', 'xcoord13', 'ycoord13', 'xcoord14', 'ycoord14', 'xcoord15', 'ycoord15', 'xcoord16', 'ycoord16', 'xcoord17', 'ycoord17', 'xcoord18', 'ycoord18', 'xcoord19', 'ycoord19', 'xcoord20', 'ycoord20', 'xcoord21', 'ycoord21', 'xcoord22', 'ycoord22', 'xcoord23', 'ycoord23', 'xcoord24', 'ycoord24', 'xcoord25', 'ycoord25', 'xcoord26', 'ycoord26', 'xcoord27', 'ycoord27', 'xcoord28', 'ycoord28', 'xcoord29', 'ycoord29', 'xcoord30', 'ycoord30', 'xcoord31', 'ycoord31', 'xcoord32', 'ycoord32', 'xcoord33', 'ycoord33', 'xcoord34', 'ycoord34', 'xcoord35', 'ycoord35', 'xcoord36', 'ycoord36', 'xcoord37', 'ycoord37', 'xcoord38', 'ycoord38', 'xcoord39', 'ycoord39', 'xcoord40', 'ycoord40', 'xcoord41', 'ycoord41', 'xcoord42', 'ycoord42', 'xcoord43

In [19]:
lm_x_nor = (lm_x-lm_x.min())/(lm_x.max()-lm_x.min())
lm_y_num = LabelEncoder().fit_transform(lm_y)

In [20]:
lm_x_train, lm_x_test, lm_y_train, lm_y_test =  train_test_split(
    lm_x_nor, 
    lm_y_num, 
    test_size = 0.3, 
    random_state = 42
)

In [21]:
Datasets = ['iris', 'wine', 'haberman', 'glass', 'satellite_image', 'libras_move']

## K - Nearest Neighbors 

In [22]:
knn_par = {
    'n_neighbors' : [3, 5, 7],
    'weights' : ['uniform', 'distance'],
    'p' : [1, 2]
}

In [23]:
def knn (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    KNN = KNeighborsClassifier()
    knn_grid = GridSearchCV(KNN, parameter, refit = True, n_jobs=-1, cv=5)
    knn_grid.fit(x_train, y_train)
    opt_par = knn_grid.best_params_
    pred = knn_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [24]:
iris_knn = knn(iris_x_train, iris_y_train, iris_x_test, iris_y_test, knn_par)

In [25]:
wine_knn = knn(wine_x_train, wine_y_train, wine_x_test, wine_y_test, knn_par)

In [26]:
hman_knn = knn(hman_x_train, hman_y_train, hman_x_test, hman_y_test, knn_par)

In [27]:
glass_knn = knn(glass_x_train, glass_y_train, glass_x_test, glass_y_test, knn_par)

In [28]:
simg_knn = knn(simg_x_train, simg_y_train, simg_x_test, simg_y_test, knn_par)

In [29]:
lm_knn = knn(lm_x_train, lm_y_train, lm_x_test, lm_y_test, knn_par)

In [30]:
knn_results = [iris_knn, wine_knn, hman_knn, glass_knn, simg_knn, lm_knn]

In [31]:
knn_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [32]:
for i in range(0, len(knn_results)):
    knn_df = knn_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : knn_results[i][0],
        'Accuracy' : knn_results[i][2],
        'f1 Score' : knn_results[i][3],
        'Execution Time' : knn_results[i][4]
    }, ignore_index = True
    )

In [33]:
knn_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'n_neighbors': 7, 'p': 1, 'weights': 'uniform'}",1.0,1.0,7.157332
1,wine,"{'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}",0.944444,0.94378,0.453321
2,haberman,"{'n_neighbors': 7, 'p': 1, 'weights': 'uniform'}",0.717391,0.647669,0.411791
3,glass,"{'n_neighbors': 7, 'p': 1, 'weights': 'distance'}",0.738462,0.720334,0.402285
4,satellite_image,"{'n_neighbors': 5, 'p': 2, 'weights': 'distance'}",0.902123,0.902812,8.31032
5,libras_move,"{'n_neighbors': 3, 'p': 2, 'weights': 'distance'}",0.842593,0.837243,0.665472


In [34]:
knn_df.to_csv('KNN_Results.csv')

## Logistic Regression

In [34]:
lr_par = {
    'penalty' : ['l2'],
    'C' : [0.1, 1.0, 10.0],
    'solver' : ['newton-cg', 'lbfgs', 'sag', 'saga'],
    'max_iter' : [100, 200, 300]
}

In [35]:
def lr (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    LR = LogisticRegression()
    lr_grid = GridSearchCV(LR, parameter, refit = True, n_jobs=-1, cv=5)
    lr_grid.fit(x_train, y_train)
    opt_par = lr_grid.best_params_
    pred = lr_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [36]:
iris_lr = lr(iris_x_train, iris_y_train, iris_x_test, iris_y_test, lr_par)

In [37]:
wine_lr = lr(wine_x_train, wine_y_train, wine_x_test, wine_y_test, lr_par)

In [38]:
hman_lr = lr(hman_x_train, hman_y_train, hman_x_test, hman_y_test, lr_par)

In [39]:
glass_lr = lr(glass_x_train, glass_y_train, glass_x_test, glass_y_test, lr_par)

In [40]:
simg_lr = lr(simg_x_train, simg_y_train, simg_x_test, simg_y_test, lr_par)



In [41]:
lm_lr = lr(lm_x_train, lm_y_train, lm_x_test, lm_y_test, lr_par)



In [42]:
lr_results = [iris_lr, wine_lr, hman_lr, glass_lr, simg_lr, lm_lr]

In [43]:
lr_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [44]:
for i in range(0, len(lr_results)):
    lr_df = lr_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : lr_results[i][0],
        'Accuracy' : lr_results[i][2],
        'f1 Score' : lr_results[i][3],
        'Execution Time' : lr_results[i][4]
    }, ignore_index = True
    )

In [45]:
lr_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l2', '...",0.911111,0.908956,1.876832
1,wine,"{'C': 10.0, 'max_iter': 100, 'penalty': 'l2', ...",0.981481,0.981575,1.728228
2,haberman,"{'C': 10.0, 'max_iter': 100, 'penalty': 'l2', ...",0.73913,0.685729,1.715718
3,glass,"{'C': 10.0, 'max_iter': 100, 'penalty': 'l2', ...",0.584615,0.559651,2.157548
4,satellite_image,"{'C': 10.0, 'max_iter': 100, 'penalty': 'l2', ...",0.847229,0.83894,68.162218
5,libras_move,"{'C': 10.0, 'max_iter': 200, 'penalty': 'l2', ...",0.740741,0.734748,33.771809


In [63]:
lr_df.to_csv('LR_Results.csv')

## Random Forest

In [22]:
rf_par = {
    'n_estimators' : [10, 30, 50],
    'criterion' : ['gini', 'entropy'],
    'max_depth' : [5, 10, 20],
    'min_samples_split' : [5, 8],
    'min_samples_leaf' : [2, 3, 5],
    'max_features' : ['sqrt', 'log2']
}

In [23]:
def rf (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    RF = RandomForestClassifier()
    rf_grid = GridSearchCV(RF, parameter, refit = True, n_jobs=-1, cv=5)
    rf_grid.fit(x_train, y_train)
    opt_par = rf_grid.best_params_
    pred = rf_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [24]:
iris_rf = rf(iris_x_train, iris_y_train, iris_x_test, iris_y_test, rf_par)

In [25]:
wine_rf = rf(wine_x_train, wine_y_train, wine_x_test, wine_y_test, rf_par)

In [26]:
hman_rf = rf(hman_x_train, hman_y_train, hman_x_test, hman_y_test, rf_par)

In [27]:
glass_rf = rf(glass_x_train, glass_y_train, glass_x_test, glass_y_test, rf_par)

In [28]:
simg_rf = rf(simg_x_train, simg_y_train, simg_x_test, simg_y_test, rf_par)

In [29]:
lm_rf = rf(lm_x_train, lm_y_train, lm_x_test, lm_y_test, rf_par)

In [30]:
rf_results = [iris_rf, wine_rf, hman_rf, glass_rf, simg_rf, lm_rf]

In [31]:
rf_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [32]:
for i in range(0, len(rf_results)):
    rf_df = rf_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : rf_results[i][0],
        'Accuracy' : rf_results[i][2],
        'f1 Score' : rf_results[i][3],
        'Execution Time' : rf_results[i][4]
    }, ignore_index = True
    )

In [33]:
rf_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'criterion': 'gini', 'max_depth': 5, 'max_fea...",1.0,1.0,56.670052
1,wine,"{'criterion': 'entropy', 'max_depth': 5, 'max_...",0.981481,0.981575,51.132006
2,haberman,"{'criterion': 'gini', 'max_depth': 5, 'max_fea...",0.717391,0.647669,51.681832
3,glass,"{'criterion': 'gini', 'max_depth': 20, 'max_fe...",0.784615,0.78225,60.725462
4,satellite_image,"{'criterion': 'entropy', 'max_depth': 20, 'max...",0.905748,0.903851,184.782552
5,libras_move,"{'criterion': 'entropy', 'max_depth': 20, 'max...",0.796296,0.79912,76.898488


In [34]:
rf_df.to_csv('RF_Results.csv')

## Gaussian Naive Bayes

In [58]:
gnb_par = {}

In [59]:
def gnb (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    GNB = GaussianNB()
    gnb_grid = GridSearchCV(GNB, parameter, refit = True, n_jobs=-1, cv=5)
    gnb_grid.fit(x_train, y_train)
    pred = gnb_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return pred, accuracy, f1score, execution_time

In [60]:
iris_gnb = gnb(iris_x_train, iris_y_train, iris_x_test, iris_y_test, gnb_par)

In [61]:
wine_gnb = gnb(wine_x_train, wine_y_train, wine_x_test, wine_y_test, gnb_par)

In [62]:
glass_gnb = gnb(glass_x_train, glass_y_train, glass_x_test, glass_y_test, gnb_par)

In [63]:
simg_gnb = gnb(simg_x_train, simg_y_train, simg_x_test, simg_y_test, gnb_par)

In [64]:
lm_gnb = gnb(lm_x_train, lm_y_train, lm_x_test, lm_y_test, gnb_par)

In [65]:
gnb_results = [iris_gnb, wine_gnb, glass_gnb, simg_gnb, lm_gnb]

In [66]:
gnb_df = pd.DataFrame({
    'Datasets' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [67]:
Datasets_gnb = ['iris', 'wine', 'glass', 'satellite_image', 'libras_move']

In [68]:
for i in range(0, len(gnb_results)):
    gnb_df = gnb_df.append({
        'Datasets' : Datasets_gnb[i],
        'Accuracy' : gnb_results[i][1],
        'f1 Score' : gnb_results[i][2],
        'Execution Time' : gnb_results[i][3]
    }, ignore_index = True
    )
    

In [69]:
gnb_df

Unnamed: 0,Datasets,Accuracy,f1 Score,Execution Time
0,iris,0.977778,0.977745,0.22616
1,wine,1.0,1.0,0.045032
2,glass,0.446154,0.422613,0.049018
3,satellite_image,0.7913,0.800472,0.305245
4,libras_move,0.601852,0.612932,0.068049


In [89]:
gnb_df.to_csv('GNB_Results.csv')

## XGBoost

In [79]:
xgb_par = {
    'n_estimators' : [10, 20, 50],
    'objective' : ['multi:softprob', 'multi:softmax'],
    'use_label_encoder' : [False],
    'booster' : ['gbtree', 'gblinear', 'dart'],
    'max_depth' : [8, 16, 32],
    'learning_rate' : [0.02, 0.2]
}

In [80]:
def xgb (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    XGB = XGBClassifier()
    xgb_grid = GridSearchCV(XGB, parameter, refit = True, n_jobs=-1, cv=5)
    xgb_grid.fit(x_train, y_train)
    opt_par = xgb_grid.best_params_
    pred = xgb_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [81]:
iris_xgb = xgb(iris_x_train, iris_y_train, iris_x_test, iris_y_test, xgb_par)



In [82]:
wine_xgb = xgb(wine_x_train, wine_y_train, wine_x_test, wine_y_test, xgb_par)



In [85]:
glass_xgb = xgb(glass_x_train, glass_y_train, glass_x_test, glass_y_test, xgb_par)



In [86]:
simg_xgb = xgb(simg_x_train, simg_y_train, simg_x_test, simg_y_test, xgb_par)



In [87]:
lm_xgb = xgb(lm_x_train, lm_y_train, lm_x_test, lm_y_test, xgb_par)

Parameters: { "max_depth" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




In [88]:
xgb_results = [iris_xgb, wine_xgb, glass_xgb, simg_xgb, lm_xgb]

In [89]:
xgb_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [90]:
for i in range(0, len(xgb_results)):
    xgb_df = xgb_df.append({
        'Datasets' : Datasets_gnb[i],
        'Best Parameters' : xgb_results[i][0],
        'Accuracy' : xgb_results[i][2],
        'f1 Score' : xgb_results[i][3],
        'Execution Time' : xgb_results[i][4]
    }, ignore_index = True
    )

In [91]:
xgb_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'booster': 'gbtree', 'learning_rate': 0.02, '...",1.0,1.0,10.607527
1,wine,"{'booster': 'gbtree', 'learning_rate': 0.2, 'm...",0.962963,0.962857,25.708136
2,glass,"{'booster': 'gbtree', 'learning_rate': 0.02, '...",0.738462,0.742867,24.428277
3,satellite_image,"{'booster': 'gbtree', 'learning_rate': 0.2, 'm...",0.910409,0.909266,593.382874
4,libras_move,"{'booster': 'gblinear', 'learning_rate': 0.2, ...",0.657407,0.647371,191.423967


In [93]:
xgb_df.to_csv('XGB_Results.csv')

## LDA

In [35]:
lda_par = [
    {
        'solver' : ['svd'],
        'shrinkage' : [ None]
    },
    {
        'solver' : ['lsqr', 'eigen'],
        'shrinkage' : [ None, 'auto']
    }
        ]

In [36]:
def lda (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    LDA_ = LDA()
    lda_grid = GridSearchCV(LDA_, parameter, refit = True, n_jobs=-1, cv=5)
    lda_grid.fit(x_train, y_train)
    opt_par = lda_grid.best_params_
    pred = lda_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [37]:
iris_lda = lda(iris_x_train, iris_y_train, iris_x_test, iris_y_test, lda_par)

In [38]:
wine_lda = lda(wine_x_train, wine_y_train, wine_x_test, wine_y_test, lda_par)

In [39]:
hman_lda = lda(hman_x_train, hman_y_train, hman_x_test, hman_y_test, lda_par)

In [40]:
glass_lda = lda(glass_x_train, glass_y_train, glass_x_test, glass_y_test, lda_par)

In [41]:
simg_lda = lda(simg_x_train, simg_y_train, simg_x_test, simg_y_test, lda_par)

In [42]:
lm_lda = lda(lm_x_train, lm_y_train, lm_x_test, lm_y_test, lda_par)

In [43]:
lda_results = [iris_lda, wine_lda, hman_lda, glass_lda, simg_lda, lm_lda]

In [44]:
lda_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [45]:
for i in range(0, len(lda_results)):
    lda_df = lda_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : lda_results[i][0],
        'Accuracy' : lda_results[i][2],
        'f1 Score' : lda_results[i][3],
        'Execution Time' : lda_results[i][4]
    }, ignore_index = True
    )

In [46]:
lda_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'shrinkage': None, 'solver': 'svd'}",1.0,1.0,1.81479
1,wine,"{'shrinkage': None, 'solver': 'svd'}",1.0,1.0,0.185135
2,haberman,"{'shrinkage': None, 'solver': 'svd'}",0.73913,0.703583,0.308203
3,glass,"{'shrinkage': 'auto', 'solver': 'lsqr'}",0.6,0.576896,0.226166
4,satellite_image,"{'shrinkage': 'auto', 'solver': 'lsqr'}",0.829622,0.821517,1.251
5,libras_move,"{'shrinkage': 'auto', 'solver': 'lsqr'}",0.694444,0.688528,0.700496


In [48]:
lda_df.to_csv('LDA_Results.csv')

## SVM

In [49]:
svm_par = [
   { 'C' : [0.1, 1.0, 10.0],
    'kernel' : ['linear']
   },
   { 'C' : [0.1, 1.0, 10.0],
    'kernel' : ['poly'],
    'degree' : [2, 3, 4],
    'gamma' : ['scale', 'auto']
   },
   { 'C' : [0.1, 1.0, 10.0],
    'kernel' : ['rbf', 'sigmoid'],
    'gamma' : ['scale', 'auto']
    }
]

In [51]:
def svm (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    SVM = SVC()
    svm_grid = GridSearchCV(SVM, parameter, refit = True, n_jobs=-1, cv=5)
    svm_grid.fit(x_train, y_train)
    opt_par = svm_grid.best_params_
    pred = svm_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [52]:
iris_svm = svm(iris_x_train, iris_y_train, iris_x_test, iris_y_test, svm_par)

In [53]:
wine_svm = svm(wine_x_train, wine_y_train, wine_x_test, wine_y_test, svm_par)

In [54]:
hman_svm = svm(hman_x_train, hman_y_train, hman_x_test, hman_y_test, svm_par)

In [55]:
glass_svm = svm(glass_x_train, glass_y_train, glass_x_test, glass_y_test, svm_par)

In [56]:
simg_svm = svm(simg_x_train, simg_y_train, simg_x_test, simg_y_test, svm_par)

In [57]:
lm_svm = svm(lm_x_train, lm_y_train, lm_x_test, lm_y_test, svm_par)

In [58]:
svm_results = [iris_svm, wine_svm, hman_svm, glass_svm, simg_svm, lm_svm]

In [59]:
svm_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [60]:
for i in range(0, len(svm_results)):
    svm_df = svm_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : svm_results[i][0],
        'Accuracy' : svm_results[i][2],
        'f1 Score' : svm_results[i][3],
        'Execution Time' : svm_results[i][4]
    }, ignore_index = True
    )

In [61]:
svm_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",1.0,1.0,0.961534
1,wine,"{'C': 10.0, 'kernel': 'linear'}",0.981481,0.981575,0.902641
2,haberman,"{'C': 10.0, 'gamma': 'auto', 'kernel': 'rbf'}",0.706522,0.627258,323.526112
3,glass,"{'C': 10.0, 'degree': 4, 'gamma': 'scale', 'ke...",0.646154,0.647832,0.858612
4,satellite_image,"{'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.897462,0.896341,160.840894
5,libras_move,"{'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.87963,0.884631,1.88184


In [62]:
svm_df.to_csv('SVM_Results.csv')

## AdaBoost

In [98]:
ada_par = {
    'n_estimators' : [10, 30, 50, 100],
    'learning_rate' : [0.01, 0.1, 1.0],
    'algorithm' : ['SAMME.R', 'SAMME']
}

In [99]:
def ada (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    ADA = AdaBoostClassifier()
    ada_grid = GridSearchCV(ADA, parameter, refit = True, n_jobs=-1, cv=5)
    ada_grid.fit(x_train, y_train)
    opt_par = ada_grid.best_params_
    pred = ada_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [100]:
iris_ada = ada(iris_x_train, iris_y_train, iris_x_test, iris_y_test, ada_par)

In [101]:
wine_ada = ada(wine_x_train, wine_y_train, wine_x_test, wine_y_test, ada_par)

In [102]:
hman_ada = ada(hman_x_train, hman_y_train, hman_x_test, hman_y_test, ada_par)

In [103]:
glass_ada = ada(glass_x_train, glass_y_train, glass_x_test, glass_y_test, ada_par)

In [104]:
simg_ada = ada(simg_x_train, simg_y_train, simg_x_test, simg_y_test, ada_par)

In [105]:
lm_ada = ada(lm_x_train, lm_y_train, lm_x_test, lm_y_test, ada_par)

In [106]:
ada_results = [iris_ada, wine_ada, hman_ada, glass_ada, simg_ada, lm_ada]

In [107]:
ada_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [108]:
for i in range(0, len(ada_results)):
    ada_df = ada_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : ada_results[i][0],
        'Accuracy' : ada_results[i][2],
        'f1 Score' : ada_results[i][3],
        'Execution Time' : ada_results[i][4]
    }, ignore_index = True
    )

In [109]:
ada_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'algorithm': 'SAMME.R', 'learning_rate': 1.0,...",1.0,1.0,7.914716
1,wine,"{'algorithm': 'SAMME', 'learning_rate': 1.0, '...",0.962963,0.962594,7.213979
2,haberman,"{'algorithm': 'SAMME', 'learning_rate': 1.0, '...",0.73913,0.71726,6.736976
3,glass,"{'algorithm': 'SAMME', 'learning_rate': 1.0, '...",0.553846,0.530994,6.877263
4,satellite_image,"{'algorithm': 'SAMME', 'learning_rate': 0.1, '...",0.781978,0.737011,31.457042
5,libras_move,"{'algorithm': 'SAMME.R', 'learning_rate': 0.1,...",0.444444,0.40547,15.839632


In [110]:
ada_df.to_csv('ADA_Results.csv')

## Gradient Boosting

In [22]:
gb_par = {
   'max_depth' :[3,5,7],
   'n_estimators' : [10, 50, 100],
   'learning_rate' : [0.01, 0.1, 1.0],
   'min_samples_split' : [2, 3, 5], 
   'min_samples_leaf' : [1, 2],
   'max_features' : ['sqrt', 'log2']
}

In [23]:
def gb (x_train, y_train, x_test, y_test, parameter) :
    start_time = time.time()
    GB = GradientBoostingClassifier()
    gb_grid = GridSearchCV(GB, parameter, refit = True, n_jobs=-1, cv=5)
    gb_grid.fit(x_train, y_train)
    opt_par = gb_grid.best_params_
    pred = gb_grid.predict(x_test)
    accuracy = accuracy_score (y_test, pred)
    f1score = f1_score(y_test, pred, average = 'weighted')
    execution_time = time.time() - start_time
    return opt_par, pred, accuracy, f1score, execution_time

In [24]:
iris_gb = gb(iris_x_train, iris_y_train, iris_x_test, iris_y_test, gb_par)

In [25]:
wine_gb = gb(wine_x_train, wine_y_train, wine_x_test, wine_y_test, gb_par)

In [26]:
hman_gb = gb(hman_x_train, hman_y_train, hman_x_test, hman_y_test, gb_par)

In [27]:
glass_gb = gb(glass_x_train, glass_y_train, glass_x_test, glass_y_test, gb_par)

In [28]:
simg_gb = gb(simg_x_train, simg_y_train, simg_x_test, simg_y_test, gb_par)

In [29]:
lm_gb = gb(lm_x_train, lm_y_train, lm_x_test, lm_y_test, gb_par)

In [30]:
gb_results = [iris_gb, wine_gb, hman_gb, glass_gb, simg_gb, lm_gb]

In [31]:
gb_df = pd.DataFrame({
    'Datasets' : [],
    'Best Parameters' : [],
    'Accuracy' : [],
    'f1 Score' : [],
    'Execution Time' : []
})

In [32]:
for i in range(0, len(gb_results)):
    gb_df = gb_df.append({
        'Datasets' : Datasets[i],
        'Best Parameters' : gb_results[i][0],
        'Accuracy' : gb_results[i][2],
        'f1 Score' : gb_results[i][3],
        'Execution Time' : gb_results[i][4]
    }, ignore_index = True
    )

In [33]:
gb_df

Unnamed: 0,Datasets,Best Parameters,Accuracy,f1 Score,Execution Time
0,iris,"{'learning_rate': 0.01, 'max_depth': 3, 'max_f...",1.0,1.0,275.72057
1,wine,"{'learning_rate': 0.01, 'max_depth': 5, 'max_f...",1.0,1.0,278.863936
2,haberman,"{'learning_rate': 0.1, 'max_depth': 5, 'max_fe...",0.73913,0.69526,118.705158
3,glass,"{'learning_rate': 0.1, 'max_depth': 7, 'max_fe...",0.738462,0.745244,418.980913
4,satellite_image,"{'learning_rate': 0.1, 'max_depth': 7, 'max_fe...",0.917141,0.916052,2262.287341
5,libras_move,"{'learning_rate': 0.1, 'max_depth': 7, 'max_fe...",0.75,0.749441,1483.44908


In [34]:
gb_df.to_csv('GB_Results.csv')