# 0.0 Imports

In [1]:
import json
import os
import random

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

## 0.1 Loading dataset

In [2]:
path_home = os.path.dirname(os.getcwd())
# Dados de Treino
path_X_traning = os.path.join(path_home, 'data', 'X_training.csv')
path_y_traning = os.path.join(path_home, 'data', 'y_training.csv')

x_training  = pd.read_csv(path_X_traning)
y_training = pd.read_csv(path_y_traning)

# Dados de Validação
path_X_validation = os.path.join(path_home, 'data', 'X_validation.csv')
path_y_validation = os.path.join(path_home, 'data', 'y_validation.csv')

x_validation  = pd.read_csv(path_X_validation)
y_validation = pd.read_csv(path_y_validation)

# Dados de Teste
path_X_test = os.path.join(path_home, 'data', 'X_test.csv')
path_y_test = os.path.join(path_home, 'data', 'y_test.csv')

x_test  = pd.read_csv(path_X_test)
y_test = pd.read_csv(path_y_test)

In [3]:
x_training.head()

Unnamed: 0,id,customer_type,age,class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,food_and_drink,...,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,gender_Female,gender_Male,type_of_travel_business_travel,type_of_travel_personal_travel
0,13508,1,0.5,0.0,0.03958,0.6,0.6,0.6,0.6,1.0,...,0.5,1.0,0.6,0.4,0.0,0.013848,1.0,0.0,1.0,0.0
1,28874,1,0.24359,0.0,0.205775,0.6,0.4,0.4,0.4,0.6,...,0.5,0.5,0.2,0.6,0.0,0.0,0.0,1.0,1.0,0.0
2,21484,0,0.435897,1.0,0.026858,0.6,0.6,0.6,0.2,1.0,...,0.0,1.0,0.6,1.0,0.0,0.0,1.0,0.0,1.0,0.0
3,48280,1,0.589744,0.5,0.041397,0.6,1.0,0.6,0.6,0.8,...,0.0,1.0,0.4,0.4,0.029499,0.020772,1.0,0.0,0.0,1.0
4,472,0,0.423077,1.0,0.016559,0.2,0.2,0.2,0.8,0.6,...,1.0,0.75,0.8,0.6,0.021632,0.019782,0.0,1.0,1.0,0.0


## 0.2 Helpe Function

In [4]:
def get_metrics(model, x_data, y_data):
    model.fit(x_data, y_data.values.ravel()) 
    yhat_model = model.predict(x_data)
    metrics = {
        'accuracy': accuracy_score(y_data, yhat_model),
        'precision_score': precision_score(y_data, yhat_model),
        'recall_score': recall_score(y_data, yhat_model),
        'f1_score': f1_score(y_data, yhat_model),
    }
    return metrics

def classifier_evaluation(model_classifier, param, data):
    x_data, y_data = data['x'], data['y']
    metric = pd.DataFrame()
    for i in range(1, len(param)+1):
        model = model_classifier(**param)
        metrics = get_metrics(model, x_data, y_data)
        metric = pd.DataFrame({
            'name': model.__class__.__name__ ,
            **metrics,
            'param': json.dumps(param)
        }, index=[0])
    
    return metric

# data_train = {'x': x_training, 'y': y_training}
# data_validation = {'x': x_validation, 'y': y_validation}
# data_test = {'x': x_test, 'y': y_test}


# 2.0 Training

In [5]:
data_train = {'x': x_training, 'y': y_training}

## 2.1 KNN

In [6]:
resul_knn = pd.DataFrame()
model = KNeighborsClassifier

for i in range(2, 20, 1):
    param = {
        'n_neighbors': i
    }

    result = classifier_evaluation(model, param, data_train)

    resul_knn = pd.concat([resul_knn,result]).reset_index(drop=True)

In [7]:
resul_knn

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,KNeighborsClassifier,0.840199,1.0,0.631284,0.773972,"{""n_neighbors"": 2}"
1,KNeighborsClassifier,0.832186,0.812008,0.79741,0.804643,"{""n_neighbors"": 3}"
2,KNeighborsClassifier,0.784665,0.85267,0.608247,0.710012,"{""n_neighbors"": 4}"
3,KNeighborsClassifier,0.781562,0.755893,0.732563,0.744046,"{""n_neighbors"": 5}"
4,KNeighborsClassifier,0.757471,0.792164,0.597047,0.680904,"{""n_neighbors"": 6}"
5,KNeighborsClassifier,0.756312,0.731249,0.69209,0.711131,"{""n_neighbors"": 7}"
6,KNeighborsClassifier,0.742177,0.762429,0.588488,0.66426,"{""n_neighbors"": 8}"
7,KNeighborsClassifier,0.739999,0.713814,0.667844,0.690064,"{""n_neighbors"": 9}"
8,KNeighborsClassifier,0.729973,0.741647,0.578465,0.649971,"{""n_neighbors"": 10}"
9,KNeighborsClassifier,0.729559,0.704652,0.647321,0.674771,"{""n_neighbors"": 11}"


## 2.2 Decision Tree

In [8]:
resul_dt = pd.DataFrame()
model = DecisionTreeClassifier

for i in range(2, 20, 1):
    param = {
        'max_depth': i
    }

    result = classifier_evaluation(model, param, data_train)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [9]:
resul_dt

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,DecisionTreeClassifier,0.857092,0.829749,0.843293,0.836466,"{""max_depth"": 2}"
1,DecisionTreeClassifier,0.884162,0.839506,0.905912,0.871446,"{""max_depth"": 3}"
2,DecisionTreeClassifier,0.893443,0.923308,0.822451,0.869966,"{""max_depth"": 4}"
3,DecisionTreeClassifier,0.906764,0.907732,0.87368,0.890381,"{""max_depth"": 5}"
4,DecisionTreeClassifier,0.922816,0.901864,0.922267,0.911951,"{""max_depth"": 6}"
5,DecisionTreeClassifier,0.934483,0.952659,0.893216,0.92198,"{""max_depth"": 7}"
6,DecisionTreeClassifier,0.942715,0.950363,0.915648,0.932683,"{""max_depth"": 8}"
7,DecisionTreeClassifier,0.949528,0.961784,0.920103,0.940482,"{""max_depth"": 9}"
8,DecisionTreeClassifier,0.955471,0.963754,0.932321,0.947777,"{""max_depth"": 10}"
9,DecisionTreeClassifier,0.961043,0.969225,0.939958,0.954367,"{""max_depth"": 11}"


## 2.3 Random Forest

In [10]:
resul_rf = pd.DataFrame()
model = RandomForestClassifier
para = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 3, 4, 5]
    }

for i in range(2, 20, 1):
    param = {
        'n_estimators': random.choice(para['n_estimators']),
        'max_depth': random.choice(para['max_depth'])
    }

    result = classifier_evaluation(model, param, data_train)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

In [11]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,RandomForestClassifier,0.91006,0.910692,0.878643,0.894381,"{""n_estimators"": 300, ""max_depth"": 4}"
1,RandomForestClassifier,0.925519,0.920455,0.906485,0.913416,"{""n_estimators"": 100, ""max_depth"": 5}"
2,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 100, ""max_depth"": null}"
3,RandomForestClassifier,0.911549,0.911848,0.881093,0.896207,"{""n_estimators"": 300, ""max_depth"": 4}"
4,RandomForestClassifier,0.923616,0.919527,0.902762,0.911067,"{""n_estimators"": 300, ""max_depth"": 5}"
5,RandomForestClassifier,0.925202,0.919799,0.906453,0.913077,"{""n_estimators"": 200, ""max_depth"": 5}"
6,RandomForestClassifier,0.92334,0.918038,0.903812,0.910869,"{""n_estimators"": 200, ""max_depth"": 5}"
7,RandomForestClassifier,0.908943,0.912608,0.873552,0.892653,"{""n_estimators"": 300, ""max_depth"": 4}"
8,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 200, ""max_depth"": null}"
9,RandomForestClassifier,0.895497,0.894528,0.860316,0.877088,"{""n_estimators"": 300, ""max_depth"": 3}"


## 2.4 Logistic Regression

In [12]:
resul_rf = pd.DataFrame()
model = LogisticRegression
para = {
        'C': [0.1, 0.5, 1.0],
        'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    }

for i in range(2, 20, 1):
    param = {
        'C': random.choice(para['C']),
        'solver': random.choice(para['solver']),
    }

    result = classifier_evaluation(model, param, data_train)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

  alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
  ret = line_search_wolfe2(
  alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
  ret = line_search_wolfe2(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  alpha_star, phi_star, old_fval, derphi_star = scalar

In [13]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,LogisticRegression,0.876191,0.871738,0.837565,0.85431,"{""C"": 0.1, ""solver"": ""newton-cg""}"
1,LogisticRegression,0.8513,0.837083,0.815642,0.826224,"{""C"": 0.1, ""solver"": ""lbfgs""}"
2,LogisticRegression,0.793174,0.729226,0.831551,0.777034,"{""C"": 0.5, ""solver"": ""liblinear""}"
3,LogisticRegression,0.793174,0.729226,0.831551,0.777034,"{""C"": 0.5, ""solver"": ""liblinear""}"
4,LogisticRegression,0.876191,0.871738,0.837565,0.85431,"{""C"": 0.1, ""solver"": ""newton-cg""}"
5,LogisticRegression,0.876081,0.871606,0.837438,0.85418,"{""C"": 1.0, ""solver"": ""newton-cholesky""}"
6,LogisticRegression,0.876191,0.871738,0.837565,0.85431,"{""C"": 0.1, ""solver"": ""newton-cg""}"
7,LogisticRegression,0.8513,0.837083,0.815642,0.826224,"{""C"": 0.1, ""solver"": ""lbfgs""}"
8,LogisticRegression,0.566724,0.588235,0.000955,0.001906,"{""C"": 1.0, ""solver"": ""saga""}"
9,LogisticRegression,0.566669,0.524272,0.001718,0.003425,"{""C"": 0.1, ""solver"": ""sag""}"


# 3.0 validation

In [14]:
data_validation = {'x': x_validation, 'y': y_validation}

## 3.1 KNN

In [15]:
resul_knn = pd.DataFrame()
model = KNeighborsClassifier

for i in range(2, 20, 1):
    param = {
        'n_neighbors': i
    }

    result = classifier_evaluation(model, param, data_validation)

    resul_knn = pd.concat([resul_knn,result]).reset_index(drop=True)

In [16]:
resul_knn

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,KNeighborsClassifier,0.820136,1.0,0.584973,0.738149,"{""n_neighbors"": 2}"
1,KNeighborsClassifier,0.815535,0.795448,0.773183,0.784157,"{""n_neighbors"": 3}"
2,KNeighborsClassifier,0.766884,0.842128,0.568713,0.678928,"{""n_neighbors"": 4}"
3,KNeighborsClassifier,0.766563,0.745225,0.701017,0.722445,"{""n_neighbors"": 5}"
4,KNeighborsClassifier,0.739503,0.781456,0.55379,0.648214,"{""n_neighbors"": 6}"
5,KNeighborsClassifier,0.737701,0.71535,0.655654,0.684202,"{""n_neighbors"": 7}"
6,KNeighborsClassifier,0.719489,0.747165,0.53315,0.62227,"{""n_neighbors"": 8}"
7,KNeighborsClassifier,0.716078,0.692021,0.621427,0.654827,"{""n_neighbors"": 9}"
8,KNeighborsClassifier,0.703208,0.721394,0.513475,0.599931,"{""n_neighbors"": 10}"
9,KNeighborsClassifier,0.700505,0.678232,0.587794,0.629783,"{""n_neighbors"": 11}"


## 3.2 Decision Tree

In [17]:
resul_dt = pd.DataFrame()
model = DecisionTreeClassifier

for i in range(2, 20, 1):
    param = {
        'max_depth': i
    }

    result = classifier_evaluation(model, param, data_validation)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [18]:
resul_dt

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,DecisionTreeClassifier,0.862801,0.858869,0.817804,0.837834,"{""max_depth"": 2}"
1,DecisionTreeClassifier,0.88555,0.840572,0.908159,0.873059,"{""max_depth"": 3}"
2,DecisionTreeClassifier,0.889475,0.937173,0.7985,0.862297,"{""max_depth"": 4}"
3,DecisionTreeClassifier,0.904437,0.894788,0.883362,0.889038,"{""max_depth"": 5}"
4,DecisionTreeClassifier,0.920976,0.903969,0.914841,0.909373,"{""max_depth"": 6}"
5,DecisionTreeClassifier,0.933267,0.9546,0.888262,0.920237,"{""max_depth"": 7}"
6,DecisionTreeClassifier,0.941826,0.963511,0.899844,0.93059,"{""max_depth"": 8}"
7,DecisionTreeClassifier,0.949065,0.962203,0.918554,0.939872,"{""max_depth"": 9}"
8,DecisionTreeClassifier,0.956788,0.970803,0.928206,0.949026,"{""max_depth"": 10}"
9,DecisionTreeClassifier,0.963705,0.972799,0.942609,0.957466,"{""max_depth"": 11}"


## 3.3 Random Forest

In [19]:
resul_rf = pd.DataFrame()
model = RandomForestClassifier
para = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 3, 4, 5]
    }

for i in range(2, 20, 1):
    param = {
        'n_estimators': random.choice(para['n_estimators']),
        'max_depth': random.choice(para['max_depth'])
    }

    result = classifier_evaluation(model, param, data_validation)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

In [20]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 200, ""max_depth"": null}"
1,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 100, ""max_depth"": null}"
2,RandomForestClassifier,0.886708,0.897348,0.833989,0.864509,"{""n_estimators"": 100, ""max_depth"": 3}"
3,RandomForestClassifier,0.907076,0.906118,0.876383,0.891002,"{""n_estimators"": 200, ""max_depth"": 4}"
4,RandomForestClassifier,0.925384,0.918104,0.908902,0.91348,"{""n_estimators"": 100, ""max_depth"": 5}"
5,RandomForestClassifier,0.892532,0.897372,0.849135,0.872587,"{""n_estimators"": 200, ""max_depth"": 3}"
6,RandomForestClassifier,0.999968,1.0,0.999926,0.999963,"{""n_estimators"": 100, ""max_depth"": null}"
7,RandomForestClassifier,0.928183,0.921904,0.9115,0.916673,"{""n_estimators"": 300, ""max_depth"": 5}"
8,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 300, ""max_depth"": null}"
9,RandomForestClassifier,0.895428,0.893796,0.861014,0.877099,"{""n_estimators"": 300, ""max_depth"": 3}"


## 3.4 Logistic Regression

In [None]:
resul_rf = pd.DataFrame()
model = LogisticRegression
para = {
        'C': [0.1, 0.5, 1.0],
        'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    }

for i in range(2, 20, 1):
    param = {
        'C': random.choice(para['C']),
        'solver': random.choice(para['solver']),
    }

    result = classifier_evaluation(model, param, data_validation)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

In [22]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,LogisticRegression,0.873323,0.86786,0.834806,0.851012,"{""C"": 0.1, ""solver"": ""newton-cg""}"
1,LogisticRegression,0.648637,0.631189,0.455268,0.528986,"{""C"": 0.1, ""solver"": ""liblinear""}"
2,LogisticRegression,0.829917,0.780105,0.846017,0.811725,"{""C"": 0.5, ""solver"": ""lbfgs""}"
3,LogisticRegression,0.873934,0.868167,0.836068,0.851815,"{""C"": 1.0, ""solver"": ""newton-cholesky""}"
4,LogisticRegression,0.873323,0.86786,0.834806,0.851012,"{""C"": 0.1, ""solver"": ""newton-cg""}"
5,LogisticRegression,0.873645,0.868019,0.835474,0.851436,"{""C"": 0.5, ""solver"": ""newton-cg""}"
6,LogisticRegression,0.566878,0.833333,0.000742,0.001484,"{""C"": 0.1, ""solver"": ""saga""}"
7,LogisticRegression,0.648637,0.631189,0.455268,0.528986,"{""C"": 0.5, ""solver"": ""liblinear""}"
8,LogisticRegression,0.873323,0.86786,0.834806,0.851012,"{""C"": 0.1, ""solver"": ""newton-cg""}"
9,LogisticRegression,0.873323,0.86786,0.834806,0.851012,"{""C"": 0.1, ""solver"": ""newton-cg""}"


# 4.0 Test

In [23]:
data_test = {'x': x_test, 'y': y_test}

## 4.1 KNN

In [24]:
resul_knn = pd.DataFrame()
model = KNeighborsClassifier

for i in range(2, 20, 1):
    param = {
        'n_neighbors': i
    }

    result = classifier_evaluation(model, param, data_test)

    resul_knn = pd.concat([resul_knn,result]).reset_index(drop=True)

In [25]:
resul_knn

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,KNeighborsClassifier,0.816707,1.0,0.582402,0.736099,"{""n_neighbors"": 2}"
1,KNeighborsClassifier,0.812768,0.795878,0.771227,0.783359,"{""n_neighbors"": 3}"
2,KNeighborsClassifier,0.763836,0.842332,0.568324,0.678716,"{""n_neighbors"": 4}"
3,KNeighborsClassifier,0.759317,0.738811,0.698636,0.718162,"{""n_neighbors"": 5}"
4,KNeighborsClassifier,0.732901,0.778864,0.546678,0.642436,"{""n_neighbors"": 6}"
5,KNeighborsClassifier,0.732051,0.712163,0.653762,0.681714,"{""n_neighbors"": 7}"
6,KNeighborsClassifier,0.714286,0.746489,0.528553,0.618896,"{""n_neighbors"": 8}"
7,KNeighborsClassifier,0.712007,0.691569,0.620678,0.654208,"{""n_neighbors"": 9}"
8,KNeighborsClassifier,0.699842,0.725946,0.507875,0.597639,"{""n_neighbors"": 10}"
9,KNeighborsClassifier,0.69652,0.677426,0.589089,0.630177,"{""n_neighbors"": 11}"


## 4.2 Decision Tree

In [26]:
resul_dt = pd.DataFrame()
model = DecisionTreeClassifier

for i in range(2, 20, 1):
    param = {
        'max_depth': i
    }

    result = classifier_evaluation(model, param, data_test)

    resul_dt = pd.concat([resul_dt,result]).reset_index(drop=True)

In [27]:
resul_dt

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,DecisionTreeClassifier,0.860426,0.858876,0.816102,0.836943,"{""max_depth"": 2}"
1,DecisionTreeClassifier,0.883868,0.840088,0.908315,0.87287,"{""max_depth"": 3}"
2,DecisionTreeClassifier,0.888271,0.903045,0.835108,0.867749,"{""max_depth"": 4}"
3,DecisionTreeClassifier,0.904801,0.90308,0.877255,0.88998,"{""max_depth"": 5}"
4,DecisionTreeClassifier,0.919863,0.899811,0.919842,0.909716,"{""max_depth"": 6}"
5,DecisionTreeClassifier,0.932453,0.950356,0.892741,0.920648,"{""max_depth"": 7}"
6,DecisionTreeClassifier,0.943962,0.951704,0.918962,0.935046,"{""max_depth"": 8}"
7,DecisionTreeClassifier,0.952497,0.956244,0.934536,0.945265,"{""max_depth"": 9}"
8,DecisionTreeClassifier,0.959255,0.963663,0.942719,0.953076,"{""max_depth"": 10}"
9,DecisionTreeClassifier,0.965859,0.968278,0.953454,0.960809,"{""max_depth"": 11}"


## 4.3 Random Forest

In [28]:
resul_rf = pd.DataFrame()
model = RandomForestClassifier
para = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 3, 4, 5]
    }

for i in range(2, 20, 1):
    param = {
        'n_estimators': random.choice(para['n_estimators']),
        'max_depth': random.choice(para['max_depth'])
    }

    result = classifier_evaluation(model, param, data_test)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

In [29]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,RandomForestClassifier,0.913992,0.913934,0.887637,0.900594,"{""n_estimators"": 300, ""max_depth"": 4}"
1,RandomForestClassifier,0.897617,0.900239,0.862297,0.880859,"{""n_estimators"": 300, ""max_depth"": 3}"
2,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 100, ""max_depth"": null}"
3,RandomForestClassifier,0.928398,0.920283,0.916234,0.918254,"{""n_estimators"": 300, ""max_depth"": 5}"
4,RandomForestClassifier,0.897656,0.899075,0.863792,0.881081,"{""n_estimators"": 300, ""max_depth"": 3}"
5,RandomForestClassifier,1.0,1.0,1.0,1.0,"{""n_estimators"": 300, ""max_depth"": null}"
6,RandomForestClassifier,0.892056,0.894349,0.855081,0.874275,"{""n_estimators"": 200, ""max_depth"": 3}"
7,RandomForestClassifier,0.911212,0.910523,0.884646,0.897398,"{""n_estimators"": 100, ""max_depth"": 4}"
8,RandomForestClassifier,0.928011,0.920957,0.914474,0.917704,"{""n_estimators"": 100, ""max_depth"": 5}"
9,RandomForestClassifier,0.914765,0.915442,0.887813,0.901416,"{""n_estimators"": 200, ""max_depth"": 4}"


## 4.4 Logistic Regression

In [None]:
resul_rf = pd.DataFrame()
model = LogisticRegression
para = {
        'C': [0.1, 0.5, 1.0],
        'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    }

for i in range(2, 20, 1):
    param = {
        'C': random.choice(para['C']),
        'solver': random.choice(para['solver']),
    }

    result = classifier_evaluation(model, param, data_test)


    resul_rf = pd.concat([resul_rf,result]).reset_index(drop=True)

In [31]:
resul_rf

Unnamed: 0,name,accuracy,precision_score,recall_score,f1_score,param
0,LogisticRegression,0.561078,0.5,0.000528,0.001055,"{""C"": 0.1, ""solver"": ""sag""}"
1,LogisticRegression,0.803538,0.740352,0.85077,0.79173,"{""C"": 0.5, ""solver"": ""liblinear""}"
2,LogisticRegression,0.847372,0.83431,0.813902,0.82398,"{""C"": 0.1, ""solver"": ""lbfgs""}"
3,LogisticRegression,0.561156,1.0,0.000176,0.000352,"{""C"": 1.0, ""solver"": ""saga""}"
4,LogisticRegression,0.561078,0.5,0.000528,0.001055,"{""C"": 0.1, ""solver"": ""sag""}"
5,LogisticRegression,0.561156,1.0,0.000176,0.000352,"{""C"": 0.1, ""solver"": ""saga""}"
6,LogisticRegression,0.803769,0.740508,0.85121,0.792009,"{""C"": 0.1, ""solver"": ""liblinear""}"
7,LogisticRegression,0.803538,0.740352,0.85077,0.79173,"{""C"": 1.0, ""solver"": ""liblinear""}"
8,LogisticRegression,0.561078,0.5,0.000528,0.001055,"{""C"": 0.5, ""solver"": ""sag""}"
9,LogisticRegression,0.561156,1.0,0.000176,0.000352,"{""C"": 0.1, ""solver"": ""saga""}"
