In [1]:
%pip install pandas numpy seaborn matplotlib scikit-learn xgboost catboost

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import catboost
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier


import pickle as pkl

import os

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, f1_score, roc_auc_score
from sklearn.preprocessing import MinMaxScaler as Scaler
# import hyperopt

from models import decision_tree, k_nearest, logistic_regression, random_forest, support_vector_cls, xgboost, catboost

In [3]:
FILENAME = 'pyradiomics_extraction_segmentation_no_maskcorrect_with_FS_with_Demographic_Features'

RESULTS_PATH = f'Results without preprocessing/{FILENAME}.csv'
CLASS_LABELS = '../../Data/Patient class labels.csv'
DF_PATH = '../../Data/With Demographic Features/pyradiomics_extraction_segmentation_maskcorrect.csv'
MODEL_PICKLING = f'pyradiomics_extraction_segmentation_maskcorrect.pkl'

In [4]:
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

SEED = 2454259

FEATURE_SCALE = True
CRITERION = 'accuracy'

In [5]:
labels = pd.read_csv(CLASS_LABELS)
features = pd.read_csv(DF_PATH).drop(columns = 'sequence', errors='ignore')

total_features = pd.merge(features, labels, left_on = 'patient', right_on = 'Patient ID').drop(columns = ['Patient ID', 'patient'])

## Train test split

In [6]:
def train_test_val_splits(df, train_ratio = 0.8, val_ratio = 0.2, random_state = 2454259):
   
    val_ratio_adj = val_ratio / (1-train_ratio)

    train_df, val_df = train_test_split(df, train_size = train_ratio, random_state= random_state)
    val_df, test_df = train_test_split(val_df, train_size = val_ratio_adj, random_state= random_state)

    return train_df, val_df, test_df


train_df, val_df, test_df = train_test_val_splits(total_features, TRAIN_RATIO, VAL_RATIO, random_state = SEED)

In [7]:
train_x= train_df.drop(columns = ['ER', 'PR', 'HER2', 'Mol Subtype'])
train_y_er = train_df['ER'].to_numpy()
train_y_pr = train_df['PR'].to_numpy()
train_y_her = train_df['HER2'].to_numpy()
train_y_mol_subtype = train_df['Mol Subtype'].to_numpy()


val_x = val_df.drop(columns = ['ER', 'PR', 'HER2', 'Mol Subtype'])
val_y_er = val_df['ER'].to_numpy()
val_y_pr = val_df['PR'].to_numpy()
val_y_her = val_df['HER2'].to_numpy()
val_y_mol_subtype = val_df['Mol Subtype'].to_numpy()

test_x = test_df.drop(columns = ['ER', 'PR', 'HER2', 'Mol Subtype'])
test_y_er = test_df['ER'].to_numpy()
test_y_pr = test_df['PR'].to_numpy()
test_y_her = test_df['HER2'].to_numpy()
test_y_mol_subtype = test_df['Mol Subtype'].to_numpy()

## Training

In [8]:
if FEATURE_SCALE == True:
    scaler = Scaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    val_x = scaler.transform(val_x)
    test_x = scaler.transform(test_x)

## Fitting models

In [9]:
classifications_classes = {
    'ER' : (train_x, train_y_er, val_x, val_y_er, test_x, test_y_er),
    'PR' : (train_x, train_y_pr, val_x, val_y_pr, test_x, test_y_pr),
    'HER2': (train_x, train_y_her, val_x, val_y_her, test_x, test_y_her),
    'Mol Subtype': (train_x, train_y_mol_subtype, val_x, val_y_mol_subtype, test_x, test_y_mol_subtype)
}

In [10]:
def get_metrics(model, x, truey, split):
    pred_y = model.predict(x)
    acc = accuracy_score(truey, pred_y)
    prec = precision_score(truey, pred_y, average = 'weighted')
    rec = recall_score(truey, pred_y, average = 'weighted')
    f1 = f1_score(truey, pred_y, average = 'weighted')

    return {f'{split}_acc': acc, f'{split}_prec': prec, f'{split}_rec': rec, f'{split}_f1': f1}


In [11]:
def get_NB_model(tx, ty, vx, vy, testx, testy, classification = None):
    nb = GaussianNB()
    nb.fit(tx, ty)
    
    train_metrics = get_metrics(nb, tx, ty, 'train')
    val_metrics = get_metrics(nb, vx, vy, 'val')
    test_metrics = get_metrics(nb, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'Naive Bayes'
    if classification is not None:
        result['classification'] = classification

    return result, nb

def get_DT_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get Decision Tree model with metrics'''
    model = decision_tree.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'Decision Tree'
    if classification is not None:
        result['classification'] = classification

    return result, model

def get_KNN_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get KNN model with metrics'''
    model = k_nearest.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'KNN'
    if classification is not None:
        result['classification'] = classification

    return result, model

def get_LR_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get Logistic Regression model with metrics'''
    model = logistic_regression.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'Logistic Regression'
    if classification is not None:
        result['classification'] = classification

    return result, model

def get_RF_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get Random forest model with metrics'''
    model = random_forest.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'Random Forest'
    if classification is not None:
        result['classification'] = classification

    return result, model

def get_SVM_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get SVM model with metrics'''
    model = support_vector_cls.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'SVM'
    if classification is not None:
        result['classification'] = classification

    return result, model
    
def get_XGB_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get XGBoost model with metrics'''
    model = xgboost.get_best_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **train_metrics}

    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'XGBoost'
    if classification is not None:
        result['classification'] = classification

    return result, model

def get_catboost_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get CatBoost model with metrics'''
    model = catboost.tune_catboost_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}


    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'CatBoost'
    if classification is not None:
        result['classification'] = classification

    return result, model

In [12]:
from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import catboost
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split


def tune_catboost_hyperparameters(train_x, train_y, val_x, val_y, parameters = None, criterion = 'accuracy'):

    param_grid = {
    'depth': [6, 8, 10, 12],
    'learning_rate': [0.1, 0.2, 0.25],
    'iterations': [150, 200, 250],
    'l2_leaf_reg': [1, 3, 5, 7]
    }

    if parameters is not None:
        param_grid = parameters
    
    split_index = [-1]*len(train_x) + [0]*len(val_x)
    X = np.concatenate((train_x, val_x), axis=0)
    y = np.concatenate((train_y, val_y), axis=0)
    pds = PredefinedSplit(test_fold = split_index)
    model = CatBoostClassifier()


    grid_search = GridSearchCV(model, param_grid = param_grid, cv = pds, scoring = criterion)
    grid_search.fit(X, y)  
    best_params = grid_search.best_params_
    best_model = grid_search.best_estimator_

    # print("Best Hyperparameters: ", best_params)
    # print("Best Score: ", best_model.score(val_x, val_y))
    return best_model 

def get_catboost_model(tx, ty, vx, vy, testx, testy, criterion = 'accuracy', classification = None):
    '''Get CatBoost model with metrics'''
    model = tune_catboost_hyperparameters(tx, ty, vx, vy, criterion = criterion)

    train_metrics = get_metrics(model, tx, ty, 'train')
    val_metrics = get_metrics(model, vx, vy, 'val')
    test_metrics = get_metrics(model, testx, testy, 'test')

    combined_metrics = {**train_metrics, **val_metrics, **test_metrics}


    result = pd.DataFrame(combined_metrics, index = [0])
    result['Algorithm'] = 'CatBoost'
    if classification is not None:
        result['classification'] = classification

    return result, model

In [13]:
classification_results = pd.DataFrame()
models = {}

for key, value in classifications_classes.items():

    tx, ty, vx, vy, testx, testy = value
    
    res_nb, nb = get_NB_model(tx, ty, vx, vy, testx, testy, classification = key)
    res_dt, dt = get_DT_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_knn, knn = get_KNN_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_lr, lr = get_LR_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_rf, rf = get_RF_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_svm, svm = get_SVM_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_xgb, xgb = get_XGB_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)
    res_cat, cat = get_catboost_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)

    models[key] = {'Naive Bayes' : nb,
                   'Decision Trees' : dt,
                   'K Nearest Neighbours': knn,
                   'Logistic Regression' : lr,
                   'Random Forest' : rf,
                   'SVM': svm,
                   'XGBoost': xgb,
                   'CatBoost': cat}

    classification_results = pd.concat([classification_results,
                                        res_nb,
                                        res_dt,
                                        res_knn,
                                        res_lr,
                                        res_rf,
                                        res_svm,
                                        res_xgb,
                                        res_cat]) 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0:	learn: 0.6534199	total: 158ms	remaining: 23.5s
1:	learn: 0.6270325	total: 189ms	remaining: 14s
2:	learn: 0.6037863	total: 230ms	remaining: 11.3s
3:	learn: 0.5903683	total: 257ms	remaining: 9.38s
4:	learn: 0.5749164	total: 295ms	remaining: 8.55s
5:	learn: 0.5598126	total: 335ms	remaining: 8.05s
6:	learn: 0.5502690	total: 367ms	remaining: 7.5s
7:	learn: 0.5413311	total: 412ms	remaining: 7.3s
8:	learn: 0.5341481	total: 452ms	remaining: 7.07s
9:	learn: 0.5188086	total: 492ms	remaining: 6.89s
10:	learn: 0.5120935	total: 534ms	remaining: 6.75s
11:	learn: 0.5036422	total: 561ms	remaining: 6.45s
12:	learn: 0.4969619	total: 610ms	remaining: 6.43s
13:	learn: 0.4889711	total: 644ms	remaining: 6.25s
14:	learn: 0.4803131	total: 688ms	remaining: 6.2s
15:	learn: 0.4764947	total: 732ms	remaining: 6.13s
16:	learn: 0.4712581	total: 749ms	remaining: 5.86s
17:	learn: 0.4655427	total: 795ms	remaining: 5.83s
18:	learn: 0.4586919	total: 836ms	remaining: 5.76s
19:	learn: 0.4527140	total: 867ms	remaining: 5

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0:	learn: 0.6711040	total: 87.9ms	remaining: 13.1s
1:	learn: 0.6588436	total: 126ms	remaining: 9.29s
2:	learn: 0.6419281	total: 187ms	remaining: 9.17s
3:	learn: 0.6273895	total: 250ms	remaining: 9.12s
4:	learn: 0.6172747	total: 531ms	remaining: 15.4s
5:	learn: 0.6016400	total: 591ms	remaining: 14.2s
6:	learn: 0.5915377	total: 674ms	remaining: 13.8s
7:	learn: 0.5865440	total: 746ms	remaining: 13.2s
8:	learn: 0.5769021	total: 827ms	remaining: 13s
9:	learn: 0.5703564	total: 916ms	remaining: 12.8s
10:	learn: 0.5640511	total: 1.04s	remaining: 13.1s
11:	learn: 0.5605319	total: 1.1s	remaining: 12.7s
12:	learn: 0.5506148	total: 1.19s	remaining: 12.5s
13:	learn: 0.5443611	total: 1.36s	remaining: 13.2s
14:	learn: 0.5350703	total: 1.43s	remaining: 12.8s
15:	learn: 0.5315549	total: 1.53s	remaining: 12.9s
16:	learn: 0.5271648	total: 1.65s	remaining: 12.9s
17:	learn: 0.5220090	total: 1.69s	remaining: 12.4s
18:	learn: 0.5171860	total: 1.77s	remaining: 12.2s
19:	learn: 0.5131080	total: 1.97s	remaining

  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please als

0:	learn: 0.6347558	total: 64.6ms	remaining: 9.62s
1:	learn: 0.5944960	total: 184ms	remaining: 13.6s
2:	learn: 0.5616575	total: 239ms	remaining: 11.7s
3:	learn: 0.5400317	total: 264ms	remaining: 9.63s
4:	learn: 0.5065073	total: 334ms	remaining: 9.69s
5:	learn: 0.4929085	total: 365ms	remaining: 8.77s
6:	learn: 0.4797933	total: 421ms	remaining: 8.59s
7:	learn: 0.4699000	total: 464ms	remaining: 8.24s
8:	learn: 0.4553335	total: 535ms	remaining: 8.38s
9:	learn: 0.4427275	total: 605ms	remaining: 8.47s
10:	learn: 0.4379017	total: 659ms	remaining: 8.32s
11:	learn: 0.4275685	total: 712ms	remaining: 8.19s
12:	learn: 0.4190006	total: 767ms	remaining: 8.09s
13:	learn: 0.4166019	total: 832ms	remaining: 8.08s
14:	learn: 0.4076879	total: 875ms	remaining: 7.87s
15:	learn: 0.4055521	total: 894ms	remaining: 7.48s
16:	learn: 0.3994840	total: 947ms	remaining: 7.41s
17:	learn: 0.3930025	total: 1s	remaining: 7.35s
18:	learn: 0.3837222	total: 1.06s	remaining: 7.33s
19:	learn: 0.3783688	total: 1.12s	remaining

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS 

0:	learn: 1.3022990	total: 110ms	remaining: 16.4s
1:	learn: 1.2386528	total: 165ms	remaining: 12.2s
2:	learn: 1.1831987	total: 207ms	remaining: 10.1s
3:	learn: 1.1426398	total: 255ms	remaining: 9.3s
4:	learn: 1.1074969	total: 290ms	remaining: 8.42s
5:	learn: 1.0815348	total: 312ms	remaining: 7.48s
6:	learn: 1.0595309	total: 425ms	remaining: 8.68s
7:	learn: 1.0353221	total: 470ms	remaining: 8.33s
8:	learn: 1.0164405	total: 507ms	remaining: 7.93s
9:	learn: 0.9982535	total: 555ms	remaining: 7.78s
10:	learn: 0.9858212	total: 603ms	remaining: 7.62s
11:	learn: 0.9689285	total: 645ms	remaining: 7.41s
12:	learn: 0.9550899	total: 685ms	remaining: 7.22s
13:	learn: 0.9452752	total: 733ms	remaining: 7.12s
14:	learn: 0.9377242	total: 778ms	remaining: 7s
15:	learn: 0.9265294	total: 825ms	remaining: 6.91s
16:	learn: 0.9198154	total: 867ms	remaining: 6.78s
17:	learn: 0.9113642	total: 910ms	remaining: 6.67s
18:	learn: 0.9001538	total: 954ms	remaining: 6.58s
19:	learn: 0.8917906	total: 1s	remaining: 6.5

In [None]:
classification_results
cols = classification_results.columns.tolist()

cols = cols[-2:] + cols[:-2]
classification_results = classification_results[cols]

classification_results.to_csv("mask_correct_segmentation.csv", index = False)


with open(MODEL_PICKLING, 'wb') as file:
    pkl.dump(models, file)

In [None]:
classification_results

Unnamed: 0,train_acc,train_prec,train_rec,train_f1,val_acc,val_prec,val_rec,val_f1,test_acc,test_prec,test_rec,test_f1,Algorithm,classification
0,0.271318,0.703956,0.271318,0.140923,0.246377,0.436594,0.246377,0.109871,0.266187,0.435918,0.266187,0.136023,Naive Bayes,ER
0,0.792248,0.775812,0.792248,0.772159,0.775362,0.755261,0.775362,0.759328,0.719424,0.665908,0.719424,0.670372,Decision Tree,ER
0,0.784496,0.772322,0.784496,0.744835,0.797101,0.788877,0.797101,0.758286,0.71223,0.641422,0.71223,0.650558,KNN,ER
0,0.745736,0.695886,0.745736,0.654552,0.768116,0.76406,0.768116,0.691197,0.71223,0.534173,0.71223,0.610483,Logistic Regression,ER
0,0.813953,0.851163,0.813953,0.771133,0.826087,0.858696,0.826087,0.787652,0.733813,0.676659,0.733813,0.645221,Random Forest,ER
0,0.803101,0.829016,0.803101,0.757123,0.826087,0.858696,0.826087,0.787652,0.690647,0.587954,0.690647,0.61987,SVM,ER
0,0.894574,0.904647,0.894574,0.885185,0.891304,0.905005,0.891304,0.879579,,,,,XGBoost,ER
0,0.894574,0.904647,0.894574,0.885185,0.891304,0.905005,0.891304,0.879579,0.726619,0.665843,0.726619,0.659913,CatBoost,ER
0,0.35969,0.536429,0.35969,0.206277,0.333333,0.781022,0.333333,0.17559,0.359712,0.448042,0.359712,0.214201,Naive Bayes,PR
0,0.713178,0.729482,0.713178,0.665808,0.753623,0.761456,0.753623,0.718686,0.669065,0.661517,0.669065,0.603825,Decision Tree,PR


In [None]:
res_cat

Unnamed: 0,train_acc,train_prec,train_rec,train_f1,val_acc,val_prec,val_rec,val_f1,test_acc,test_prec,test_rec,test_f1,Algorithm,classification
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.748201,0.717183,0.748201,0.690592,CatBoost,ER


In [None]:
res_cat, cat = get_catboost_model(tx, ty, vx, vy, testx, testy, criterion = CRITERION, classification = key)

In [None]:
print(cat.get_params())

{'depth': 10, 'iterations': 200, 'l2_leaf_reg': 3, 'learning_rate': 0.2}
