In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, accuracy_score, auc, roc_curve
from sklearn.metrics import confusion_matrix


In [3]:
data = pd.read_csv("df.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,Animal ID,Intake Type,Intake Condition,Animal Type,Breed,Outcome Type,days_in_shelter,age_upon_intake_months,age_upon_intake_years,age_upon_outcome_months,age_upon_outcome_years,neutered_or_spayed_outcome,male_or_female_outcome,neutered_or_spayed_intake,male_or_female_intake,group_color
0,0,A006100,Public Assist,Normal,Dog,Mixed,Return to Owner,2.0,72.0,6.0,72.0,6.0,1,1.0,1,1.0,Yellow
1,1,A047759,Owner Surrender,Normal,Dog,Dachshund,Transfer,5.0,120.0,10.0,120.0,10.0,1,1.0,1,1.0,Tricolor
2,2,A134067,Public Assist,Injured,Dog,Shetland Sheepdog,Return to Owner,1.0,192.0,16.0,192.0,16.0,1,1.0,1,1.0,Brown
3,3,A141142,Stray,Aged,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,1,0.0,1,0.0,Black
4,4,A163459,Stray,Normal,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,0,0.0,0,0.0,Black


In [None]:
def classifier_y(df,column):
    target = []
    for days in df[column]:
        if days<60:
            target.append(1)
        else :
            target.append(0)
    df['target'] = target
    return df

In [None]:
classifier_y(data,"days_in_shelter")

Unnamed: 0.1,Unnamed: 0,Animal ID,Intake Type,Intake Condition,Animal Type,Breed,Outcome Type,days_in_shelter,age_upon_intake_months,age_upon_intake_years,age_upon_outcome_months,age_upon_outcome_years,neutered_or_spayed_outcome,male_or_female_outcome,neutered_or_spayed_intake,male_or_female_intake,group_color,target
0,0,A006100,Public Assist,Normal,Dog,Mixed,Return to Owner,2.0,72.0,6.0,72.0,6.0,1,1.0,1,1.0,Yellow,1
1,1,A047759,Owner Surrender,Normal,Dog,Dachshund,Transfer,5.0,120.0,10.0,120.0,10.0,1,1.0,1,1.0,Tricolor,1
2,2,A134067,Public Assist,Injured,Dog,Shetland Sheepdog,Return to Owner,1.0,192.0,16.0,192.0,16.0,1,1.0,1,1.0,Brown,1
3,3,A141142,Stray,Aged,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,1,0.0,1,0.0,Black,1
4,4,A163459,Stray,Normal,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,0,0.0,0,0.0,Black,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102720,110852,A829991,Stray,Normal,Dog,Mixed,Return to Owner,2.0,24.0,2.0,24.0,2.0,0,1.0,0,1.0,Black,1
102721,110853,A829992,Stray,Normal,Dog,Mixed,Return to Owner,2.0,24.0,2.0,24.0,2.0,1,0.0,1,0.0,White,1
102722,110861,A830034,Stray,Normal,Dog,Mixed,Return to Owner,1.0,24.0,2.0,24.0,2.0,1,1.0,1,1.0,Brown,1
102723,110874,A830080,Owner Surrender,Normal,Dog,Alaskan Husky,Rto-Adopt,2.0,12.0,1.0,12.0,1.0,1,1.0,0,1.0,Black,1


In [None]:
# Creating X and y
X = data[['Intake Type',"Animal Type",'Intake Condition','Breed','age_upon_intake_months','neutered_or_spayed_intake','male_or_female_intake','group_color']]
y = data['target']

In [None]:
# Importing the splitter, classification model, and the metric
from sklearn.model_selection import train_test_split

#Splitting the data with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 10)

In [None]:
#from sklearn import set_config ;set_config(display='diagram')
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler , FunctionTransformer,OneHotEncoder
from sklearn.compose import make_column_selector,ColumnTransformer

# Impute then Scale for numerical variables
num_transformer = Pipeline([
    ('imputer', SimpleImputer()),
    ('scaler', MinMaxScaler())])

# Encode categorical varibles 
cat_transformer = OneHotEncoder(handle_unknown='ignore',sparse=False)

# Apply transformations to desired features
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, make_column_selector(dtype_include=['int64',"float64"])),
    ('cat_transformer', cat_transformer, make_column_selector(dtype_include=["object"]))])

In [None]:
preprocessor.fit(X_train, y_train)

ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
                  transformer_weights=None,
                  transformers=[('num_transformer',
                                 Pipeline(memory=None,
                                          steps=[('imputer',
                                                  SimpleImputer(add_indicator=False,
                                                                copy=True,
                                                                fill_value=None,
                                                                missing_values=nan,
                                                                strategy='mean',
                                                                verbose=0)),
                                                 ('scaler',
                                                  MinMaxScaler(copy=True,
                                                               feature_range=(0,
                   

In [None]:
X_train_new = preprocessor.transform(X_train)
X_test_new = preprocessor.transform(X_test)

In [None]:
#Importing SMOTE
from imblearn.over_sampling import SMOTE
#Create an oversampled training data
smote = SMOTE(random_state = 101)
X_oversample, y_oversample = smote.fit_resample(X_train_new, y_train)



In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
models = {
    'LR': {
        'model': LogisticRegression(solver='lbfgs', multi_class='auto',max_iter = 1000),
        'parameters': {
            'C': [0.1,0.5,1]
           }
    },

    'DT': {
        'model': DecisionTreeClassifier(splitter='best'),
        'parameters': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [10,15]
        }
    }
}
scores = [] 
cv_shuffle = ShuffleSplit(n_splits=5, test_size=0.20, random_state=0)

for model_name, model_params in models.items():
    print(f"---------------------------{model_name}-----------------------------")
    gs = GridSearchCV(model_params['model'], model_params['parameters'],scoring= 'f1_macro',cv = cv_shuffle, return_train_score=False)
    print(f"--------------------------- fit: {model_name}-----------------------------")
    gs.fit(X_train_new, y_train)
    scores.append({
        'model': model_name,
        'best_parameters': gs.best_params_,
        'score': gs.best_score_
    })
    print(" ")
    print(f"--------------------------- scores-----------------------------")
    print(scores)

scores_df = pd.DataFrame(scores, columns=['model', 'best_parameters', 'score'])

---------------------------LR-----------------------------
--------------------------- fit: LR-----------------------------
 
--------------------------- scores-----------------------------
[{'model': 'LR', 'best_parameters': {'C': 0.1}, 'score': 0.47795196544157453}]
---------------------------DT-----------------------------
--------------------------- fit: DT-----------------------------
 
--------------------------- scores-----------------------------
[{'model': 'LR', 'best_parameters': {'C': 0.1}, 'score': 0.47795196544157453}, {'model': 'DT', 'best_parameters': {'criterion': 'gini', 'max_depth': 15}, 'score': 0.5045880926110048}]


In [None]:
models = {
    'LR': {
        'model': LogisticRegression(solver='lbfgs', multi_class='auto',max_iter = 1000),
        'parameters': {
            'C': [1,5]
           }
    },

    'DT': {
        'model': DecisionTreeClassifier(splitter='best'),
        'parameters': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [5,10]
        }
    },

    'RF': {
        'model': RandomForestClassifier(criterion='gini'),
        'parameters': {
            'n_estimators': [3,10,50]
        }
    },
        'SVC': {
        'model': SVC(gamma='auto'),
        'parameters': {
            'C': [0.1,1,20],
            'kernel': ['rbf','linear']
        }
    },

    'GB': {
        'model': GradientBoostingClassifier(criterion='friedman_mse'),
        'parameters': {
            'loss': ['deviance', 'exponential']
           }
    },

    'KNN': {
        'model': KNeighborsClassifier(algorithm='auto'),
        'parameters': {
            'n_neighbors': [5,10,25],
            'weights' : ['uniform', 'distance'] 
           }
    }
    
}

scores = [] 
cv_shuffle = ShuffleSplit(n_splits=5, test_size=0.20, random_state=0)

for model_name, model_params in models.items():
    print(f"---------------------------{model_name}-----------------------------")
    gs = GridSearchCV(model_params['model'], model_params['parameters'],scoring= 'f1_macro',cv = cv_shuffle, return_train_score=False)
    print(f"--------------------------- fit: {model_name}-----------------------------")
    gs.fit(X_train_new, y_train)
    scores.append({
        'model': model_name,
        'best_parameters': gs.best_params_,
        'score': gs.best_score_
    })
    print(" ")
    print(f"--------------------------- scores-----------------------------")
    print(scores)

scores_df = pd.DataFrame(scores, columns=['model', 'best_parameters', 'score'])

---------------------------LR-----------------------------
--------------------------- fit: LR-----------------------------
 
--------------------------- scores-----------------------------
[{'model': 'LR', 'best_parameters': {'C': 1}, 'score': 0.47795196544157453}]
---------------------------DT-----------------------------
--------------------------- fit: DT-----------------------------
 
--------------------------- scores-----------------------------
[{'model': 'LR', 'best_parameters': {'C': 1}, 'score': 0.47795196544157453}, {'model': 'DT', 'best_parameters': {'criterion': 'gini', 'max_depth': 10}, 'score': 0.4971322768042901}]
---------------------------RF-----------------------------
--------------------------- fit: RF-----------------------------
 
--------------------------- scores-----------------------------
[{'model': 'LR', 'best_parameters': {'C': 1}, 'score': 0.47795196544157453}, {'model': 'DT', 'best_parameters': {'criterion': 'gini', 'max_depth': 10}, 'score': 0.49713227

In [None]:
scores_df

In [None]:
from sklearn.metrics import f1_score, accuracy_score, auc, roc_curve
from sklearn.metrics import confusion_matrix

classifier = LogisticRegression(C=1,max_iter = 1000)
classifier.fit(X_train_new, y_train)

0.9572837542927713
0.9180673632292816


array([[    0,  2525],
       [    0, 28293]])

In [None]:


y_pred = classifier.predict(X_test_new)
print(f1_score(y_test, y_pred, average = 'macro'))
print(accuracy_score(y_test, y_pred))
#print(auc(y_test, classifier.predict_proba(X_test_new)))

confusion_matrix(y_test, y_pred)

0.4786418771463857
0.9180673632292816


array([[    0,  2525],
       [    0, 28293]])

In [None]:
from sklearn.metrics import classification_report,plot_confusion_matrix

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      2525
           1       0.92      1.00      0.96     28293

    accuracy                           0.92     30818
   macro avg       0.46      0.50      0.48     30818
weighted avg       0.84      0.92      0.88     30818



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Create first pipeline for base without reducing features.

pipe = Pipeline([('classifier' , RandomForestClassifier())])
# pipe = Pipeline([('classifier', RandomForestClassifier())])

# Create param grid.

param_grid = [
    {'classifier' : [LogisticRegression()],
     'classifier__penalty' : ['l1', 'l2'],
    'classifier__C' : np.logspace(-4, 4, 20),
    'classifier__solver' : ['liblinear']},
    {'classifier' : [RandomForestClassifier()],
    'classifier__n_estimators' : list(range(10,101,10)),
    'classifier__max_features' : list(range(6,32,5))}
]

# Create grid search object

clf = GridSearchCV(pipe, param_grid = param_grid, cv = 5, verbose=True, n_jobs=-1)

# Fit on data

best_clf = clf.fit(X_train_new, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:  7.4min
[Parallel(n_jobs=-1)]: Done 446 tasks      | elapsed: 31.5min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 37.8min finished


In [None]:
 clf.best_params_

{'classifier': LogisticRegression(C=0.0001, class_weight=None, dual=False, fit_intercept=True,
                    intercept_scaling=1, l1_ratio=None, max_iter=100,
                    multi_class='auto', n_jobs=None, penalty='l1',
                    random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                    warm_start=False),
 'classifier__C': 0.0001,
 'classifier__penalty': 'l1',
 'classifier__solver': 'liblinear'}

In [None]:
clf.best_score_

0.916252938885673

In [None]:
from sklearn.metrics import f1_score
y_pred = best_clf.predict(X_test_new)
print(f1_score(y_test, y_pred, average = 'macro'))

0.4786418771463857


In [None]:
y_pred = best_clf.predict(X_test_new)
y_pred

array([1, 1, 1, ..., 1, 1, 1])

In [None]:
#Importing SMOTE
from imblearn.over_sampling import SMOTE
#Create an oversampled training data
smote = SMOTE(random_state = 101)
X_oversample, y_oversample = smote.fit_resample(X_train_new, y_train)



In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from imblearn.over_sampling import SMOTE

LR = LogisticRegression(max_iter = 1000)
DT = DecisionTreeClassifier()
RF = RandomForestClassifier()
KNN = KNeighborsClassifier()
SVC = SVC()
GB= GradientBoostingClassifier()
models = [LR,DT,RF,KNN,SVC,GB]

for model_SMOTE in models :
    print(f"---------------------------{model_SMOTE}-----------------------------")    
    print(f'########################### SMOTE ############################')
    model_SMOTE.fit(X_oversample, y_oversample)
    print(f"---------------------------fim do model-----------------------------")
    print(f"The f1:{f1_score(y_test, model_SMOTE.predict(X_test_new), average='macro')}")
    print(classification_report(y_test, model_SMOTE.predict(X_test_new)))

data10k

In [4]:
data10k = data[:10000].copy()

In [6]:
def classifier_y(df,column):
    target = []
    for days in df[column]:
        if days<60:
            target.append(1)
        else :
            target.append(0)
    df['target'] = target
    return df


In [7]:
classifier_y(data10k,"days_in_shelter")

Unnamed: 0.1,Unnamed: 0,Animal ID,Intake Type,Intake Condition,Animal Type,Breed,Outcome Type,days_in_shelter,age_upon_intake_months,age_upon_intake_years,age_upon_outcome_months,age_upon_outcome_years,neutered_or_spayed_outcome,male_or_female_outcome,neutered_or_spayed_intake,male_or_female_intake,group_color,target
0,0,A006100,Public Assist,Normal,Dog,Mixed,Return to Owner,2.0,72.0,6.0,72.0,6.0,1,1.0,1,1.0,Yellow,1
1,1,A047759,Owner Surrender,Normal,Dog,Dachshund,Transfer,5.0,120.0,10.0,120.0,10.0,1,1.0,1,1.0,Tricolor,1
2,2,A134067,Public Assist,Injured,Dog,Shetland Sheepdog,Return to Owner,1.0,192.0,16.0,192.0,16.0,1,1.0,1,1.0,Brown,1
3,3,A141142,Stray,Aged,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,1,0.0,1,0.0,Black,1
4,4,A163459,Stray,Normal,Dog,Mixed,Return to Owner,1.0,180.0,15.0,180.0,15.0,0,0.0,0,0.0,Black,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10691,A675115,Owner Surrender,Normal,Cat,Mixed,Adoption,2.0,5.0,1.0,5.0,1.0,1,0.0,0,0.0,Torbie,1
9996,10692,A675116,Owner Surrender,Normal,Cat,Mixed,Transfer,47.0,5.0,1.0,7.0,1.0,1,0.0,0,0.0,Tortie,1
9997,10693,A675117,Stray,Normal,Dog,Boxer,Adoption,34.0,36.0,3.0,36.0,3.0,1,1.0,0,1.0,Red,1
9998,10694,A675118,Stray,Normal,Dog,Boxer,Transfer,7.0,36.0,3.0,36.0,3.0,1,0.0,0,0.0,Red,1


In [8]:
# Creating X and y
X = data10k[['Intake Type',"Animal Type",'Intake Condition','Breed','age_upon_intake_months','neutered_or_spayed_intake','male_or_female_intake','group_color']]
y = data10k['target']

In [11]:
#Splitting the data with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 10)

In [13]:
#from sklearn import set_config ;set_config(display='diagram')
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler , FunctionTransformer,OneHotEncoder
from sklearn.compose import make_column_selector,ColumnTransformer

# Impute then Scale for numerical variables
num_transformer = Pipeline([
    ('imputer', SimpleImputer()),
    ('scaler', MinMaxScaler())])

# Encode categorical varibles 
cat_transformer = OneHotEncoder(handle_unknown='ignore',sparse=False)

# Apply transformations to desired features
preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, make_column_selector(dtype_include=['int64',"float64"])),
    ('cat_transformer', cat_transformer, make_column_selector(dtype_include=["object"]))])

In [14]:
preprocessor.fit(X_train, y_train)

ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
                  transformer_weights=None,
                  transformers=[('num_transformer',
                                 Pipeline(memory=None,
                                          steps=[('imputer',
                                                  SimpleImputer(add_indicator=False,
                                                                copy=True,
                                                                fill_value=None,
                                                                missing_values=nan,
                                                                strategy='mean',
                                                                verbose=0)),
                                                 ('scaler',
                                                  MinMaxScaler(copy=True,
                                                               feature_range=(0,
                   

In [15]:
X_train_new = preprocessor.transform(X_train)
X_test_new = preprocessor.transform(X_test)

In [16]:
#Importing SMOTE
from imblearn.over_sampling import SMOTE
#Create an oversampled training data
smote = SMOTE(random_state = 101)
X_oversample, y_oversample = smote.fit_resample(X_train_new, y_train)



In [None]:
from sklearn.svm import SVC
models = {
    'LR': {
        'model': LogisticRegression(solver='lbfgs', multi_class='auto',max_iter = 1000),
        'parameters': {
            'C': [0.1,0.5,0.8]
           }
    },

    'DT': {
        'model': DecisionTreeClassifier(splitter='best'),
        'parameters': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [10,15,20]
        }
    },

    'RF': {
        'model': RandomForestClassifier(criterion='gini'),
        'parameters': {
            'n_estimators': [3,10,50]
        }
    },
        'SVC': {
        'model': SVC(gamma='auto'),
        'parameters': {
            'C': [0.1,0.2,0.3],
            'kernel': ['rbf','linear']
        }
    },

    'GB': {
        'model': GradientBoostingClassifier(criterion='friedman_mse'),
        'parameters': {
            'loss': ['deviance', 'exponential']
           }
    },

    'KNN': {
        'model': KNeighborsClassifier(algorithm='auto'),
        'parameters': {
            'n_neighbors': [3,4,5],
            'weights' : ['uniform', 'distance'] 
           }
    }
    
}

scores = [] 
cv_shuffle = ShuffleSplit(n_splits=5, test_size=0.20, random_state=0)

for model_name, model_params in models.items():
    print(f"---------------------------{model_name}-----------------------------")
    gs = GridSearchCV(model_params['model'], model_params['parameters'],scoring= 'f1_macro',cv = cv_shuffle, return_train_score=False)
    print(f"--------------------------- fit: {model_name}-----------------------------")
    gs.fit(X_train_new, y_train)
    scores.append({
        'model': model_name,
        'best_parameters': gs.best_params_,
        'score': gs.best_score_
    })
    print(" ")
    print(f"--------------------------- scores-----------------------------")
    print(scores)

scores_df = pd.DataFrame(scores, columns=['model', 'best_parameters', 'score'])

In [18]:
scores_df

Unnamed: 0,model,best_parameters,score
0,LR,{'C': 0.1},0.48442
1,DT,"{'criterion': 'gini', 'max_depth': 15}",0.514762
2,RF,{'n_estimators': 10},0.508047
3,SVC,"{'C': 0.1, 'kernel': 'rbf'}",0.48442
4,GB,{'loss': 'deviance'},0.492865
5,KNN,"{'n_neighbors': 4, 'weights': 'uniform'}",0.507682


In [None]:
from sklearn.svm import SVC
models = {
    'LR': {
        'model': LogisticRegression(solver='lbfgs', multi_class='auto',max_iter = 1000),
        'parameters': {
            'C': [0.1,0.5,0.8]
           }
    },

    'DT': {
        'model': DecisionTreeClassifier(splitter='best'),
        'parameters': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [10,15,20]
        }
    },

    'RF': {
        'model': RandomForestClassifier(criterion='gini'),
        'parameters': {
            'n_estimators': [3,10,50]
        }
    },
        'SVC': {
        'model': SVC(gamma='auto'),
        'parameters': {
            'C': [0.1,0.2,0.3],
            'kernel': ['rbf','linear']
        }
    },

    'GB': {
        'model': GradientBoostingClassifier(criterion='friedman_mse'),
        'parameters': {
            'loss': ['deviance', 'exponential']
           }
    },

    'KNN': {
        'model': KNeighborsClassifier(algorithm='auto'),
        'parameters': {
            'n_neighbors': [3,4,5],
            'weights' : ['uniform', 'distance'] 
           }
    }
    
}

scores_oversample = [] 
cv_shuffle = ShuffleSplit(n_splits=5, test_size=0.20, random_state=0)

for model_name, model_params in models.items():
    print(f"---------------------------{model_name}-----------------------------")
    gs = GridSearchCV(model_params['model'], model_params['parameters'],scoring= 'f1_macro',cv = cv_shuffle, return_train_score=False)
    print(f"--------------------------- fit: {model_name}-----------------------------")
    gs.fit(X_oversample, y_oversample)
    scores_oversample.append({
        'model': model_name,
        'best_parameters': gs.best_params_,
        'score': gs.best_score_
    })
    print(" ")
    print(f"--------------------------- scores-----------------------------")
    print(scores)

scores_df_oversample = pd.DataFrame(scores_oversample, columns=['model', 'best_parameters', 'score'])

In [20]:
scores_df_oversample

Unnamed: 0,model,best_parameters,score
0,LR,{'C': 0.8},0.611983
1,DT,"{'criterion': 'gini', 'max_depth': 20}",0.878904
2,RF,{'n_estimators': 50},0.900607
3,SVC,"{'C': 0.2, 'kernel': 'linear'}",0.58707
4,GB,{'loss': 'deviance'},0.850841
5,KNN,"{'n_neighbors': 4, 'weights': 'distance'}",0.856655
