## <b>Carga de librerías</b>

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 

# Modelos
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Metricas
from sklearn.metrics import roc_auc_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split


## <b> Carga de datos (FE): </b>

In [2]:
# Load the feature-engineered dataset from the CSV file in the working directory.
dataset = pd.read_csv(filepath_or_buffer='dataset_proyecto_FE.csv')

# Preview the first five rows as a quick sanity check of the columns.
dataset.head(n=5)

Unnamed: 0,BUCKET,DIAS_MORA,DIAS_INCUMPLIMIENTO,MET_CALCULO,MARCA_DEFAULT,PDI,RANGO_SCORE_PN,seg_PD12mes,PD_12M_ACTUAL_FINAL,PD_12M_DESEMBOLSO_FINAL,PD_LIFETIME_ACTUAL_FINAL,PD_LIFETIME_DESEMBOLSO_FINAL,EXPOSICION_TOTAL_FA,VALOR_PROVISION,cobertura,target
0,1.0,0.0,0.0,1.67215,0.0,0.50798,1.0,1.67215,0.32591,0.32591,0.304117,0.066976,1.67215,1.67215,0.165556,0
1,1.0,0.0,0.0,1.67215,0.0,0.50798,1.67215,1.67215,0.425376,0.425376,0.43231,0.220604,1.67215,1.67215,0.238467,0
2,1.0,0.0,0.0,1.67215,0.0,0.50798,1.67215,1.67215,0.44573,0.44573,0.457113,0.245921,1.67215,1.67215,0.253006,0
3,1.0,0.0,0.0,1.67215,0.0,0.50798,1.67215,1.67215,0.434547,0.434547,0.455007,0.25777,1.67215,1.67215,0.254776,0
4,1.0,0.0,0.0,1.67215,0.0,0.50798,1.67215,1.67215,0.343117,0.343117,0.404523,0.217305,1.67215,1.67215,0.250935,0


In [3]:
# Separate the predictors from the label.
# 'Unnamed: 0' is the row index that was written into the CSV (it shows up as
# a regular column in the preview above). It is only a row counter, so it is
# removed together with the target instead of being fed to the models.
# errors='ignore' keeps this cell working if the CSV is later re-exported
# without that column.
X = dataset.drop(columns=['target', 'Unnamed: 0'], errors='ignore')
y = dataset['target']

# Hold out 30% of the rows for testing; the fixed seed makes the shuffle
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=2022,
)

## <b>Configuración de modelos (Hiperparámetros)</b>

In [4]:
# Hyper-parameter searches, one per candidate classifier.
# Every search is fitted on the training split only and uses the same 10-fold
# cross-validation, so the cross-validated scores are comparable across models.

# Support vector machine: regularisation strength C and kernel type.
svm = GridSearchCV(
    SVC(),
    {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf']},
    cv=10,
).fit(X_train, y_train)

# Random forest: number of trees and maximum depth.
# cv=10 is now passed explicitly (it was previously left at the GridSearchCV
# default, unlike every other search here), and the estimator is seeded with
# the same value used for the train/test split so reruns give the same forest.
randomForest = GridSearchCV(
    RandomForestClassifier(random_state=2022),
    {'n_estimators': [20, 50, 100], 'max_depth': [10, 100]},
    cv=10,
).fit(X_train, y_train)

# Logistic regression without regularisation (single-point grid).
# NOTE(review): the string 'none' is accepted only by older scikit-learn
# releases; newer releases expect the Python value None instead. Confirm
# against the installed scikit-learn version before changing it.
lr = GridSearchCV(
    LogisticRegression(),
    {'penalty': ['none']},
    cv=10,
).fit(X_train, y_train)

# Decision tree: fixed criterion and depth (single-point grid); seeded so that
# any tie-breaking between equally good splits is reproducible.
dt = GridSearchCV(
    DecisionTreeClassifier(random_state=2022),
    {'criterion': ["gini"], 'max_depth': [4]},
    cv=10,
).fit(X_train, y_train)

# k-nearest neighbours: neighbourhood size.
knn = GridSearchCV(
    KNeighborsClassifier(),
    {'n_neighbors': [3, 5]},
    cv=10,
).fit(X_train, y_train)

# Linear discriminant analysis (single-point grid).
lda = GridSearchCV(
    LinearDiscriminantAnalysis(),
    {'solver': ['svd'], 'store_covariance': [True]},
    cv=10,
).fit(X_train, y_train)

# Quadratic discriminant analysis (single-point grid).
qda = GridSearchCV(
    QuadraticDiscriminantAnalysis(),
    {'store_covariance': [True]},
    cv=10,
).fit(X_train, y_train)



## <b> Resultados </b>

In [5]:
def ranked_cv_results(search):
    """Return a fitted search's ``cv_results_`` as a DataFrame, best rank first.

    Parameters
    ----------
    search : object exposing a ``cv_results_`` mapping (e.g. a fitted
        GridSearchCV) that contains a ``'rank_test_score'`` entry.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated parameter combination, sorted ascending by
        ``'rank_test_score'``.
    """
    return pd.DataFrame(search.cv_results_).sort_values('rank_test_score', ascending=True)


# Each table gets its own '<model>Results' name. The fitted search objects
# (svm, randomForest, lr, ...) are deliberately NOT overwritten: they are still
# needed for prediction on the test split, and rebinding them to a DataFrame
# made this cell raise AttributeError when executed a second time.
svmResults = ranked_cv_results(svm)
randomForestResults = ranked_cv_results(randomForest)
lrResults = ranked_cv_results(lr)
dtResults = ranked_cv_results(dt)
knnResults = ranked_cv_results(knn)
ldaResults = ranked_cv_results(lda)
qdaResults = ranked_cv_results(qda)

## <b>Video </b>
https://youtu.be/MbL0Z3BCQZM