In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy.stats as ss

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import recall_score, roc_auc_score,auc,roc_curve
from sklearn.model_selection import cross_val_score,train_test_split,StratifiedKFold


In [6]:
# Module-level accumulators: evaluate_model appends one entry to each of these
# three lists per evaluated model (AUC value, FPR array, TPR array).
roc_auc_todo, false_positive_rate_todo, true_positive_rate_todo = [], [], []

# Module-level results list. Note that modelos() builds and returns its own
# local list with the same name rather than filling this one.
resultados = []

# Shared preprocessing objects. The scaler is refit every time fit_transform
# is called on it, so its state always reflects the most recent training set.
sc = StandardScaler()

# Resamplers are seeded so that repeated runs produce the same resampled sets.
sm, ros, rus = (
    SMOTE(random_state=0),
    RandomOverSampler(random_state=0),
    RandomUnderSampler(random_state=0),
)

In [7]:
def evaluate_model(classifier, X_test, y_test, model_name):
    """Score a fitted classifier on a test set and record its ROC curve.

    Args:
        classifier: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Test features handed to the estimator.
        y_test: True labels matching ``X_test``.
        model_name: Label stored under the ``'Modelo'`` key of the result.

    Returns:
        dict with the keys ``'Modelo'``, ``'Sensibilidad'`` (recall),
        ``'ROC'`` (area under the ROC curve) and ``'GINI'`` (``2 * AUC - 1``).

    Side effects:
        Appends the AUC, the false-positive-rate array and the
        true-positive-rate array to the module-level lists ``roc_auc_todo``,
        ``false_positive_rate_todo`` and ``true_positive_rate_todo``.
    """
    predicted_labels = classifier.predict(X_test)
    # Column 1 of predict_proba is used as the score for the ROC curve
    # (presumably the positive class of a binary problem -- confirm).
    positive_scores = classifier.predict_proba(X_test)[:, 1]

    recall = recall_score(y_test, predicted_labels)
    fpr, tpr, _ = roc_curve(y_test, positive_scores)
    area = auc(fpr, tpr)
    gini_coefficient = 2 * area - 1

    # Keep the curve data in the shared accumulators, one entry per call.
    roc_auc_todo.append(area)
    false_positive_rate_todo.append(fpr)
    true_positive_rate_todo.append(tpr)

    return {
        'Modelo': model_name,
        'Sensibilidad': recall,
        'ROC': area,
        'GINI': gini_coefficient,
    }

In [None]:
# Hold out 20% of the data for testing with a fixed seed.
# stratify=y keeps the class proportions of y identical in both partitions;
# without it an imbalanced target can leave the test set with too few
# minority samples for recall / ROC to be meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0, stratify=y
)

In [9]:
def modelos(X_train, X_test, y_train, y_test):
    """Fit the six benchmark classifiers and evaluate each on the test set.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        list of dicts, one per model in the order KNN, logistic regression,
        naive Bayes, SVM, random forest, gradient boosting. Each dict is the
        value returned by ``evaluate_model`` for that model.

    Side effects:
        ``evaluate_model`` is called once per model, in the order above.
    """
    # Every stochastic estimator is seeded with 0 (matching the logistic
    # regression) so that repeated runs give the same scores.
    candidates = [
        ("KNN", KNeighborsClassifier(n_neighbors=5)),
        ("Regresion Logistic", LogisticRegression(random_state=0)),
        ("Naive Bayes", GaussianNB()),
        ("SVM", SVC(kernel='rbf', C=10, gamma=2, probability=True,
                    random_state=0)),
        ("RF", RandomForestClassifier(max_depth=4, max_features='log2',
                                      n_estimators=600, random_state=0)),
        ("Boosting", GradientBoostingClassifier(max_depth=2,
                                                max_features='log2',
                                                n_estimators=300,
                                                random_state=0)),
    ]

    resultados = []
    for label, estimator in candidates:
        estimator.fit(X_train, y_train)
        # evaluate_model performs the predictions itself, so no separate
        # predict call is needed here.
        resultados.append(evaluate_model(estimator, X_test, y_test, label))
    return resultados
  

In [None]:
def prueba(X_train, X_test, y_train, y_test):
    """Build the four scaled train/test variants used to compare resampling.

    Uses the module-level scaler ``sc`` and resamplers ``sm``, ``ros`` and
    ``rus``. The scaler is refit for each variant, so after the call it is
    left fitted on the undersampled training data.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels (passed through unchanged).

    Returns:
        dict mapping ``'base'``, ``'smote'``, ``'oversampling'`` and
        ``'undersampling'`` to a tuple ``(X_train, X_test, y_train, y_test)``
        for that variant, i.e. the same argument order this function takes.
    """
    # BASE: scale only, no resampling.
    X_train_base = sc.fit_transform(X_train)
    X_test_base = sc.transform(X_test)

    # SMOTE: resample the already-scaled training data. The scaler here is
    # fit on the same X_train as the base variant, so the scaled arrays are
    # reused instead of being recomputed.
    X_train_smote, y_train_smote = sm.fit_resample(X_train_base, y_train)
    X_test_smote = X_test_base

    # OVER: random oversampling first, then scale with a scaler fit on the
    # resampled training data.
    X_train_over, y_train_over = ros.fit_resample(X_train, y_train)
    X_train_over = sc.fit_transform(X_train_over)
    X_test_over = sc.transform(X_test)

    # UNDER: random undersampling first, then scale likewise.
    X_train_under, y_train_under = rus.fit_resample(X_train, y_train)
    X_train_under = sc.fit_transform(X_train_under)
    X_test_under = sc.transform(X_test)

    return {
        'base': (X_train_base, X_test_base, y_train, y_test),
        'smote': (X_train_smote, X_test_smote, y_train_smote, y_test),
        'oversampling': (X_train_over, X_test_over, y_train_over, y_test),
        'undersampling': (X_train_under, X_test_under, y_train_under, y_test),
    }

In [2]:
# This cell needs sc, sm, ros, rus and the X_train / X_test / y_train split
# to exist already; run the setup and split cells first on a fresh kernel.

# BASE: scale only. The tuple is evaluated left to right, so the scaler is
# fit on the training data before the test data is transformed.
X_train_base, X_test_base = sc.fit_transform(X_train), sc.transform(X_test)

# SMOTE: scale, then synthesise minority samples on the scaled training data.
X_train_smote, y_train_smote = sm.fit_resample(sc.fit_transform(X_train), y_train)
X_test_smote = sc.transform(X_test)

# OVER: random oversampling on the raw training data, then scale with a
# scaler fit on the resampled set.
X_train_oversamplig, y_train_oversampling = ros.fit_resample(X_train, y_train)
X_train_oversamplig, X_test_oversamplig = (
    sc.fit_transform(X_train_oversamplig),
    sc.transform(X_test),
)

# UNDER: random undersampling on the raw training data, then scale likewise.
X_train_undersampling, y_train_undersampling = rus.fit_resample(X_train, y_train)
X_train_undersampling, X_test_undersampling = (
    sc.fit_transform(X_train_undersampling),
    sc.transform(X_test),
)

NameError: name 'sc' is not defined