# **Modelamiento**

## Importamos librerías necesarias

In [1]:
# importemos las librerías básicas a usar

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

## Lectura de datos

In [2]:
# Load the modelling dataset (first CSV column is used as the index) and turn
# the textual target 'Exited_C' ('Yes'/'No') into a numeric 'Exited' column
# (1/0). The textual column is dropped once it has been encoded.
df = (
    pd.read_csv('datasets/data_to_model.csv', index_col=0)
    .assign(Exited=lambda frame: frame['Exited_C'].map({'Yes': 1, 'No': 0}))
    .drop(columns=['Exited_C'])
)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Apliquemos aprendizaje no supervisado

Vamos a aplicar aprendizaje no supervisado para crear 2 clusters sin la variable Exited y ver qué tanta similitud tendrían con esta variable.
Para esto usaremos Spectral Clustering:

In [17]:
from sklearn.cluster import SpectralClustering
from sklearn.preprocessing import MinMaxScaler

# One-hot encode the categorical columns, then remove the target so the
# clustering never sees it.
aux = pd.get_dummies(df)
features = aux.drop(columns=['Exited'])

# Rescale every feature to [0, 1]. fit_transform returns a bare NumPy array,
# so the original index is re-attached explicitly: without it `scaled` would
# get a fresh 0..n-1 index and the label-aligned assignment of the target at
# the end of this cell could silently mis-join (or produce NaN) whenever df's
# index is not exactly 0..n-1.
scaler = MinMaxScaler()
scaled = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns.to_list(),
    index=features.index,
)

# Two clusters, RBF affinity; random_state is fixed so the k-means label
# assignment step is reproducible.
spectral = SpectralClustering(
    n_clusters=2,
    n_jobs=-1,
    affinity='rbf',
    gamma=0.05,
    random_state=1234,
    assign_labels='kmeans',
)
# `scaled` holds only feature columns at this point, so the target cannot leak
# into the clustering input.
scaled['spectralclustering_label'] = spectral.fit_predict(scaled)

# Attach the real target for the comparison done in the next cell
# (aligned on the shared index).
scaled['exited'] = df['Exited']

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male,spectralclustering_label,exited
0,0.538,0.324324,0.2,0.0,0.0,1.0,1.0,0.506735,1.0,0.0,0.0,1.0,0.0,1,1
1,0.516,0.310811,0.1,0.334031,0.0,0.0,1.0,0.562709,0.0,0.0,1.0,1.0,0.0,1,0
2,0.304,0.324324,0.8,0.636357,0.666667,1.0,0.0,0.569654,1.0,0.0,0.0,1.0,0.0,1,1
3,0.698,0.283784,0.1,0.0,0.333333,0.0,0.0,0.46912,1.0,0.0,0.0,1.0,0.0,1,0
4,1.0,0.337838,0.2,0.500246,0.0,1.0,1.0,0.3954,0.0,0.0,1.0,1.0,0.0,1,0


In [18]:
from sklearn.metrics import accuracy_score

# Compare the unsupervised clusters against the real churn flag.
# Cluster ids produced by a clustering algorithm are arbitrary: cluster "1" is
# not inherently "churned". Evaluate both possible id -> class assignments and
# keep the one that agrees best with the target; otherwise a flipped labelling
# would report 1 - accuracy and wrong "correctly grouped" counts.
actual = scaled['exited'].to_numpy()
cluster_ids = scaled['spectralclustering_label'].to_numpy()
flipped_ids = 1 - cluster_ids
if accuracy_score(actual, flipped_ids) > accuracy_score(actual, cluster_ids):
    predicted = flipped_ids
else:
    predicted = cluster_ids

# Boolean masks for each real class.
stayed = actual == 0
churned = actual == 1

print('Con Spectral Clustering se obtuvieron los siguientes resultados:')
print('----------------------------------------------------------------')
print(f"De los que no cancelaron que son {int(stayed.sum())}, agrupó correctamente {int((stayed & (predicted == 0)).sum())}")
print(f"De los que sí cancelaron que son {int(churned.sum())}, agrupó correctamente {int((churned & (predicted == 1)).sum())}")
print(f"Accuracy: {accuracy_score(actual, predicted):.2%}")

Con Spectral Clustering se obtuvieron los siguientes resultados:
----------------------------------------------------------------
De los que no cancelaron que son 7963, agrupó correctamente 4559
De los que sí cancelaron que son 2037, agrupó correctamente 1139
Accuracy: 56.98%
