# Importar datos y eliminar columnas innecesarias

In [124]:
import pandas as pd

In [125]:
bank_churn_csv = pd.read_csv('files/train.csv')
bank_churn_csv

In [126]:
bank_churn_csv.shape

In [127]:
column: int = bank_churn_csv.columns[0]
bank_churn_csv[column]

In [128]:
bank_churn_df = bank_churn_csv.drop(columns=['id', 'CustomerId', 'Surname'])
bank_churn_df.head()

# Transformar datos por columna

In [129]:
# columna CreditScore se pasa a float
bank_churn_df.CreditScore = bank_churn_df.CreditScore.astype(float)
bank_churn_df.CreditScore

In [130]:
bank_churn_df.Geography, unique = bank_churn_df.Geography.factorize(use_na_sentinel=True)
bank_churn_df.Geography = bank_churn_df.Geography.astype(float)
bank_churn_df.Geography

In [131]:
bank_churn_df.Gender, unique = bank_churn_df.Gender.factorize(use_na_sentinel=True)
bank_churn_df.Gender = bank_churn_df.Gender.astype(float)
bank_churn_df.Gender

In [132]:
bank_churn_df.Age = bank_churn_df.Age.astype(float)
bank_churn_df.Age

In [133]:
bank_churn_df.Tenure = bank_churn_df.Tenure.astype(float)
bank_churn_df.Tenure

In [134]:
bank_churn_df.Balance = bank_churn_df.Balance.astype(float)
bank_churn_df.Balance

In [135]:
bank_churn_df.NumOfProducts = bank_churn_df.NumOfProducts.astype(float)
bank_churn_df.NumOfProducts

In [136]:
bank_churn_df.HasCrCard

In [137]:
bank_churn_df.IsActiveMember.unique()

In [138]:
bank_churn_df.EstimatedSalary

In [139]:
#bank_churn_df.Exited = bank_churn_df.Exited.astype(float)

In [140]:
from sklearn.model_selection import train_test_split

# Target is the Exited column; features are every other column.
y_df = bank_churn_df['Exited']
x_df = bank_churn_df.drop(columns=['Exited'])

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=42,
)

In [141]:
# Scale the features with MinMaxScaler (training values end up in [0, 1])
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training split only and then applied to both
# splits, so the held-out split does not influence the scaling parameters.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [142]:
# Build tensors, datasets and data loaders
import torch
from torch.utils.data import TensorDataset, DataLoader

# Features become float32 tensors; targets are reshaped to a single column so
# they line up with the model's one-unit output.
X_train_tensor = torch.tensor(data=X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(data=X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset_tensor = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset_tensor = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 64
# Training batches are reshuffled every epoch so the optimizer does not see
# the same batches in the same order each time. Evaluation order is left
# untouched because it does not affect the computed losses.
train_loader = DataLoader(dataset=train_dataset_tensor, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset_tensor, batch_size=batch_size)

In [151]:
# Creación de modelo
import torch.nn as nn
class BankChurnModel(nn.Module):
    """Fully connected binary classifier with a sigmoid output.

    The network stacks four 64-unit linear layers, each followed by ReLU, and
    a final one-unit linear layer followed by a sigmoid, so every output value
    lies in [0, 1].

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        # This layer used to be nn.Linear(64, 1); its 1-wide output was then
        # passed to fc4, which expects 64 inputs, so forward() failed with a
        # shape mismatch. Only the last layer reduces to a single unit.
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 64)
        self.fc5 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a (rows, input_size) tensor to a (rows, 1) tensor of values in [0, 1]."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.sigmoid(self.fc5(x))
        return x

In [144]:
from torch import optim

# The network takes one input per column of the scaled training matrix.
input_size: int = X_train.shape[1]
model = BankChurnModel(input_size=input_size)

# Binary cross-entropy over the model's sigmoid output, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [145]:
# Función de entrenamiento del modelo

def train(model, train_loader, optimizer, criterion, epochs):
    """Train ``model`` for ``epochs`` full passes over ``train_loader``.

    Args:
        model: Module to optimize; it is switched to training mode each epoch.
        train_loader: Iterable yielding ``(batch_x, batch_y)`` pairs; it must
            support ``len()`` and yield at least one batch.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, batch_y)`` returning a scalar
            loss tensor.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A tuple ``(loss_list_train, iteration_list_train)``: the mean batch
        loss of each epoch and the matching 1-based epoch numbers.
    """
    loss_list_train = []
    iteration_list_train = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()

            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        loss_mean = epoch_loss / len(train_loader)
        loss_list_train.append(loss_mean)
        iteration_list_train.append(epoch + 1)

        # Report once per 10th epoch, after all batches, using the epoch mean.
        # Previously this ran inside the batch loop and printed one line per
        # batch, each showing only that batch's loss.
        if (epoch + 1) % 10 == 0:
            print('Epoch {}/{}, Loss: {:.4f}'.format(epoch + 1, epochs, loss_mean))

    return loss_list_train, iteration_list_train

In [148]:
# Función para probar el modelo

def test(model, test_loader, criterion):
    
    model.eval()
    epoch_loss = 0.0
    loss_list_test = []
    iteration_list_test = []
    
    with torch.no_grad():
        for index, (batch_x, batch_y) in enumerate(test_loader):
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            epoch_loss += loss.item()
            
            print('Index {}, Loss: {:.4f}'.format(index + 1, loss.item()))
            
            loss_list_test.append(loss.item())
            iteration_list_test.append(index + 1)
    
    loss_test_mean =epoch_loss / len(test_loader)
    return iteration_list_test, loss_list_test, loss_test_mean

In [152]:
# Run the training loop and keep the per-epoch loss history for plotting.
epochs: int = 100
loss_list_train, iteration_list_train = train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=epochs,
)

In [153]:
import matplotlib.pyplot as plt

# Mean training loss per epoch. The x values are epoch numbers, so the axis is
# labelled accordingly, and the title names the training curve (it previously
# reused the 'TEST' title of the evaluation plot).
plt.plot(iteration_list_train, loss_list_train)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('TRAIN: Loss curve')
plt.show()

In [154]:
iteration_list_test, loss_list_test, loss_test_mean = test(model, test_loader, criterion)

In [155]:
plt.plot(iteration_list_test, loss_list_test)
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.title('TEST: Loss curve')
plt.show()

In [156]:
bank_churn_csv = pd.read_csv('files/test.csv')
bank_churn_csv

In [157]:
bank_churn_csv.shape

In [158]:
bank_churn_df = bank_churn_csv.drop(columns=['id', 'CustomerId', 'Surname'])
bank_churn_df

In [159]:
bank_churn_df.CreditScore = bank_churn_df.CreditScore.astype(float)
bank_churn_df.CreditScore

In [160]:
bank_churn_df.Geography, unique = bank_churn_df.Geography.factorize(use_na_sentinel=True)
bank_churn_df.Geography = bank_churn_df.Geography.astype(float)
bank_churn_df.Geography

In [161]:
bank_churn_df.Gender, unique = bank_churn_df.Gender.factorize(use_na_sentinel=True)
bank_churn_df.Gender = bank_churn_df.Gender.astype(float)
bank_churn_df.Gender

In [162]:
bank_churn_df.Age = bank_churn_df.Age.astype(float)
bank_churn_df.Age

In [163]:
bank_churn_df.Tenure = bank_churn_df.Tenure.astype(float)
bank_churn_df.Tenure

In [164]:
bank_churn_df.Balance = bank_churn_df.Balance.astype(float)
bank_churn_df.Balance

In [165]:
bank_churn_df.NumOfProducts = bank_churn_df.NumOfProducts.astype(float)
bank_churn_df.NumOfProducts

In [166]:
bank_churn_df.HasCrCard

In [167]:
bank_churn_df.IsActiveMember

In [168]:
bank_churn_df.EstimatedSalary

In [170]:
# Apply the scaling learned from the training split. Calling fit_transform
# here would refit the scaler on this file and map its values onto a different
# scale from the one the model was trained with.
test_df = scaler.transform(bank_churn_df)
test_df

In [171]:
test_df_tensor = torch.tensor(test_df, dtype=torch.float32)

In [172]:
model.eval()
with torch.no_grad():
    predictions = model(test_df_tensor)

In [173]:
predictions = (predictions > 0.5).int()
predictions

In [174]:
result = pd.DataFrame({
    'id': bank_churn_csv.id,
    'Exited': predictions.numpy().flatten()
})

In [175]:
result.to_csv('files/bank_churn_predictions.csv', index=False)

In [176]:
result