In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, recall_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [2]:
url = "https://drive.google.com/uc?id=1l61dPkShxh6QdSCZuPmYGP7TBhnB6nCh"

# Lê o arquivo CSV
df = pd.read_csv(url, sep=";")

# Visualiza as primeiras linhas do DataFrame
df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0,1,1,1,10134888,1
1,608,Spain,Female,41,1,8380786,1,0,1,11254258,0
2,502,France,Female,42,8,1596608,3,1,0,11393157,1
3,699,France,Female,39,1,0,2,0,0,9382663,0
4,850,Spain,Female,43,2,12551082,1,1,1,790841,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0,2,1,0,9627064,0
9996,516,France,Male,35,10,5736961,1,1,1,10169977,0
9997,709,France,Female,36,7,0,1,0,1,4208558,1
9998,772,Germany,Male,42,3,7507531,2,1,0,9288852,1


In [3]:
X = df.drop("Exited", axis=1)
y = df["Exited"]

In [4]:
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0,1,1,1,10134888
1,608,Spain,Female,41,1,8380786,1,0,1,11254258
2,502,France,Female,42,8,1596608,3,1,0,11393157
3,699,France,Female,39,1,0,2,0,0,9382663
4,850,Spain,Female,43,2,12551082,1,1,1,790841
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0,2,1,0,9627064
9996,516,France,Male,35,10,5736961,1,1,1,10169977
9997,709,France,Female,36,7,0,1,0,1,4208558
9998,772,Germany,Male,42,3,7507531,2,1,0,9288852


In [5]:
standarscaler = StandardScaler()
numerical = X.select_dtypes(include=['int64','float64']).columns
X[numerical] = standarscaler.fit_transform(X[numerical])

In [6]:
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,-0.326221,France,Female,0.293517,-1.041760,-1.110553,-0.911583,0.646092,0.970243,0.170614
1,-0.440036,Spain,Female,0.198164,-1.387538,0.222782,-0.911583,-1.547768,0.970243,0.353281
2,-1.536794,France,Female,0.293517,1.032908,-0.856542,2.527057,0.646092,-1.030670,0.375948
3,0.501521,France,Female,0.007457,-1.387538,-1.110553,0.807737,-1.547768,-1.030670,0.047859
4,2.063884,Spain,Female,0.388871,-1.041760,0.886252,-0.911583,0.646092,0.970243,-1.354223
...,...,...,...,...,...,...,...,...,...,...
9995,1.246488,France,Male,0.007457,-0.004426,-1.110553,0.807737,0.646092,-1.030670,0.087743
9996,-1.391939,France,Male,-0.373958,1.724464,-0.197835,-0.911583,0.646092,0.970243,0.176340
9997,0.604988,France,Female,-0.278604,0.687130,-1.110553,-0.911583,-1.547768,0.970243,-0.796492
9998,1.256835,Germany,Male,0.293517,-0.695982,0.083852,0.807737,0.646092,-1.030670,0.032551


In [7]:
labelencoder = LabelEncoder()
categorical = X.select_dtypes(include='object').columns
for col in categorical:
  X[col] = labelencoder.fit_transform(X[col])

In [8]:
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,-0.326221,0,0,0.293517,-1.041760,-1.110553,-0.911583,0.646092,0.970243,0.170614
1,-0.440036,2,0,0.198164,-1.387538,0.222782,-0.911583,-1.547768,0.970243,0.353281
2,-1.536794,0,0,0.293517,1.032908,-0.856542,2.527057,0.646092,-1.030670,0.375948
3,0.501521,0,0,0.007457,-1.387538,-1.110553,0.807737,-1.547768,-1.030670,0.047859
4,2.063884,2,0,0.388871,-1.041760,0.886252,-0.911583,0.646092,0.970243,-1.354223
...,...,...,...,...,...,...,...,...,...,...
9995,1.246488,0,1,0.007457,-0.004426,-1.110553,0.807737,0.646092,-1.030670,0.087743
9996,-1.391939,0,1,-0.373958,1.724464,-0.197835,-0.911583,0.646092,0.970243,0.176340
9997,0.604988,0,0,-0.278604,0.687130,-1.110553,-0.911583,-1.547768,0.970243,-0.796492
9998,1.256835,1,1,0.293517,-0.695982,0.083852,0.807737,0.646092,-1.030670,0.032551


In [9]:
X_treino, X_teste, y_treino, y_teste = train_test_split(X,y, test_size=0.3, random_state=0)

In [10]:
modelo = Sequential()
modelo.add(Dense(units=64, activation='relu', input_dim=X_treino.shape[1]))
modelo.add(Dropout(0.4))
modelo.add(Dense(units=32, activation='relu'))
modelo.add(Dropout(0.4))
modelo.add(Dense(units=64, activation='relu'))
modelo.add(Dropout(0.4))
modelo.add(Dense(units=1, activation='sigmoid'))

In [11]:
modelo.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
modelo.fit(X_treino, y_treino, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7a36d3c24880>

In [12]:
previsoes = modelo.predict(X_teste)
previsoes



array([[0.30404145],
       [0.2590343 ],
       [0.24789357],
       ...,
       [0.07973928],
       [0.09027538],
       [0.46480608]], dtype=float32)

In [13]:
y_pred = (previsoes > 0.5).astype('int32')
y_pred

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int32)

In [14]:
print('Acurácia: ', accuracy_score(y_teste, y_pred))
print('F1: ', f1_score(y_teste, y_pred))
print('Recall: ', recall_score(y_teste, y_pred))
print('Matriz de Confusão: ', confusion_matrix(y_teste, y_pred))

Acurácia:  0.8633333333333333
F1:  0.5948616600790515
Recall:  0.48470209339774556
Matriz de Confusão:  [[2289   90]
 [ 320  301]]
