In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow import keras

from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

In [3]:
data = pd.read_csv('../../../data/Churn_Modelling.csv')

X = data.iloc[:, 3:-1].values
y = data.iloc[:, -1].values

le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = ct.fit_transform(X)
print(X.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

ann = keras.models.Sequential()
ann.add(keras.layers.Dense(10, activation='relu'))
ann.add(keras.layers.Dense(10, activation='relu'))
ann.add(keras.layers.Dropout(0.3))
ann.add(keras.layers.Dense(1, activation='sigmoid'))
ann.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

check_cb = keras.callbacks.ModelCheckpoint('test.keras', save_best_only=True)
early_cb = keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

ann.fit(X_train, y_train, epochs=100, verbose=1, validation_data=(X_test, y_test), callbacks=[check_cb, early_cb])
#verbose=1: 진행 막대 표시
ann.summary()

(10000, 12)
Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 849us/step - accuracy: 0.6850 - loss: 0.6674 - val_accuracy: 0.8040 - val_loss: 0.5163
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450us/step - accuracy: 0.7873 - loss: 0.5358 - val_accuracy: 0.8135 - val_loss: 0.4492
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450us/step - accuracy: 0.7987 - loss: 0.4700 - val_accuracy: 0.8235 - val_loss: 0.4200
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443us/step - accuracy: 0.8072 - loss: 0.4472 - val_accuracy: 0.8280 - val_loss: 0.4087
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437us/step - accuracy: 0.8101 - loss: 0.4349 - val_accuracy: 0.8340 - val_loss: 0.4019
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445us/step - accuracy: 0.8169 - loss: 0.4297 - val_accuracy: 0.8360 - val_loss: 0.393

In [None]:
a = ann.predict(sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])) > 0.5
b = ann.predict(sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]))
print(a)
print(b)
#0 -> 계속 이용 / 1 -> 떠남

pred = ann.predict(X_test)
pred = (pred > 0.5)
print(np.concatenate((pred.reshape(len(pred), 1), y_test.reshape(len(y_test), 1)), 1))

from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, pred)
print(cm)

accuracy_score(y_test, pred) #정확도 89%

#(1)이용 유지 1519명 예측
#(4)이용 취소 212명 예측
#(2)이용 취소 예측 76명 틀림
#(3)이용 유지 193명 틀림

[[False]]
[[0.03657942]]
[[0 0]
 [0 0]
 [1 1]
 ...
 [0 0]
 [0 0]
 [1 0]]
[[1568   61]
 [ 196  175]]


0.8715