### 금융상품 갱신 여부를 예측하는 ANN

Churn_Modelling.csv 파일에는 고객 정보와, 해당 고객이 금융상품을 갱신했는지 여부에 대한 데이터가 들어 있다.

이 데이터를 가지고 갱신 여부를 예측하는 딥러닝 모델을 구성하시오.

# Neural Networks and Deep Learning

In [2]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


In [3]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
# Switch the working directory to the data folder on the mounted drive so
# files can be opened by bare file name.
os.chdir('/content/drive/MyDrive/Colab Notebooks/딥러닝/data')

In [6]:
# Load the churn dataset from the current working directory into a DataFrame.
df = pd.read_csv('Churn_Modelling.csv')

In [7]:
# Dataset dimensions as (rows, columns).
df.shape

(10000, 14)

In [9]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
RowNumber,0
CustomerId,0
Surname,0
CreditScore,0
Geography,0
Gender,0
Age,0
Tenure,0
Balance,0
NumOfProducts,0


In [10]:
# Peek at the first two rows.
df.head(2)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0


In [11]:
# Target vector: the 'Exited' column.
y = df['Exited']

In [12]:
# Feature matrix: every row, columns 'CreditScore' through 'EstimatedSalary'.
# .loc slices by label and includes BOTH endpoints, so the identifier columns
# before 'CreditScore' and the 'Exited' target after 'EstimatedSalary' are left out.
X = df.loc[ : ,  'CreditScore' :  'EstimatedSalary' ]

In [13]:
# Inspect the first three target values.
y.head(3)

Unnamed: 0,Exited
0,1
1,0
2,1


In [14]:
# Inspect the first three feature rows.
X.head(3)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57


In [17]:
# List the distinct Geography values in sorted order.
sorted( df['Geography'].unique() )

['France', 'Germany', 'Spain']

In [19]:
# List the distinct Gender values in sorted order.
sorted( df['Gender'].unique() )

['Female', 'Male']

In [20]:
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler

In [21]:
# Check which values IsActiveMember takes.
df['IsActiveMember'].unique()

array([1, 0])

In [22]:
# Check which values HasCrCard takes.
df['HasCrCard'].unique()

array([1, 0])

In [23]:
from sklearn.compose import ColumnTransformer

In [30]:
# Preprocessing steps, addressed by column POSITION within X
# (CreditScore, Geography, Gender, Age, Tenure, Balance, NumOfProducts,
#  HasCrCard, IsActiveMember, EstimatedSalary):
#   [2]                -> Gender, ordinal-encoded
#   [1]                -> Geography, one-hot encoded
#   [0, 3, 4, 5, 6, 9] -> CreditScore, Age, Tenure, Balance, NumOfProducts,
#                         EstimatedSalary, min-max scaled
# Columns not listed (HasCrCard, IsActiveMember) are passed through unchanged.
gender_step = ('label', OrdinalEncoder(), [2])
geography_step = ('onehot', OneHotEncoder(), [1])
numeric_step = ('scaler', MinMaxScaler(), [0, 3, 4, 5, 6, 9])

ct = ColumnTransformer(
    [gender_step, geography_step, numeric_step],
    remainder='passthrough',
)

In [31]:
# Display the configured transformer.
ct

In [32]:
# Fit the encoders/scaler on X and transform every row in one step.
# NOTE(review): this fits on the full dataset before the train/test split
# further down, so the fitted statistics (e.g. the scaler's min/max) also see
# the rows that later become the test set. Consider fitting on the training
# split only and calling transform() on the test split.
X_scaled = ct.fit_transform(X)

In [33]:
# Inspect the first transformed row.
X_scaled[0, ]

array([0.        , 1.        , 0.        , 0.        , 0.538     ,
       0.32432432, 0.2       , 0.        , 0.        , 0.50673489,
       1.        , 1.        ])

In [34]:
# Re-check the first three target values before splitting.
y.head(3)

Unnamed: 0,Exited
0,1
1,0
2,1


In [35]:
from sklearn.model_selection import train_test_split

In [37]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    random_state=42,
    test_size=0.2,
)

### 딥러닝으로 모델링

In [38]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

In [41]:
# Input data: shape of the preprocessed feature matrix.
X_scaled.shape

(10000, 12)

In [43]:
# Look at one preprocessed row to confirm what the network will receive.
X_scaled[ 0 ,  ]

array([0.        , 1.        , 0.        , 0.        , 0.538     ,
       0.32432432, 0.2       , 0.        , 0.        , 0.50673489,
       1.        , 1.        ])

In [51]:
# Fully connected binary classifier: 12 inputs -> 6 -> 8 -> 1.
# Hidden-layer sizes are a free design choice: there is no single right
# answer, so try alternatives and keep whichever evaluates best.
# "units" is the number of neurons (nodes) in a layer.
model = Sequential([
    keras.Input(shape=(12,)),              # input layer: one value per preprocessed column
    Dense(units=6, activation='relu'),     # first hidden layer
    Dense(units=8, activation='relu'),     # second hidden layer
    Dense(units=1, activation='sigmoid'),  # output layer: a single value in (0, 1)
])

In [52]:
# Print a summary of the model that was just built.
model.summary()

In [53]:
# A Keras model has to be compiled before it can be trained: compiling fixes
# the optimizer, the loss to minimize and the metrics reported during fit().
adam_optimizer = keras.optimizers.Adam()
bce_loss = keras.losses.BinaryCrossentropy()
binary_accuracy_metric = keras.metrics.BinaryAccuracy()

model.compile(
    optimizer=adam_optimizer,
    loss=bce_loss,
    metrics=[binary_accuracy_metric],
)

In [54]:
# Train the model.
# epochs=50 requests 50 full passes over the training set; batch_size=10 feeds
# 10 rows per weight update (the log below shows 800 steps per epoch).
model.fit(X_train, y_train, epochs=50, batch_size=10)

Epoch 1/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - binary_accuracy: 0.7898 - loss: 0.5409
Epoch 2/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.7912 - loss: 0.4848
Epoch 3/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.7968 - loss: 0.4661
Epoch 4/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.8026 - loss: 0.4458
Epoch 5/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - binary_accuracy: 0.8089 - loss: 0.4412
Epoch 6/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.8094 - loss: 0.4292
Epoch 7/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.8240 - loss: 0.4069
Epoch 8/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - binary_accuracy: 0.8399

<keras.src.callbacks.history.History at 0x7fa7a6fbcb00>

In [56]:
# Predict on the held-out test rows. The sigmoid output layer yields one value
# in (0, 1) per row rather than a 0/1 label.
# NOTE(review): apply a threshold (e.g. y_pred > 0.5) before comparing with y_test.
y_pred = model.predict(X_test)

63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step


In [57]:
# Display the raw predicted values.
y_pred

array([[0.04596638],
       [0.02609593],
       [0.14051066],
       ...,
       [0.6886176 ],
       [0.11119075],
       [0.30276674]], dtype=float32)

In [None]:
# Part 2 - Now let's make the ANN!

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

## 다음 신규 데이터를 통해 분류해 보자

- Geography: France
- Credit Score: 600
- Gender: Male
- Age: 40
- Tenure: 3
- Balance: 60000
- Number of Products: 2
- Has Credit Card: Yes
- Is Active Member: Yes
- Estimated Salary: 50000

### 용어 정리

epoch

- 한 번의 epoch는 신경망에서 전체 데이터 셋에 대해 forward pass/backward pass 과정을 거친 것을 말함. 즉, 전체 데이터 셋에 대해 한 번 학습을 완료한 상태


batch_size

- 메모리의 한계와 속도 저하 때문에, 대부분의 경우 한 번의 epoch에서 모든 데이터를 한꺼번에 집어넣을 수는 없다. 그래서 데이터를 여러 묶음으로 나누어서 주게 되는데, 이때 나누어 주는 횟수를 iteration, 각 iteration마다 주는 데이터의 크기를 batch size라고 한다. 예를 들어 위 학습에서는 학습 데이터 8,000건을 batch_size=10으로 나누었으므로 1 epoch당 iteration은 800번이다.

출처: https://www.slideshare.net/w0ong/ss-82372826