## 預測類別(y=>客戶是否訂閱了定期存款？)

In [1]:
import numpy as np
import pandas as pd
import keras.backend as K
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

#參考文獻
#https://www.itread01.com/elpc.html
#https://www.796t.com/content/1541950927.html
#https://datascience.stackexchange.com/questions/45165/how-to-get-accuracy-f1-precision-and-recall-for-a-keras-model

In [2]:
from keras import backend as K

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [3]:
#Import data
data = pd.read_csv("C:\\bank-full.csv", sep=";")

In [4]:
#Preprocessing-Delete Data data
data = data.drop(["day", "month"], axis=1)

In [5]:
#Preprocessing-LabelEncoder
for col in data[["job", "marital", "education", "default", "housing", "loan", "contact", "poutcome", "y"]]:
    LE = LabelEncoder()
    data[col] = LE.fit_transform(data[col])

In [6]:
#Preprocessing-Split data
X = data.drop("y", axis=1)
y = data[["y"]]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) #訓練資料(包含驗證資料)85%，測試資料15%

In [7]:
#Preprocessing-Standardization
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
#epochs=5, batch_size=100
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=[precision_m, recall_m, f1_m])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=5, batch_size=100)

model.evaluate(X_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.3114885687828064,
 0.2012578696012497,
 0.07319368422031403,
 0.10063298046588898]

In [9]:
#epochs=10, batch_size=100
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=[precision_m, recall_m, f1_m])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=10, batch_size=100)

model.evaluate(X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.26497888565063477,
 0.25982704758644104,
 0.10810887813568115,
 0.14119061827659607]

In [10]:
#epochs=5, batch_size=50
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=[precision_m, recall_m, f1_m])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=5, batch_size=50)

model.evaluate(X_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.25297310948371887,
 0.37932389974594116,
 0.1588669866323471,
 0.21030974388122559]

## 預測數值(age=>客戶年齡)

In [11]:
#Import data
data = pd.read_csv("C:\\bank-full.csv", sep=";")

In [12]:
#Preprocessing-Delete Data data
data = data.drop(["day", "month"], axis=1)

In [13]:
#Preprocessing-LabelEncoder
for col in data[["job", "marital", "education", "default", "housing", "loan", "contact", "poutcome", "y"]]:
    LE = LabelEncoder()
    data[col] = LE.fit_transform(data[col])

In [14]:
#Preprocessing-Split data
X = data.drop("age", axis=1)
y = data[["age"]]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) #訓練資料(含驗證資料)85%，測試資料15%

In [15]:
#Preprocessing-Standardization
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
#epochs=5, batch_size=100
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1))

model.compile(loss="mse", optimizer="adam", metrics=["MAE", "MAPE", "MSE"])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=5, batch_size=100)

model.evaluate(X_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[184.5299072265625, 9.951435089111328, 24.50676155090332, 184.5299072265625]

In [17]:
#epochs=10, batch_size=100
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1))

model.compile(loss="mse", optimizer="adam", metrics=["MAE", "MAPE", "MSE"])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=10, batch_size=100)

model.evaluate(X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[98.13658142089844, 7.969728946685791, 20.59163475036621, 98.13658142089844]

In [18]:
#epochs=5, batch_size=50
model = Sequential()
model.add(Dense(input_dim=14, units=1))
model.add(Dense(20, activation="relu"))
model.add(Dense(1))

model.compile(loss="mse", optimizer="adam", metrics=["MAE", "MAPE", "MSE"])

#訓練資料(含驗證資料)占資料集85%，驗證資料須占資料集15%，因此設定為15/85=17%(占85%裡的17%)
history = model.fit(X_train, y_train, validation_split=0.17, epochs=5, batch_size=50)

model.evaluate(X_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[104.40863800048828, 8.286643028259277, 21.362796783447266, 104.40863800048828]