In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv1D, MaxPool1D
from keras.layers import Embedding, Dense, LSTM, Activation, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
import pandas as pd
import numpy as np
import  seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
ger=pd.read_table('./German/german.data', header=None,sep=' ')
colnames=['Status','Duration','History','Purpose','Amount','Savings','Employment','Installment%','Personal','Other','Residence','Property','Age','Plans','Housing','Existing','Job', 'People','Telephone','Foreign','Label']
ger.columns=colnames

In [4]:
ger_pre=ger.copy(deep=True)
ger_pre=ger_pre.drop(columns=['Label'])
numer_ger=StandardScaler().fit_transform(ger_pre.select_dtypes(include='int64'))
numer_ger=pd.DataFrame(numer_ger,columns = ger_pre.select_dtypes(include='number').columns)
#print(numer_ger)

cate_ger=pd.get_dummies(ger_pre.select_dtypes(exclude='int64'))


scale_ger=pd.concat([numer_ger, cate_ger, ger[['Label']]], axis=1)

In [5]:
from sklearn.ensemble import RandomForestClassifier
X, y = scale_ger.loc[:, np.delete(scale_ger.columns.values, 
                                        np.where(scale_ger.columns.values == ['Label']))], \
        scale_ger.loc[:, 'Label']

# train on randomForest to get important features
clf = RandomForestClassifier(n_estimators=100, max_depth=2,
                             random_state=0)
clf.fit(X, y) 

feature_importances = pd.DataFrame(sorted(zip(scale_ger.columns, clf.feature_importances_), key=lambda x: x[1] * -1),
                                    columns = ['feature','importance'])

In [8]:
top_features = feature_importances[feature_importances.importance>0]['feature'].values

X, y = scale_ger.loc[:,top_features], scale_ger.loc[:,'Label']

In [9]:
from imblearn.over_sampling import SMOTE

In [10]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0,stratify = y)
sm = SMOTE(random_state=0,sampling_strategy=1)
x_train_b, y_train_b = sm.fit_resample(x_train, y_train.ravel())

In [11]:
from scipy.stats import ks_2samp

In [12]:
def ks_stat(y, yhat):
    return ks_2samp(yhat[y==1], yhat[y!=1]).statistic

In [13]:
def type2_calcu(y,yhat):
    confusion =confusion_matrix(y,yhat)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    return FP / float(FP + TN)
def type1_calcu(y,yhat):
    confusion =confusion_matrix(y,yhat)
    #[row, column]
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    return FN / float(TP + FN)

In [14]:
def scores(model):
    train=[]
    test=[]
    y_train_c=model.predict(x_train)
    y_train_pre=model.predict_proba(x_train)
    y_pred_c=model.predict(x_test)
    y_pred = model.predict_proba(x_test)
    #roc
    train.append(roc_auc_score(y_train,y_train_pre))
    test.append(roc_auc_score(y_test, y_pred))
    #ks
    train.append(ks_stat(y_train,np.round(y_train_c[:,0],0)))
    test.append(ks_stat(y_test, np.round(y_pred_c[:,0],0)))
    #brier
    train.append(brier_score_loss(y_train,y_train_pre))
    test.append(brier_score_loss(y_test, y_pred))
    #acc
    train.append(accuracy_score(y_train,np.round(y_train_c,0)))
    test.append(accuracy_score(y_test, np.round(y_pred_c,0)))
    #t1
    train.append(type1_calcu(y_train,np.round(y_train_c,0)))
    test.append(type1_calcu(y_test, np.round(y_pred_c,0)))
    #t2
    train.append(type2_calcu(y_train,np.round(y_train_c,0)))
    test.append(type2_calcu(y_test, np.round(y_pred_c,0)))
    return train,test

In [15]:
print(x_train.shape)

(800, 52)


In [16]:
x_train=StandardScaler().fit_transform(x_train)
x_test=StandardScaler().fit_transform(x_test)

x_train_b=StandardScaler().fit_transform(x_train_b)

In [17]:
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

x_train_b = x_train_b.reshape(x_train_b.shape[0], x_train_b.shape[1], 1)

In [18]:
epochs = 20
model = Sequential()
model.add(Conv1D(32, 2, activation='relu',input_shape = x_train[0].shape))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Conv1D(64, 2, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid'))

Metal device set to: Apple M1 Pro


2022-07-29 00:45:25.308740: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-29 00:45:25.309108: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 51, 32)            96        
_________________________________________________________________
batch_normalization (BatchNo (None, 51, 32)            128       
_________________________________________________________________
module_wrapper (ModuleWrappe (None, 51, 32)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 50, 64)            4160      
_________________________________________________________________
batch_normalization_1 (Batch (None, 50, 64)            256       
_________________________________________________________________
module_wrapper_1 (ModuleWrap (None, 50, 64)            0         
_________________________________________________________________
module_wrapper_2 (ModuleWrap (None, 3200)              0

In [20]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss = 'binary_crossentropy', metrics=['accuracy'])

In [21]:
history = model.fit(x_train, y_train, epochs=20, verbose=1)

2022-07-29 00:45:27.581759: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-07-29 00:45:27.584387: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/20


2022-07-29 00:45:27.954032: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
from sklearn.metrics import roc_auc_score, recall_score, precision_score,make_scorer,confusion_matrix,brier_score_loss,accuracy_score

In [23]:
print(scores(model))

2022-07-29 00:45:40.074096: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-07-29 00:45:40.219973: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


([0.5, 0.0, 0.7, 0.7, 1.0, 0.0], [0.5, 0.0, 0.7, 0.7, 1.0, 0.0])


In [27]:
model2 = Sequential()
model2.add(LSTM(32, input_shape=(1,52), activation='relu', return_sequences=True))
model2.add(Dropout(0.2))

model2.add(LSTM(64, activation='relu'))
model2.add(Dropout(0.5))

model2.add(Dense(64, activation='relu'))
model2.add(Dropout(0.5))

model2.add(Flatten())
model2.add(Dense(1, activation='sigmoid'))



In [25]:
x_train = x_train.reshape(x_train.shape[0], 1,x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], 1,x_test.shape[1])

In [28]:
x_train.shape

(800, 1, 52)

In [29]:
model2.compile(optimizer=Adam(learning_rate=0.001), loss = 'binary_crossentropy', metrics=['accuracy'])

In [30]:
history=model2.fit(x_train, y_train, epochs=20)

Epoch 1/20


2022-07-29 00:46:11.370983: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [31]:
print(scores(model2))

2022-07-29 00:46:20.024195: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


([0.5, 0.0, 0.7, 0.7, 1.0, 0.0], [0.5, 0.0, 0.7, 0.7, 1.0, 0.0])


2022-07-29 00:46:20.490920: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


In [32]:
epochs = 20
model3 = Sequential()
model3.add(Conv1D(32, 2, activation='relu',input_shape = x_train_b[0].shape))
model3.add(BatchNormalization())
model3.add(Dropout(0.2))

model3.add(Conv1D(64, 2, activation='relu'))
model3.add(BatchNormalization())
model3.add(Dropout(0.5))

model3.add(Flatten())
model3.add(Dense(64, activation='relu'))
model3.add(Dropout(0.5))

model3.add(Dense(1, activation='sigmoid'))

In [33]:
x_train=x_train_b
y_train=y_train_b

In [34]:
model3.compile(optimizer=Adam(learning_rate=0.0001), loss = 'binary_crossentropy', metrics=['accuracy'])

In [35]:
history = model3.fit(x_train, y_train, epochs=20, verbose=1)

Epoch 1/20
 6/35 [====>.........................] - ETA: 0s - loss: 0.5735 - accuracy: 0.3490 

2022-07-29 00:46:36.435347: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [37]:
x_test = x_test.reshape(x_test.shape[0],x_test.shape[2],1)

In [38]:
print(scores(model3))

([0.5, 0.0, 0.5, 0.5, 1.0, 0.0], [0.5, 0.0, 0.7, 0.7, 1.0, 0.0])


2022-07-29 00:47:37.292553: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


In [39]:
model4 = Sequential()
model4.add(LSTM(32, input_shape=(1,52), activation='relu', return_sequences=True))
model4.add(Dropout(0.2))

model4.add(LSTM(64, activation='relu'))
model4.add(Dropout(0.5))

model4.add(Dense(64, activation='relu'))
model4.add(Dropout(0.5))

model4.add(Flatten())
model4.add(Dense(1, activation='sigmoid'))



In [40]:
x_train_b = x_train_b.reshape(x_train_b.shape[0],1,x_train_b.shape[1])

In [41]:
x_train=x_train_b
y_train=y_train_b

In [42]:
model4.compile(optimizer=Adam(learning_rate=0.001), loss = 'binary_crossentropy', metrics=['accuracy'])

In [43]:
history=model4.fit(x_train_b, y_train_b, epochs=20)

Epoch 1/20


2022-07-29 00:48:01.789157: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [44]:
x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])

In [45]:
print(scores(model4))

2022-07-29 00:48:43.864156: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


([0.5, 0.0, 0.5, 0.5, 1.0, 0.0], [0.5, 0.0, 0.7, 0.7, 1.0, 0.0])


2022-07-29 00:48:44.384313: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
