In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.models import Model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import plot_roc_curve, roc_curve, auc, roc_auc_score
from scipy import interp
from sklearn.metrics import confusion_matrix, average_precision_score
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold, GridSearchCV, KFold, train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.initializers import HeNormal, Constant
import shutil
import matplotlib.pyplot as plt
%matplotlib inline
from pprint import pprint

# Read the semicolon-delimited dataset. Labels come from the "Target" column;
# features are every column except the first and the last one.
df = pd.read_csv(r"Data.csv", sep=";")
X = df.iloc[:, 1:-1]
y = df["Target"]

# Shuffle features and labels together with a fixed seed.
X, y = shuffle(X, y, random_state=0)

# Standardize the features.
# NOTE(review): the scaler is fitted on the complete dataset before any split,
# so held-out rows contribute to the scaling statistics - confirm this is intended.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

# Outer split: 70 % training data, 30 % final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1
)
# Inner split of the training part: 70 % fitting data, 30 % validation data.
X_train_val, X_test_val, y_train_val, y_test_val = train_test_split(
    X_train, y_train, test_size=0.30, random_state=1
)

# Cast every feature matrix to float32.
X_train, X_test, X_train_val, X_test_val = (
    features.astype(np.float32)
    for features in (X_train, X_test, X_train_val, X_test_val)
)

# Cast every label vector to float32 and reshape it into a single column.
y_train, y_test, y_train_val, y_test_val = (
    np.array(labels.astype(np.float32)).reshape(-1, 1)
    for labels in (y_train, y_test, y_train_val, y_test_val)
)




## Training data

In [None]:
# Best parameters = {'batch_size': 128, 'dropout_rate': 0.0, 'learning_rate': 0.001, 'n_hidden': 3}

def build_model(num_features: int, 
                learning_rate: float,
                n_hidden: int,
                batch_size: int,
                dropout_rate: float) -> Model:
    """Build and compile a fully connected binary classifier.

    The network consists of an input Dense layer with 1,000 ELU units,
    ``n_hidden`` further Dense/ELU layers whose widths shrink by 250 per
    layer (1000, 750, 500, 250) and a sigmoid output layer with
    ``num_targets`` units. ``num_targets`` is read from module scope.

    Args:
        num_features: Number of input features per sample.
        learning_rate: Learning rate passed to the Adam optimizer.
        n_hidden: Number of additional hidden layers (at most 4, otherwise a
            layer would end up with a non-positive width).
        batch_size: Not used by the model itself; accepted so that a
            parameter-grid dict can be unpacked directly into this function.
        dropout_rate: Rate used for every Dropout layer.

    Returns:
        The compiled Keras model (binary cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If ``n_hidden`` would produce a hidden layer with zero or
            negative width.
    """
    hidden_units = [1_000 - i * 250 for i in range(n_hidden)]
    if any(units <= 0 for units in hidden_units):
        raise ValueError(
            f"n_hidden={n_hidden} yields a hidden layer with non-positive width "
            f"{hidden_units}; use n_hidden <= 4"
        )

    # Seed NumPy *and* TensorFlow: weight initialisation and dropout masks are
    # drawn from TensorFlow's generator, which NumPy's seed does not control.
    np.random.seed(0)
    tf.random.set_seed(0)

    def _dense(units: int, **kwargs) -> Dense:
        # A fresh initializer per layer avoids sharing one unseeded instance.
        return Dense(
            units=units,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            bias_initializer=tf.keras.initializers.Constant(value=0.0),
            **kwargs,
        )

    model = Sequential()
    model.add(_dense(1_000, input_shape=(num_features,)))
    model.add(Dropout(dropout_rate))
    model.add(Activation("elu"))
    for units in hidden_units:
        model.add(Dropout(dropout_rate))
        model.add(_dense(units))
        model.add(Dropout(dropout_rate))
        model.add(Activation("elu"))
    model.add(Dropout(dropout_rate))
    model.add(_dense(num_targets))
    # No Dropout after the output layer: zeroing the single output logit would
    # force the prediction to 0.5 for the dropped samples during training.
    model.add(Activation("sigmoid"))

    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(learning_rate=learning_rate),
        metrics=["accuracy"]
    )

    return model

num_features = X_train.shape[1]
num_targets = 1 
epochs = 50

# Hyperparameter grid (currently only the best combination). It must exist
# before the loop below iterates over it.
param_grid = {
    'n_hidden': [3],
    'learning_rate': [0.001],
    'dropout_rate': [0.0],
    'batch_size' : [128]
}

# Created once, outside the loop, so the entries of all combinations are kept.
results = {
    "best_score": -np.inf,
    "best_params": {},
    "val_scores": [],
    "params": []
}

grid = ParameterGrid(param_grid)
print(f"Parameter combinations in total: {len(grid)}")
pprint(param_grid) 

for idx, comb in enumerate(grid):
    model = build_model(
        num_features, 
        **comb)

    ANN = model.fit(
        X_train_val,
        y_train_val,
        epochs=epochs,
        # Take the batch size from the current combination instead of relying
        # on a global that is only defined in a later cell.
        batch_size=comb["batch_size"],
        verbose=1,
    )
    # Computes [loss, accuracy] on the data the model was fitted on.
    scores = model.evaluate(
        X_train_val,
        y_train_val,
        verbose=1
        )

    results["val_scores"].append(scores)
    results["params"].append(comb)
    print(f"Accuracy = {scores}")

# Evaluate the last fitted model on the validation split.
yprob = model.predict(X_test_val)
# Probabilities above 0.5 become class 1 (identical to rounding the sigmoid output).
ypred = (yprob > 0.5).astype(np.float32)

print(classification_report(y_test_val, ypred, digits=3))
print(confusion_matrix(y_test_val,ypred))
# Average precision is a ranking metric and needs the un-thresholded scores.
print(f"Average precision score {average_precision_score(y_test_val.ravel(), yprob.ravel()):.3f}")

## Test data

In [None]:
# Parameter initialization

num_features = X_train.shape[1]
num_targets = 1 

param_grid = {
    'n_hidden': [3],
    'learning_rate': [0.001],
    'dropout_rate': [0],
    'batch_size': [128]
}

results = {
    "best_score": -np.inf,
    "best_params": {},
    "val_scores": [],
    "params": []
}

grid = ParameterGrid(param_grid)
print(f"Parameter combinations in total: {len(grid)}")
pprint(param_grid) 

# Fit on the complete training set for every parameter combination.
for idx, comb in enumerate(grid):
    model = build_model(
        num_features, 
        **comb)

    ANN = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        # Take the batch size from the current combination instead of relying
        # on a global that is only defined in a later cell.
        batch_size=comb["batch_size"],
        verbose=1,
    )
    # Computes [loss, accuracy] on the data the model was fitted on.
    scores = model.evaluate(
        X_train,
        y_train,
        verbose=1
        )

    results["val_scores"].append(scores)
    results["params"].append(comb)
    print(f"Accuracy = {scores}")

# Evaluate the last fitted model on the held-out test set.
yprob = model.predict(X_test)
# Probabilities above 0.5 become class 1 (identical to rounding the sigmoid output).
ypred = (yprob > 0.5).astype(np.float32)

print(classification_report(y_test, ypred, digits=3))
print(confusion_matrix(y_test,ypred))
# Average precision is a ranking metric and needs the un-thresholded scores.
print(f"Average precision score {average_precision_score(y_test.ravel(), yprob.ravel()):.3f}")

## Cross Validation

In [None]:
## ROC curve from stratified k-fold cross-validation on the training/validation split

num_features = X_train.shape[1]
num_targets = 1 
learning_rate = 0.001
n_hidden = 3
batch_size = 128
epochs = 50
dropout_rate = 0.0
no_splits = 5

cv = StratifiedKFold(n_splits=no_splits)

prbs = []   # predicted probabilities per fold
tprs = []   # TPR of every fold, interpolated onto mean_fpr
fprs = []
aucs = []   # AUC per fold
mean_fpr = np.linspace(0, 1, 100)

fig, ax = plt.subplots()

# 1-D view of the labels for the splitter and for roc_curve.
y_cv = y_train_val.ravel()

# One model per fold: train on the fold's training indices and score its
# held-out indices. Indices refer to X_train_val / y_train_val, the arrays
# that were handed to the splitter.
for j, (train_idx, test_idx) in enumerate(cv.split(X_train_val, y_cv)):
    print(">>")
    print(f"Fold = {j}")

    # Same architecture as in the other cells; a fresh model per fold.
    model = build_model(
        num_features,
        learning_rate=learning_rate,
        n_hidden=n_hidden,
        batch_size=batch_size,
        dropout_rate=dropout_rate,
    )
    model.summary()

    # Fit the model
    model.fit(X_train_val[train_idx], y_train_val[train_idx], epochs=epochs,
              batch_size=batch_size, verbose=1)

    probas_ = model.predict(X_train_val[test_idx])
    prbs.append(probas_)
    fpr, tpr, thresholds = roc_curve(y_cv[test_idx], probas_.ravel())

    # Resample the fold's ROC curve onto the common FPR grid so the folds can be averaged.
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    auc_ = auc(fpr, tpr)
    aucs.append(auc_)

    ax.plot(fpr, tpr,
            label=r'ROC fold %i (AUC = %0.3f )' % (j, auc_),
            lw=1, alpha=.3)

# Average the predictions        
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)

ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)

# Band of +/- one standard deviation around the mean curve, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)

ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                 label=r'$\pm$ 1 std. dev.')
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.3f $\pm$ %0.3f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       title="e) Receiver operating characteristic DNN")
ax.set_xlabel('False Positive Rate (Positive label: 1)')
ax.set_ylabel('True Positive Rate (Positive label: 1)')

ax.legend(loc="lower right")

fig.savefig("ann.pdf", dpi=600,transparent=True)
fig.savefig("ann.jpg", dpi=600,transparent=True)
plt.show()


## Y-Randomization

In [None]:
# Y-randomization: retrain the network on permuted labels. A model that still
# scores well on scrambled labels would indicate chance correlation.
N_RANDOMIZATIONS = 99
yrand_params = {'n_hidden': 3, 'learning_rate': 0.001, 'dropout_rate': 0.0, 'batch_size': 128}

accur = []
for i in range(N_RANDOMIZATIONS):
    # Explicit per-iteration seed: build_model reseeds NumPy's global generator,
    # so an unseeded shuffle would produce the same permutation every time.
    y_rand = shuffle(y, random_state=i)

    # Loop-local names keep the X_train / X_test splits of the earlier cells intact.
    X_tr, X_te, y_tr, y_te = train_test_split(X, 
                                              y_rand, test_size=0.30, random_state=1)
    X_tr = np.asarray(X_tr, dtype=np.float32)
    X_te = np.asarray(X_te, dtype=np.float32)
    y_tr = np.asarray(y_tr, dtype=np.float32).reshape(-1, 1)
    y_te = np.asarray(y_te, dtype=np.float32)

    # Fresh model per permutation, trained like the real one, instead of
    # continuing to train a model left over from a previous cell.
    rand_model = build_model(X_tr.shape[1], **yrand_params)
    rand_model.fit(X_tr, y_tr, epochs=epochs,
                   batch_size=yrand_params["batch_size"], verbose=0)

    yprob = rand_model.predict(X_te, verbose=0).ravel()
    # Probabilities above 0.5 become class 1 (identical to rounding the sigmoid output).
    ypred = (yprob > 0.5).astype(np.float32)

    # Average precision is a ranking metric and needs the un-thresholded scores.
    accur.append(average_precision_score(y_te, yprob))
    print(classification_report(y_te,ypred,digits=3,zero_division=1 ))
    print(confusion_matrix(y_te,ypred))
print(f"Mean average precision score = {np.mean(accur):.3f}, std {np.std(accur):.3f}")