In [130]:
from sklearn.metrics import f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np

In [131]:
#Constants
NUM_FOLDS = 5

In [132]:
# fetch dataset 
heart_disease = fetch_ucirepo(id=45) 

# data (as pandas dataframes) 
X = heart_disease.data.features 
y = heart_disease.data.targets
y = y.rename(columns={"num":"diagnosis"})

In [133]:
#Samples data so that there is an equal distribution of classes

# data = X.join(y)

# split_data = []
# for i in range(0,5):
#     dataset = data.loc[data["diagnosis"]==i]
#     split_data.append(dataset)

# min_sample = min([len(dataset) for dataset in split_data])

# split_samples = []
# for split in split_data:
#     sampled_split = split.sample(min_sample)
#     split_samples.append(sampled_split)
    
# data = pd.concat(split_samples, axis=0, ignore_index=True)
# X = data.iloc[:, :-1]
# y = data.iloc[:, -1:]


#Due to the low number of samples, a k-fold validation scheme will be used
fold_split = int(len(X)/NUM_FOLDS)

In [134]:
#Normalizes data
X = (X-X.mean())/X.std()

In [135]:
#Creates model
def neural_net(input_shape=13, output_shape=5):
    model = Sequential( [
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(output_shape, activation='softmax')
        ])
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['F1Score'])
    return model


In [136]:
f1scores = []
pre_training_f1scores = []

for i in range(0,NUM_FOLDS):
    print(f"Starting fold {i}...")
    fold_start = i*fold_split
    fold_end = fold_start + fold_split

    X_val = X[fold_start:fold_end]
    y_val = y[fold_start:fold_end]

    X_train = pd.concat([X[:fold_start], X[fold_end:]], axis=0, ignore_index=True)
    y_train = pd.concat([y[:fold_start], y[fold_end:]], axis=0, ignore_index = True)

    nn = neural_net()

    pre_training_pred = np.argmax(nn.predict(X_val), axis=1)
    pre_training_f1 = f1_score(y_val, pre_training_pred, average='weighted')
    pre_training_f1scores.append(pre_training_f1)
    print(f"Fold {i} pre-trainiing f1-score: {f1}")

    nn.fit(X_train, y_train, epochs = 100, batch_size = 10, verbose=0)

    y_pred = np.argmax(nn.predict(X_val), axis=1)
    f1 = f1_score(y_val, y_pred, average='weighted')
    f1scores.append(f1)
    print(f"Fold {i} f1-score: {f1}")

Starting fold 0...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Fold 0 pre-trainiing f1-score: 0.3333333333333333


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Fold 0 f1-score: 0.4298245614035087
Starting fold 1...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Fold 1 pre-trainiing f1-score: 0.4298245614035087


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Fold 1 f1-score: 0.352014652014652
Starting fold 2...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Fold 2 pre-trainiing f1-score: 0.352014652014652


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Fold 2 f1-score: 0.40992907801418443
Starting fold 3...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Fold 3 pre-trainiing f1-score: 0.40992907801418443


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Fold 3 f1-score: 0.39032258064516134
Starting fold 4...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Fold 4 pre-trainiing f1-score: 0.39032258064516134


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Fold 4 f1-score: 0.3333333333333333


In [137]:
mean_pretraining_f1 = np.average(pre_training_f1scores)
print(f"Average pre-training f1-score: {mean_pretraining_f1}")

mean_f1 = np.average(f1scores)
print(f"Average f1-score: {mean_f1}")

Average pre-training f1-score: 0.1685142567841366
Average f1-score: 0.38308484108216795
