In [43]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [44]:
# Focal Loss Function
def focal_loss(gamma=3.0, alpha=0.5):
    def focal_loss_fixed(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
        focal_loss = -y_true * (alpha * tf.math.pow(1 - y_pred, gamma) * tf.math.log(y_pred))
        return tf.reduce_sum(focal_loss, axis=-1)
    return focal_loss_fixed

In [45]:
# Define the MLP model with ReLU activation
def create_mlp_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Flatten(input_shape=input_shape))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

In [46]:
num_classes = 2

# Load dataset hasil oversampling ADASYN_Extreme
file_path_oversampled_extreme = 'D:\Tugas Akhir\Stroke\data_oversampled_extreme.csv'
df_oversampled_extreme = pd.read_csv(file_path_oversampled_extreme)

# Ganti koma dengan titik dan ubah tipe data ke float32 untuk kolom tertentu
columns_to_convert = ['age', 'avg_glucose_level', 'bmi']
df_oversampled_extreme[columns_to_convert] = df_oversampled_extreme[columns_to_convert].replace(',', '.', regex=True).astype('float32')

# Pisahkan fitur dan target untuk dataset hasil oversampling ADASYN_Extreme
X_oversampled_extreme = df_oversampled_extreme.drop('stroke', axis=1)
y_oversampled_extreme = df_oversampled_extreme['stroke']

# Mengonversi target menjadi one-hot encoding untuk ADASYN_Extreme
y_oversampled_one_hot_extreme = tf.keras.utils.to_categorical(y_oversampled_extreme, num_classes)

# Pastikan tipe data float32 untuk fitur
X_oversampled_extreme = X_oversampled_extreme.astype('float32')


# Load dataset hasil PCA-KMeans_Extreme
file_path_pca_kmeans_extreme = 'D:/Tugas Akhir/Stroke/data_hasil_nearmiss_extreme.csv'
df_pca_kmeans_extreme = pd.read_csv(file_path_pca_kmeans_extreme)

# Pisahkan fitur dan target untuk dataset hasil PCA-KMeans_Extreme
X_pca_kmeans_extreme = df_pca_kmeans_extreme.drop(['stroke', 'Cluster'], axis=1)
y_pca_kmeans_extreme = df_pca_kmeans_extreme['stroke']

# Mengonversi target menjadi one-hot encoding untuk PCA-KMeans_Extreme
y_pca_kmeans_one_hot_extreme = tf.keras.utils.to_categorical(y_pca_kmeans_extreme, num_classes)

# Pastikan tipe data float32 untuk fitur
X_pca_kmeans_extreme = X_pca_kmeans_extreme.astype('float32')

In [50]:
df_oversampled_extreme

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type_Govt_job,work_type_Private,work_type_Self-employed,Residence_type,avg_glucose_level,bmi,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,stroke
0,0,0.7368,0,1,1,0,1,0,0,0.8012,0.5227,0,1,0,0,1
1,1,0.9474,1,0,1,0,0,1,1,0.5491,0.2624,0,0,1,0,1
2,0,0.8596,1,1,1,0,1,0,1,0.0687,0.3326,0,0,1,0,1
3,1,0.9825,1,0,1,0,1,0,1,0.1164,0.3802,0,0,1,0,1
4,1,0.4386,1,0,1,0,0,1,1,0.5182,0.4050,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5726,0,0.9322,0,0,1,0,0,1,1,0.5251,0.3395,0,0,0,0,1
5727,0,0.9124,0,0,1,0,0,1,0,0.4925,0.4394,0,0,0,0,1
5728,0,0.9134,0,0,1,0,0,1,0,0.4806,0.4331,0,0,0,0,1
5729,0,0.9311,0,0,1,0,0,1,1,0.5234,0.3450,0,0,0,0,1


In [51]:
df_pca_kmeans_extreme

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,stroke,Cluster
0,0.0454,1.4776,0.2833,-0.4726,1.1645,-1.4793,0.0496,-0.0434,-1.2224,0.8218,-0.2692,-0.5128,0.2727,0,1
1,-0.5580,-0.5396,1.6776,-0.3919,0.1744,0.0910,0.0103,-0.0305,-1.1521,0.2849,-0.5984,-0.1249,0.6249,0,0
2,0.3382,1.5483,0.3136,-0.6456,1.0693,-1.5518,-0.2368,-0.2153,-1.3136,0.8603,-0.1137,-0.2689,0.9716,0,1
3,-0.3545,-0.4968,1.7308,-0.5302,0.0238,0.0279,-0.4167,-0.0653,-1.1065,0.5004,-0.4960,-0.7621,0.6737,0,3
4,-0.4800,-0.5194,1.7212,-0.4384,0.0940,0.0782,-0.2168,-0.0025,-1.0881,0.4419,-0.5802,-0.7045,0.4531,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,1.6072,-1.4407,0.0553,-0.7620,-1.5520,-0.8354,0.2250,1.1951,2.2515,-0.9046,0.2886,-0.3301,1.9275,1,3
72,-0.0660,-0.4206,2.0552,0.1762,-0.4346,0.1627,0.9061,0.2003,1.5649,1.5725,2.5932,1.1732,2.0806,1,4
73,-0.0451,1.4449,0.3273,-1.2067,0.8030,2.3934,1.0336,0.0741,-1.1068,0.8241,-1.0349,-1.0975,0.7019,1,4
74,2.5613,3.5270,2.6722,1.1776,0.9326,0.8750,0.2209,0.1416,1.4190,2.9943,-0.5556,0.7633,-1.5516,1,1


In [52]:

# Hitung frekuensi nilai unik dalam kolom 'stroke'
stroke_counts = df_oversampled_extreme['stroke'].value_counts()
# Tampilkan output
print("Data Pada ADASYN Extreme 'stroke':")
print(stroke_counts)

# Hitung frekuensi nilai unik dalam kolom 'stroke'
stroke_counts = df_pca_kmeans_extreme['stroke'].value_counts()
# Tampilkan output
print("Data Pada PCA-KMeans Extreme 'stroke':")
print(stroke_counts)

Data Pada Normalisasi 'stroke':
stroke
0    3481
1      30
Name: count, dtype: int64
Data Pada ADASYN Extreme 'stroke':
stroke
0    3481
1    2250
Name: count, dtype: int64
Data Pada PCA-KMeans Extreme 'stroke':
stroke
0    46
1    30
Name: count, dtype: int64


In [41]:
# Pisahkan data ADASYN menjadi set pelatihan dan pengujian
X_train_oversampled_extreme, X_test_oversampled_extreme, y_train_oversampled_extreme, y_test_oversampled_extreme = train_test_split(X_oversampled_extreme, y_oversampled_extreme, test_size=0.2, random_state=42)

# Pisahkan data PCA-KMeans menjadi set pelatihan dan pengujian
X_train_pca_kmeans_extreme, X_test_pca_kmeans_extreme, y_train_pca_kmeans_extreme, y_test_pca_kmeans_extreme = train_test_split(X_pca_kmeans_extreme, y_pca_kmeans_extreme, test_size=0.2, random_state=42)


In [42]:
import numpy as np
# Menghitung jumlah kelas 0 dan 1 pada ADASYN (data latih)
unique_oversampled_train, counts_oversampled_train = np.unique(y_train_oversampled_extreme, return_counts=True)
num_class_0_oversampled_train = counts_oversampled_train[unique_oversampled_train == 0][0]
num_class_1_oversampled_train = counts_oversampled_train[unique_oversampled_train == 1][0]

# Menghitung jumlah kelas 0 dan 1 pada ADASYN (data uji)
unique_oversampled_test, counts_oversampled_test = np.unique(y_test_oversampled_extreme, return_counts=True)
num_class_0_oversampled_test = counts_oversampled_test[unique_oversampled_test == 0][0]
num_class_1_oversampled_test = counts_oversampled_test[unique_oversampled_test == 1][0]

print("Data Latih ADASYN Extreme:")
print(f"Jumlah data latih: {len(y_train_oversampled_extreme)}")
print(f"Jumlah kelas 0: {num_class_0_oversampled_train}")
print(f"Jumlah kelas 1: {num_class_1_oversampled_train}")
print("\nData Uji ADASYN Extreme:")
print(f"Jumlah data uji: {len(y_test_oversampled_extreme)}")
print(f"Jumlah kelas 0: {num_class_0_oversampled_test}")
print(f"Jumlah kelas 1: {num_class_1_oversampled_test}")
print("===========================================")

# Menghitung jumlah kelas 0 dan 1 pada PCA-KMeans (data latih)
unique_pca_kmeans_train, counts_pca_kmeans_train = np.unique(y_train_pca_kmeans_extreme, return_counts=True)
num_class_0_pca_kmeans_train = counts_pca_kmeans_train[unique_pca_kmeans_train == 0][0]
num_class_1_pca_kmeans_train = counts_pca_kmeans_train[unique_pca_kmeans_train == 1][0]

# Menghitung jumlah kelas 0 dan 1 pada PCA-KMeans (data uji)
unique_pca_kmeans_test, counts_pca_kmeans_test = np.unique(y_test_pca_kmeans_extreme, return_counts=True)
num_class_0_pca_kmeans_test = counts_pca_kmeans_test[unique_pca_kmeans_test == 0][0]
num_class_1_pca_kmeans_test = counts_pca_kmeans_test[unique_pca_kmeans_test == 1][0]

print("Data Latih PCA-KMeans Extreme:")
print(f"Jumlah data latih: {len(y_train_pca_kmeans_extreme)}")
print(f"Jumlah kelas 0: {num_class_0_pca_kmeans_train}")
print(f"Jumlah kelas 1: {num_class_1_pca_kmeans_train}")
print("\nData Uji PCA-KMeans Extreme:")
print(f"Jumlah data uji: {len(y_test_pca_kmeans_extreme)}")
print(f"Jumlah kelas 0: {num_class_0_pca_kmeans_test}")
print(f"Jumlah kelas 1: {num_class_1_pca_kmeans_test}")
print("===========================================")

Data Latih ADASYN Extreme:
Jumlah data latih: 4584
Jumlah kelas 0: 2782
Jumlah kelas 1: 1802

Data Uji ADASYN Extreme:
Jumlah data uji: 1147
Jumlah kelas 0: 699
Jumlah kelas 1: 448
Data Latih PCA-KMeans Extreme:
Jumlah data latih: 60
Jumlah kelas 0: 37
Jumlah kelas 1: 23

Data Uji PCA-KMeans Extreme:
Jumlah data uji: 16
Jumlah kelas 0: 9
Jumlah kelas 1: 7


In [24]:
# Menetapkan bentuk input dan jumlah kelas berdasarkan fitur dan target untuk ADASYN
input_shape_adasyn = X_train_oversampled_extreme.shape[1:]
num_classes_adasyn = y_oversampled_one_hot_extreme.shape[1]  # Disesuaikan untuk mendapatkan jumlah kelas secara dinamis
y_train_oversampled_one_hot = tf.keras.utils.to_categorical(y_train_oversampled_extreme, num_classes_adasyn)
y_test_oversampled_one_hot = tf.keras.utils.to_categorical(y_test_oversampled_extreme, num_classes_adasyn)

# Menetapkan bentuk input dan jumlah kelas berdasarkan fitur dan target untuk PCA-KMeans
input_shape_pca_kmeans = X_train_pca_kmeans_extreme.shape[1:]
num_classes_pca_kmeans = y_pca_kmeans_one_hot_extreme.shape[1]  # Disesuaikan untuk mendapatkan jumlah kelas secara dinamis
y_train_pca_kmeans_one_hot = tf.keras.utils.to_categorical(y_train_pca_kmeans_extreme, num_classes_pca_kmeans)
y_test_pca_kmeans_one_hot = tf.keras.utils.to_categorical(y_test_pca_kmeans_extreme, num_classes_pca_kmeans)

In [25]:
# Create the MLP model for ADASYN
model_adasyn = create_mlp_model(input_shape_adasyn, num_classes_adasyn)

# Compile the model with Focal Loss
model_adasyn.compile(optimizer='adam',
                     loss=focal_loss(),
                     metrics=['accuracy'])

# Create the MLP model for PCA-KMeans
model_pca_kmeans = create_mlp_model(input_shape_pca_kmeans, num_classes_pca_kmeans)

# Compile the model with Focal Loss
model_pca_kmeans.compile(optimizer='adam',
                         loss=focal_loss(),
                         metrics=['accuracy'])


In [26]:
# Calculate class weights for ADASYN
class_weights_adasyn = {0: num_class_1_oversampled_train / num_class_0_oversampled_train, 1: 1.0}

# Print model summary for ADASYN
model_adasyn.summary()

# Train the model for ADASYN with class weights
history_adasyn = model_adasyn.fit(X_train_oversampled_extreme, y_train_oversampled_one_hot, epochs=50, batch_size=32,
                                  validation_split=0.2, class_weight=class_weights_adasyn)
# Evaluate the model on the test set for ADASYN
test_loss_adasyn, test_acc_adasyn = model_adasyn.evaluate(X_test_oversampled_extreme, y_test_oversampled_one_hot)
print(f'Test Loss (ADASYN) Extreme: {test_loss_adasyn}, Test Accuracy (ADASYN) Extreme: {test_acc_adasyn}')

# Predict classes for the test set for ADASYN
y_pred_probabilities_adasyn = model_adasyn.predict(X_test_oversampled_extreme)
y_pred_adasyn = y_pred_probabilities_adasyn.argmax(axis=1)

# Convert one-hot encoded y_test back to numerical values for ADASYN
y_true_adasyn = np.array(y_test_oversampled_extreme)

# Calculate precision, recall, and F1-score for ADASYN
report_adasyn = classification_report(y_true_adasyn, y_pred_adasyn)
print("Classification Report (ADASYN) Extreme:\n", report_adasyn)

# Confusion Matrix for ADASYN
conf_matrix_adasyn = confusion_matrix(y_true_adasyn, y_pred_adasyn)
print("Confusion Matrix (ADASYN) Extreme:\n", conf_matrix_adasyn)


# Calculate class weights for PCA-KMeans
class_weights_pca_kmeans = {0: num_class_1_pca_kmeans_train / num_class_0_pca_kmeans_train, 1: 1.0}

# Print model summary for PCA-KMeans
model_pca_kmeans.summary()

# Train the model for PCA-KMeans with class weights
history_pca_kmeans = model_pca_kmeans.fit(X_train_pca_kmeans_extreme, y_train_pca_kmeans_one_hot, epochs=50, batch_size=32,
                                          validation_split=0.2, class_weight=class_weights_pca_kmeans)

# Evaluate the model on the test set for PCA-KMeans
test_loss_pca_kmeans, test_acc_pca_kmeans = model_pca_kmeans.evaluate(X_test_pca_kmeans_extreme, y_test_pca_kmeans_one_hot)
print(f'Test Loss (PCA-KMeans) Extreme: {test_loss_pca_kmeans}, Test Accuracy (PCA-KMeans) Extreme: {test_acc_pca_kmeans}')

# Predict classes for the test set for PCA-KMeans
y_pred_probabilities_pca_kmeans = model_pca_kmeans.predict(X_test_pca_kmeans_extreme)
y_pred_pca_kmeans = y_pred_probabilities_pca_kmeans.argmax(axis=1)

# Convert one-hot encoded y_test back to numerical values for PCA-KMeans
y_true_pca_kmeans = y_test_pca_kmeans_extreme.to_numpy()

# Calculate precision, recall, and F1-score for PCA-KMeans
report_pca_kmeans = classification_report(y_true_pca_kmeans, y_pred_pca_kmeans)
print("Classification Report (PCA-KMeans) Extreme:\n", report_pca_kmeans)

# Confusion Matrix for PCA-KMeans
conf_matrix_pca_kmeans = confusion_matrix(y_true_pca_kmeans, y_pred_pca_kmeans)
print("Confusion Matrix (PCA-KMeans) Extreme:\n", conf_matrix_pca_kmeans)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 15)                0         
                                                                 
 dense_6 (Dense)             (None, 128)               2048      
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dense_8 (Dense)             (None, 2)                 130       
                                                                 
Total params: 10434 (40.76 KB)
Trainable params: 10434 (40.76 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epo