In [29]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models


In [30]:
# Focal loss factory for one-hot targets and probability outputs
def focal_loss(gamma=3.0, alpha=0.5):
    """Build a focal-loss function that can be passed to ``model.compile(loss=...)``.

    For each sample the returned function evaluates
    ``sum(-y_true * alpha * (1 - y_pred) ** gamma * log(y_pred))`` over the
    last axis.

    Args:
        gamma: Exponent of the ``(1 - y_pred)`` modulating factor. Larger
            values shrink the terms whose predicted probability is close to 1.
        alpha: Constant multiplier applied to every term (the same value is
            used for all classes).

    Returns:
        A callable ``focal_loss_fixed(y_true, y_pred)`` that returns the loss
        summed over the last axis (one value per sample for 2-D inputs).
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        # Align label dtype with the predictions so integer/bool targets (or a
        # float64/float32 mix) do not break the element-wise product below.
        y_true = tf.cast(y_true, y_pred.dtype)

        # Keep probabilities away from 0 and 1 so log() stays finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)

        modulating_factor = tf.math.pow(1 - y_pred, gamma)
        per_class_loss = -y_true * (alpha * modulating_factor * tf.math.log(y_pred))
        return tf.reduce_sum(per_class_loss, axis=-1)
    return focal_loss_fixed

In [31]:
# MLP classifier: Flatten -> Dense(128, relu) -> Dense(64, relu) -> Dense(num_classes, softmax)
def create_mlp_model(input_shape, num_classes):
    """Assemble an uncompiled Keras ``Sequential`` multilayer perceptron.

    Args:
        input_shape: Shape of a single sample, forwarded to the ``Flatten``
            layer as ``input_shape``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The ``Sequential`` model; compiling it is left to the caller.
    """
    hidden_units = (128, 64)

    network = models.Sequential()
    network.add(layers.Flatten(input_shape=input_shape))
    for units in hidden_units:
        network.add(layers.Dense(units, activation='relu'))
    network.add(layers.Dense(num_classes, activation='softmax'))
    return network

In [32]:
num_classes = 2

# Load the ADASYN_Extreme oversampled dataset.
# The path is a raw string: the previous non-raw literal only worked because
# '\T', '\S' and '\d' are unrecognised escapes, which newer Python versions
# warn about. The resulting path text is unchanged.
file_path_oversampled_extreme = r'D:\Tugas Akhir\Stroke\data_oversampled_extreme.csv'
df_oversampled_extreme = pd.read_csv(file_path_oversampled_extreme)

# Replace decimal commas with dots and cast the selected columns to float32
columns_to_convert = ['age', 'avg_glucose_level', 'bmi']
df_oversampled_extreme[columns_to_convert] = df_oversampled_extreme[columns_to_convert].replace(',', '.', regex=True).astype('float32')

# Split features and target for the ADASYN_Extreme dataset
X_oversampled_extreme = df_oversampled_extreme.drop('stroke', axis=1)
y_oversampled_extreme = df_oversampled_extreme['stroke']

# One-hot encode the target for ADASYN_Extreme
y_oversampled_one_hot_extreme = tf.keras.utils.to_categorical(y_oversampled_extreme, num_classes)

# Make sure every feature column is float32
X_oversampled_extreme = X_oversampled_extreme.astype('float32')


# Load the PCA-KMeans_Extreme dataset
# NOTE(review): the file name says "nearmiss" while the variables say
# PCA-KMeans — confirm this is the intended file.
file_path_pca_kmeans_extreme = 'D:/Tugas Akhir/Stroke/data_hasil_nearmiss_extreme.csv'
df_pca_kmeans_extreme = pd.read_csv(file_path_pca_kmeans_extreme)

# Split features and target for PCA-KMeans_Extreme; 'Cluster' is dropped from
# the features together with the target
X_pca_kmeans_extreme = df_pca_kmeans_extreme.drop(['stroke', 'Cluster'], axis=1)
y_pca_kmeans_extreme = df_pca_kmeans_extreme['stroke']

# One-hot encode the target for PCA-KMeans_Extreme
y_pca_kmeans_one_hot_extreme = tf.keras.utils.to_categorical(y_pca_kmeans_extreme, num_classes)

# Make sure every feature column is float32
X_pca_kmeans_extreme = X_pca_kmeans_extreme.astype('float32')

In [33]:
# Show the ADASYN_Extreme DataFrame as this cell's output for a visual check.
df_oversampled_extreme

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type_Govt_job,work_type_Private,work_type_Self-employed,Residence_type,avg_glucose_level,bmi,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,stroke
0,0,0.7368,0,1,1,0,1,0,0,0.8012,0.5227,0,1,0,0,1
1,1,0.9474,1,0,1,0,0,1,1,0.5491,0.2624,0,0,1,0,1
2,0,0.8596,1,1,1,0,1,0,1,0.0687,0.3326,0,0,1,0,1
3,1,0.9825,1,0,1,0,1,0,1,0.1164,0.3802,0,0,1,0,1
4,1,0.4386,1,0,1,0,0,1,1,0.5182,0.4050,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5726,0,0.9322,0,0,1,0,0,1,1,0.5251,0.3395,0,0,0,0,1
5727,0,0.9124,0,0,1,0,0,1,0,0.4925,0.4394,0,0,0,0,1
5728,0,0.9134,0,0,1,0,0,1,0,0.4806,0.4331,0,0,0,0,1
5729,0,0.9311,0,0,1,0,0,1,1,0.5234,0.3450,0,0,0,0,1


In [34]:
# Show the PCA-KMeans_Extreme DataFrame as this cell's output for a visual check.
df_pca_kmeans_extreme

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,stroke,Cluster
0,0.1017,1.8351,0.1562,-0.2998,-1.4598,-1.1414,-1.0581,0.4803,-0.5821,-0.6871,0.1376,-1.2289,-0.0697,0,1
1,0.5088,2.0720,0.3820,-0.2467,-1.5627,-0.8683,-0.6548,0.3571,-0.7074,-0.7990,0.0082,-0.5304,0.9272,0,5
2,0.2754,1.8993,0.1897,-0.3529,-1.4865,-1.1165,-0.9765,0.3561,-0.6710,-0.7826,0.1997,-0.6809,0.5477,0,4
3,-0.1028,-0.4707,1.5459,-0.6289,0.2972,-0.2778,-0.6863,-1.5858,0.2950,0.6064,-0.2588,-0.6498,1.1997,0,3
4,0.0499,1.9262,0.3027,-0.0547,-1.5671,-0.8605,-0.6120,0.6701,-0.5170,-0.6790,-0.1828,-1.4858,-0.4539,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,0.8354,-1.0846,-2.2162,-0.7739,-0.8871,1.8987,-1.0936,-0.2023,-1.1913,0.9801,0.2443,-0.5530,0.3005,1,3
72,2.9074,-0.4864,1.3193,1.4171,-0.5581,2.0166,-3.1483,1.5489,0.3570,0.7514,-2.2940,-0.0458,-0.4782,1,5
73,2.0769,0.2358,-0.5899,1.4076,-2.9188,0.6616,1.1090,0.2995,-1.1977,0.9873,-1.2534,-0.6970,0.3167,1,5
74,1.8260,-1.6206,0.4460,-0.9911,1.0541,-0.8300,0.9519,-1.0016,0.3297,-0.3870,-0.4687,0.7926,0.8552,1,4


In [35]:
# Print the class balance of the 'stroke' target for both datasets.
# Each entry pairs the heading to print with the frame to inspect.
for heading, frame in (
    ("Data Pada ADASYN 'stroke':", df_oversampled_extreme),
    ("Data Pada PCA-KMeans 'stroke':", df_pca_kmeans_extreme),
):
    # Frequency of each unique value in the 'stroke' column
    stroke_counts = frame['stroke'].value_counts()
    print(heading)
    print(stroke_counts)

Data Pada ADASYN 'stroke':
stroke
0    3481
1    2250
Name: count, dtype: int64
Data Pada PCA-KMeans 'stroke':
stroke
0    46
1    30
Name: count, dtype: int64


In [36]:
# Both datasets use the same hold-out settings: 20% test rows, fixed seed.
# NOTE(review): the ADASYN data is split after it was oversampled, so related
# synthetic rows may land on both sides of the split — confirm this is intended.
split_options = dict(test_size=0.2, random_state=42)

# Split the ADASYN data into training and test sets
(
    X_train_oversampled_extreme,
    X_test_oversampled_extreme,
    y_train_oversampled_extreme,
    y_test_oversampled_extreme,
) = train_test_split(X_oversampled_extreme, y_oversampled_extreme, **split_options)

# Split the PCA-KMeans data into training and test sets
(
    X_train_pca_kmeans_extreme,
    X_test_pca_kmeans_extreme,
    y_train_pca_kmeans_extreme,
    y_test_pca_kmeans_extreme,
) = train_test_split(X_pca_kmeans_extreme, y_pca_kmeans_extreme, **split_options)


In [37]:
def count_label(unique_labels, label_counts, label):
    """Look up how often ``label`` occurs, given the output of ``np.unique``.

    Args:
        unique_labels: Array of distinct label values.
        label_counts: Array of occurrence counts aligned with ``unique_labels``.
        label: Label value to look up.

    Returns:
        The count as a Python ``int``; 0 when ``label`` is absent (a plain
        ``label_counts[unique_labels == label][0]`` would raise ``IndexError``).
    """
    matches = label_counts[unique_labels == label]
    return int(matches[0]) if matches.size else 0


def print_split_summary(dataset_name, n_train, train_counts, n_test, test_counts):
    """Print totals and class-0/class-1 counts for one train/test split.

    Args:
        dataset_name: Label inserted into the printed headings.
        n_train: Number of training samples.
        train_counts: ``(class_0, class_1)`` counts in the training set.
        n_test: Number of test samples.
        test_counts: ``(class_0, class_1)`` counts in the test set.
    """
    print(f"Data Latih {dataset_name} Extreme:")
    print(f"Jumlah data latih: {n_train}")
    print(f"Jumlah kelas 0: {train_counts[0]}")
    print(f"Jumlah kelas 1: {train_counts[1]}")
    print(f"\nData Uji {dataset_name} Extreme:")
    print(f"Jumlah data uji: {n_test}")
    print(f"Jumlah kelas 0: {test_counts[0]}")
    print(f"Jumlah kelas 1: {test_counts[1]}")
    print("===========================================")


# Count classes 0 and 1 in the ADASYN training labels
unique_oversampled_train, counts_oversampled_train = np.unique(y_train_oversampled_extreme, return_counts=True)
num_class_0_oversampled_train = count_label(unique_oversampled_train, counts_oversampled_train, 0)
num_class_1_oversampled_train = count_label(unique_oversampled_train, counts_oversampled_train, 1)

# Count classes 0 and 1 in the ADASYN test labels
unique_oversampled_test, counts_oversampled_test = np.unique(y_test_oversampled_extreme, return_counts=True)
num_class_0_oversampled_test = count_label(unique_oversampled_test, counts_oversampled_test, 0)
num_class_1_oversampled_test = count_label(unique_oversampled_test, counts_oversampled_test, 1)

print_split_summary(
    "ADASYN",
    len(y_train_oversampled_extreme),
    (num_class_0_oversampled_train, num_class_1_oversampled_train),
    len(y_test_oversampled_extreme),
    (num_class_0_oversampled_test, num_class_1_oversampled_test),
)

# Count classes 0 and 1 in the PCA-KMeans training labels
unique_pca_kmeans_train, counts_pca_kmeans_train = np.unique(y_train_pca_kmeans_extreme, return_counts=True)
num_class_0_pca_kmeans_train = count_label(unique_pca_kmeans_train, counts_pca_kmeans_train, 0)
num_class_1_pca_kmeans_train = count_label(unique_pca_kmeans_train, counts_pca_kmeans_train, 1)

# Count classes 0 and 1 in the PCA-KMeans test labels
unique_pca_kmeans_test, counts_pca_kmeans_test = np.unique(y_test_pca_kmeans_extreme, return_counts=True)
num_class_0_pca_kmeans_test = count_label(unique_pca_kmeans_test, counts_pca_kmeans_test, 0)
num_class_1_pca_kmeans_test = count_label(unique_pca_kmeans_test, counts_pca_kmeans_test, 1)

print_split_summary(
    "PCA-KMeans",
    len(y_train_pca_kmeans_extreme),
    (num_class_0_pca_kmeans_train, num_class_1_pca_kmeans_train),
    len(y_test_pca_kmeans_extreme),
    (num_class_0_pca_kmeans_test, num_class_1_pca_kmeans_test),
)

Data Latih ADASYN Extreme:
Jumlah data latih: 4584
Jumlah kelas 0: 2782
Jumlah kelas 1: 1802

Data Uji ADASYN Extreme:
Jumlah data uji: 1147
Jumlah kelas 0: 699
Jumlah kelas 1: 448
Data Latih PCA-KMeans Extreme:
Jumlah data latih: 60
Jumlah kelas 0: 37
Jumlah kelas 1: 23

Data Uji PCA-KMeans Extreme:
Jumlah data uji: 16
Jumlah kelas 0: 9
Jumlah kelas 1: 7


In [38]:
# ADASYN: class count (taken from the width of the one-hot matrix, so it is not
# hard-coded), per-sample input shape, and one-hot targets for both splits
num_classes_adasyn = y_oversampled_one_hot_extreme.shape[1]
input_shape_adasyn = X_train_oversampled_extreme.shape[1:]
y_train_oversampled_one_hot, y_test_oversampled_one_hot = (
    tf.keras.utils.to_categorical(labels, num_classes_adasyn)
    for labels in (y_train_oversampled_extreme, y_test_oversampled_extreme)
)

# PCA-KMeans: same three quantities
num_classes_pca_kmeans = y_pca_kmeans_one_hot_extreme.shape[1]
input_shape_pca_kmeans = X_train_pca_kmeans_extreme.shape[1:]
y_train_pca_kmeans_one_hot, y_test_pca_kmeans_one_hot = (
    tf.keras.utils.to_categorical(labels, num_classes_pca_kmeans)
    for labels in (y_train_pca_kmeans_extreme, y_test_pca_kmeans_extreme)
)

In [39]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so re-running this cell starts both models from the same
# initial weights (the data split is already seeded with random_state=42).
tf.keras.utils.set_random_seed(42)

# Create the MLP model for ADASYN
model_adasyn = create_mlp_model(input_shape_adasyn, num_classes_adasyn)

# Compile the model with the focal loss (default gamma/alpha)
model_adasyn.compile(optimizer='adam',
                     loss=focal_loss(),
                     metrics=['accuracy'])

# Create the MLP model for PCA-KMeans
model_pca_kmeans = create_mlp_model(input_shape_pca_kmeans, num_classes_pca_kmeans)

# Compile the model with the focal loss (default gamma/alpha)
model_pca_kmeans.compile(optimizer='adam',
                         loss=focal_loss(),
                         metrics=['accuracy'])


In [40]:
def train_and_evaluate(model, dataset_name, X_train, y_train_one_hot, X_test,
                       y_test_one_hot, y_test_labels, class_weights,
                       epochs=50, batch_size=32, validation_split=0.2):
    """Train ``model``, evaluate it on the test split and print the metrics.

    Steps, in order: print the model summary, fit with class weights, evaluate
    on the test set, predict test classes via ``argmax`` over the output
    probabilities, then print the classification report and confusion matrix.

    Args:
        model: Compiled Keras model. Assumes it was compiled with exactly one
            metric, so ``evaluate`` returns ``(loss, metric)``.
        dataset_name: Label inserted into the printed messages.
        X_train: Training features.
        y_train_one_hot: One-hot training targets.
        X_test: Test features.
        y_test_one_hot: One-hot test targets (used by ``evaluate``).
        y_test_labels: Integer test labels (used for the report and matrix).
        class_weights: Mapping ``class index -> weight`` passed to ``fit``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the training data held out by ``fit``.

    Returns:
        Tuple ``(history, test_loss, test_acc, y_pred_probabilities, y_pred,
        y_true, report, conf_matrix)``.
    """
    model.summary()

    history = model.fit(X_train, y_train_one_hot, epochs=epochs, batch_size=batch_size,
                        validation_split=validation_split, class_weight=class_weights)

    test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
    print(f'Test Loss ({dataset_name}): {test_loss}, Test Accuracy ({dataset_name}): {test_acc}')

    # Predicted class = index of the largest output probability
    y_pred_probabilities = model.predict(X_test)
    y_pred = y_pred_probabilities.argmax(axis=1)

    # Ground truth comes from the original integer labels, not from the
    # one-hot matrix
    y_true = np.asarray(y_test_labels)

    report = classification_report(y_true, y_pred)
    print(f"Classification Report ({dataset_name}):\n", report)

    conf_matrix = confusion_matrix(y_true, y_pred)
    print(f"Confusion Matrix ({dataset_name}):\n", conf_matrix)

    return (history, test_loss, test_acc, y_pred_probabilities, y_pred,
            y_true, report, conf_matrix)


# Class weights for ADASYN: class 0 is scaled by (class-1 count / class-0 count)
# of the training split, class 1 keeps weight 1.0
class_weights_adasyn = {0: num_class_1_oversampled_train / num_class_0_oversampled_train, 1: 1.0}

(history_adasyn, test_loss_adasyn, test_acc_adasyn,
 y_pred_probabilities_adasyn, y_pred_adasyn, y_true_adasyn,
 report_adasyn, conf_matrix_adasyn) = train_and_evaluate(
    model_adasyn, "ADASYN",
    X_train_oversampled_extreme, y_train_oversampled_one_hot,
    X_test_oversampled_extreme, y_test_oversampled_one_hot,
    y_test_oversampled_extreme, class_weights_adasyn,
)


# Class weights for PCA-KMeans, built the same way
class_weights_pca_kmeans = {0: num_class_1_pca_kmeans_train / num_class_0_pca_kmeans_train, 1: 1.0}

(history_pca_kmeans, test_loss_pca_kmeans, test_acc_pca_kmeans,
 y_pred_probabilities_pca_kmeans, y_pred_pca_kmeans, y_true_pca_kmeans,
 report_pca_kmeans, conf_matrix_pca_kmeans) = train_and_evaluate(
    model_pca_kmeans, "PCA-KMeans",
    X_train_pca_kmeans_extreme, y_train_pca_kmeans_one_hot,
    X_test_pca_kmeans_extreme, y_test_pca_kmeans_one_hot,
    y_test_pca_kmeans_extreme, class_weights_pca_kmeans,
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 15)                0         
                                                                 
 dense (Dense)               (None, 128)               2048      
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 2)                 130       
                                                                 
Total params: 10434 (40.76 KB)
Trainable params: 10434 (40.76 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch