In [42]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [43]:
# Focal loss factory
def focal_loss(gamma=3.0, alpha=0.5):
    """Build a focal-loss callable usable as a Keras ``loss``.

    For each sample the returned function computes
    ``sum_c -y_true[c] * alpha * (1 - y_pred[c]) ** gamma * log(y_pred[c])``
    over the last axis.

    Args:
        gamma: Exponent of the ``(1 - y_pred)`` modulating factor.
        alpha: Constant scale applied to every class term.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` that reduces the
        last axis by summation and returns the result.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        # Integer labels cannot be multiplied with float predictions in
        # TensorFlow; align the label dtype with the prediction dtype.
        y_true = tf.cast(y_true, y_pred.dtype)
        # Keep predictions strictly inside (0, 1) so log() stays finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)
        per_class_loss = -y_true * (alpha * tf.math.pow(1 - y_pred, gamma) * tf.math.log(y_pred))
        return tf.reduce_sum(per_class_loss, axis=-1)
    return focal_loss_fixed

In [44]:
# MLP classifier with ReLU hidden layers
def create_mlp_model(input_shape, num_classes):
    """Return an uncompiled Sequential MLP.

    The stack is Flatten -> Dense(128, relu) -> Dense(64, relu) ->
    Dense(num_classes, softmax).

    Args:
        input_shape: Shape passed to the Flatten layer as ``input_shape``.
        num_classes: Number of units in the softmax output layer.
    """
    layer_stack = [
        layers.Flatten(input_shape=input_shape),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return models.Sequential(layer_stack)

In [45]:
num_classes = 2
# Load the ADASYN-oversampled dataset.
# Raw string: the Windows path contains backslashes (\T, \S, \d) that must be
# kept literally instead of being parsed as (invalid) escape sequences.
file_path_oversampled = r'D:\Tugas Akhir\Stroke\data_oversampled.csv'
df_oversampled = pd.read_csv(file_path_oversampled)

# Replace decimal commas with dots and cast the listed columns to float32
columns_to_convert = ['age', 'avg_glucose_level', 'bmi']
df_oversampled[columns_to_convert] = df_oversampled[columns_to_convert].replace(',', '.', regex=True).astype('float32')

# Split the ADASYN dataset into features and target
X_oversampled = df_oversampled.drop('stroke', axis=1)
y_oversampled = df_oversampled['stroke']

# One-hot encode the ADASYN target
y_oversampled_one_hot = tf.keras.utils.to_categorical(y_oversampled, num_classes)

# Make sure the features are float32
X_oversampled = X_oversampled.astype('float32')


# Load the PCA-KMeans dataset.
# NOTE(review): the file is named 'data_hasil_nearmiss.csv' while the variables
# say PCA-KMeans -- confirm this is the intended file.
file_path_pca_kmeans = 'D:/Tugas Akhir/Stroke/data_hasil_nearmiss.csv'
df_pca_kmeans = pd.read_csv(file_path_pca_kmeans)

# Split the PCA-KMeans dataset into features and target; 'Cluster' is dropped
# from the features as well
X_pca_kmeans = df_pca_kmeans.drop(['stroke', 'Cluster'], axis=1)
y_pca_kmeans = df_pca_kmeans['stroke']

# One-hot encode the PCA-KMeans target
y_pca_kmeans_one_hot = tf.keras.utils.to_categorical(y_pca_kmeans, num_classes)

# Make sure the features are float32
X_pca_kmeans = X_pca_kmeans.astype('float32')

In [46]:
# Display the ADASYN-oversampled frame (the cell's last expression is rendered as its output)
df_oversampled

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type_Govt_job,work_type_Private,work_type_Self-employed,Residence_type,avg_glucose_level,bmi,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,stroke
0,0,0.736800,0,1,1,0,1,0,0,0.801200,0.522700,0,1,0,0,1
1,0,0.964900,0,1,1,0,1,0,1,0.234200,0.438000,0,0,1,0,1
2,1,0.421100,0,0,1,0,1,0,0,0.535800,0.477300,0,0,0,1,1
3,1,0.947400,1,0,1,0,0,1,1,0.549100,0.262400,0,0,1,0,1
4,0,0.982500,0,0,1,0,1,0,0,0.605000,0.365700,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5766,1,0.880409,0,0,1,0,1,0,1,0.631082,0.307279,1,0,0,0,1
5767,0,0.858301,0,0,1,0,1,0,1,0.140335,0.254772,1,0,0,0,1
5768,1,0.929800,0,0,1,0,1,0,0,0.090441,0.190357,1,0,0,0,1
5769,1,0.900879,0,0,1,0,1,0,1,0.209695,0.238608,1,0,0,0,1


In [47]:
# Display the PCA-KMeans frame (the cell's last expression is rendered as its output)
df_pca_kmeans

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,stroke,Cluster
0,0.372886,0.609620,1.908025,-0.031899,-0.089738,0.583286,-0.888792,0,2
1,0.447158,0.626318,1.922847,-0.082947,-0.141569,0.562977,-1.051663,0,1
2,0.357072,0.680069,1.970703,0.114688,0.049290,0.737212,-0.445223,0,0
3,0.527247,0.503328,2.196309,0.137498,-0.323546,0.412172,-0.677501,0,5
4,0.351856,0.397202,2.244470,-0.025082,-0.096812,0.418106,-0.776446,0,2
...,...,...,...,...,...,...,...,...,...
523,0.345868,0.990180,2.426873,0.501795,0.661659,1.428750,0.694201,1,0
524,2.518617,3.816928,2.039622,1.036978,0.859396,0.882988,0.387169,1,3
525,-0.872821,1.118255,-0.778655,-1.291987,0.286761,1.786304,0.021523,1,1
526,2.387014,0.075662,-0.949177,-1.419237,0.255891,-2.077057,0.444078,1,3


In [48]:
# Print the frequency of each distinct 'stroke' value, first for the ADASYN
# frame and then for the PCA-KMeans frame
for heading, frame in (
    ("Data Pada ADASYN 'stroke':", df_oversampled),
    ("Data Pada PCA-KMeans 'stroke':", df_pca_kmeans),
):
    stroke_counts = frame['stroke'].value_counts()
    print(heading)
    print(stroke_counts)

Data Pada ADASYN 'stroke':
stroke
0    3481
1    2290
Name: count, dtype: int64
Data Pada PCA-KMeans 'stroke':
stroke
0    320
1    208
Name: count, dtype: int64


In [49]:
# Both datasets share the same hold-out settings: 20% test data, fixed seed.
# NOTE(review): the split is not stratified on the label.
split_options = {'test_size': 0.2, 'random_state': 42}

# ADASYN data -> training and test sets
(X_train_oversampled, X_test_oversampled,
 y_train_oversampled, y_test_oversampled) = train_test_split(
    X_oversampled, y_oversampled, **split_options)

# PCA-KMeans data -> training and test sets
(X_train_pca_kmeans, X_test_pca_kmeans,
 y_train_pca_kmeans, y_test_pca_kmeans) = train_test_split(
    X_pca_kmeans, y_pca_kmeans, **split_options)


In [50]:
import numpy as np

# Count the class 0 and class 1 samples in the ADASYN training labels
unique_oversampled, counts_oversampled = np.unique(y_train_oversampled, return_counts=True)
num_class_0_oversampled, num_class_1_oversampled = (
    counts_oversampled[unique_oversampled == class_id][0] for class_id in (0, 1)
)

print(
    f"Jumlah data latih ADASYN: {len(y_train_oversampled)}",
    f"Jumlah kelas 0 ADASYN: {num_class_0_oversampled}",
    f"Jumlah kelas 1 ADASYN: {num_class_1_oversampled}",
    "===========================================",
    sep="\n",
)

# Count the class 0 and class 1 samples in the PCA-KMeans training labels
unique_pca_kmeans, counts_pca_kmeans = np.unique(y_train_pca_kmeans, return_counts=True)
num_class_0_pca_kmeans, num_class_1_pca_kmeans = (
    counts_pca_kmeans[unique_pca_kmeans == class_id][0] for class_id in (0, 1)
)

print(
    f"Jumlah data latih PCA-KMeans: {len(y_train_pca_kmeans)}",
    f"Jumlah kelas 0 PCA-KMeans: {num_class_0_pca_kmeans}",
    f"Jumlah kelas 1 PCA-KMeans: {num_class_1_pca_kmeans}",
    sep="\n",
)


Jumlah data latih ADASYN: 4616
Jumlah kelas 0 ADASYN: 2788
Jumlah kelas 1 ADASYN: 1828
Jumlah data latih PCA-KMeans: 422
Jumlah kelas 0 PCA-KMeans: 253
Jumlah kelas 1 PCA-KMeans: 169


In [51]:
# Short alias for the Keras integer-label -> one-hot helper
to_one_hot = tf.keras.utils.to_categorical

# ADASYN: per-sample input shape, class count (taken from the width of the
# one-hot matrix), and one-hot encoded train/test targets
input_shape_adasyn = X_train_oversampled.shape[1:]
num_classes_adasyn = y_oversampled_one_hot.shape[1]
y_train_oversampled_one_hot, y_test_oversampled_one_hot = (
    to_one_hot(labels, num_classes_adasyn)
    for labels in (y_train_oversampled, y_test_oversampled)
)

# PCA-KMeans: same three steps
input_shape_pca_kmeans = X_train_pca_kmeans.shape[1:]
num_classes_pca_kmeans = y_pca_kmeans_one_hot.shape[1]
y_train_pca_kmeans_one_hot, y_test_pca_kmeans_one_hot = (
    to_one_hot(labels, num_classes_pca_kmeans)
    for labels in (y_train_pca_kmeans, y_test_pca_kmeans)
)

In [52]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so re-running the notebook starts from the same initialisation.
# 42 matches the random_state used for the train/test splits.
tf.keras.utils.set_random_seed(42)

# Create the MLP model for ADASYN
model_adasyn = create_mlp_model(input_shape_adasyn, num_classes_adasyn)

# Compile the model with focal loss (default gamma/alpha) and track accuracy
model_adasyn.compile(optimizer='adam',
                     loss=focal_loss(),
                     metrics=['accuracy'])

# Create the MLP model for PCA-KMeans
model_pca_kmeans = create_mlp_model(input_shape_pca_kmeans, num_classes_pca_kmeans)

# Compile the model with focal loss (default gamma/alpha) and track accuracy
model_pca_kmeans.compile(optimizer='adam',
                         loss=focal_loss(),
                         metrics=['accuracy'])



In [53]:
def train_and_evaluate(model, X_train, y_train_one_hot, X_test, y_test_one_hot, y_test,
                       num_class_0, num_class_1, label,
                       epochs=60, batch_size=32, validation_split=0.2):
    """Train ``model`` with class weights and report its test-set performance.

    Steps, in order: build the class weights, print the model summary, fit,
    evaluate on the test set, predict the test classes, then print the
    classification report and the confusion matrix.

    Args:
        model: Compiled Keras model to train.
        X_train: Training features.
        y_train_one_hot: One-hot training targets passed to ``fit``.
        X_test: Test features.
        y_test_one_hot: One-hot test targets passed to ``evaluate``.
        y_test: Integer test labels used for the report and confusion matrix.
        num_class_0: Number of class-0 samples in the training labels.
        num_class_1: Number of class-1 samples in the training labels.
        label: Dataset name inserted into the printed headings.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the training data held out by ``fit``.

    Returns:
        Tuple ``(class_weights, history, test_loss, test_acc,
        y_pred_probabilities, y_pred, y_true, report, conf_matrix)``.
    """
    # Class 0 is scaled by the class-1/class-0 count ratio; class 1 keeps weight 1.0
    class_weights = {0: num_class_1 / num_class_0, 1: 1.0}

    model.summary()

    history = model.fit(X_train, y_train_one_hot, epochs=epochs, batch_size=batch_size,
                        validation_split=validation_split, class_weight=class_weights)

    test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
    print(f'Test Loss ({label}): {test_loss}, Test Accuracy ({label}): {test_acc}')

    # Predicted class = index of the largest softmax output
    y_pred_probabilities = model.predict(X_test)
    y_pred = y_pred_probabilities.argmax(axis=1)

    # Ground truth comes from the integer labels, not from the one-hot matrix
    y_true = np.asarray(y_test)

    report = classification_report(y_true, y_pred)
    print(f"Classification Report ({label}):\n", report)

    conf_matrix = confusion_matrix(y_true, y_pred)
    print(f"Confusion Matrix ({label}):\n", conf_matrix)

    return (class_weights, history, test_loss, test_acc,
            y_pred_probabilities, y_pred, y_true, report, conf_matrix)


# Train and evaluate the ADASYN model
(class_weights_adasyn, history_adasyn, test_loss_adasyn, test_acc_adasyn,
 y_pred_probabilities_adasyn, y_pred_adasyn, y_true_adasyn,
 report_adasyn, conf_matrix_adasyn) = train_and_evaluate(
    model_adasyn,
    X_train_oversampled, y_train_oversampled_one_hot,
    X_test_oversampled, y_test_oversampled_one_hot, y_test_oversampled,
    num_class_0_oversampled, num_class_1_oversampled,
    label='ADASYN')


# Train and evaluate the PCA-KMeans model
(class_weights_pca_kmeans, history_pca_kmeans, test_loss_pca_kmeans, test_acc_pca_kmeans,
 y_pred_probabilities_pca_kmeans, y_pred_pca_kmeans, y_true_pca_kmeans,
 report_pca_kmeans, conf_matrix_pca_kmeans) = train_and_evaluate(
    model_pca_kmeans,
    X_train_pca_kmeans, y_train_pca_kmeans_one_hot,
    X_test_pca_kmeans, y_test_pca_kmeans_one_hot, y_test_pca_kmeans,
    num_class_0_pca_kmeans, num_class_1_pca_kmeans,
    label='PCA-KMeans')



Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 15)                0         
                                                                 
 dense_12 (Dense)            (None, 128)               2048      
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dense_14 (Dense)            (None, 2)                 130       
                                                                 
Total params: 10434 (40.76 KB)
Trainable params: 10434 (40.76 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epo