In [110]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [111]:
# Focal Loss Function
def focal_loss(gamma=3.0, alpha=0.5):
    """Build a focal-loss callable that can be passed to ``model.compile``.

    Args:
        gamma: Exponent applied to ``1 - p``. Larger values shrink the
            contribution of classes already predicted with high probability.
        alpha: Constant factor multiplied into every class term.

    Returns:
        A function ``(y_true, y_pred)`` returning the loss summed over the
        last axis. ``y_true`` weights each class term (it is one-hot encoded
        in this notebook); ``y_pred`` holds the predicted probabilities.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep probabilities strictly inside (0, 1) so log() never receives 0.
        eps = tf.keras.backend.epsilon()
        probs = tf.clip_by_value(y_pred, eps, 1.0 - eps)

        # (1 - p)^gamma down-weights terms whose probability is already high.
        modulating = tf.math.pow(1 - probs, gamma)
        log_probs = tf.math.log(probs)
        per_class = -y_true * (alpha * modulating * log_probs)

        # Collapse the class axis.
        return tf.reduce_sum(per_class, axis=-1)

    return focal_loss_fixed

In [112]:
# Define the MLP model with ReLU activation
def create_mlp_model(input_shape, num_classes):
    """Assemble the (uncompiled) multilayer-perceptron classifier.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model: Flatten -> Dense(128, relu) ->
        Dense(64, relu) -> Dense(num_classes, softmax).
    """
    return models.Sequential([
        layers.Flatten(input_shape=input_shape),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

In [113]:
# Load dataset
# Raw string: the Windows path contains backslashes (\T, \S, \d) that a normal
# string literal would parse as invalid escape sequences.
# NOTE(review): the file name suggests the data was oversampled before the
# train/test split below; if so, test metrics may be optimistic -- confirm.
file_path = r'D:\Tugas Akhir\Stroke\data_oversampled.csv'
df = pd.read_csv(file_path)

# Replace decimal commas with dots and cast the selected columns to float32
columns_to_convert = ['age', 'avg_glucose_level', 'bmi']
df[columns_to_convert] = df[columns_to_convert].replace(',', '.', regex=True).astype('float32')

# Separate the features from the target column
X = df.drop('stroke', axis=1)
y = df['stroke']


In [114]:
# Preview the loaded DataFrame: as the last expression of the cell it is
# rendered as the cell output.
df

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type_Govt_job,work_type_Private,work_type_Self-employed,Residence_type,avg_glucose_level,bmi,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,stroke
0,0,0.7368,0,1,1,0,1,0,0,0.8012,0.5227,0,1,0,0,1
1,0,0.9649,0,1,1,0,1,0,1,0.2342,0.4380,0,0,1,0,1
2,1,0.4211,0,0,1,0,1,0,0,0.5358,0.4773,0,0,0,1,1
3,1,0.9474,1,0,1,0,0,1,1,0.5491,0.2624,0,0,1,0,1
4,0,0.9825,0,0,1,0,1,0,0,0.6050,0.3657,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5766,1,0.8804,0,0,1,0,1,0,1,0.6311,0.3073,1,0,0,0,1
5767,0,0.8583,0,0,1,0,1,0,1,0.1403,0.2548,1,0,0,0,1
5768,1,0.9298,0,0,1,0,1,0,0,0.0904,0.1904,1,0,0,0,1
5769,1,0.9009,0,0,1,0,1,0,1,0.2097,0.2386,1,0,0,0,1


In [115]:
# Count how many rows fall into each class of the 'stroke' target column
stroke_counts = df['stroke'].value_counts()
# Print a header line followed by the class frequencies
print("Frekuensi nilai unik dalam kolom 'stroke':", stroke_counts, sep="\n")

Frekuensi nilai unik dalam kolom 'stroke':
stroke
0    3481
1    2290
Name: count, dtype: int64


In [116]:
# Split the data into training and test sets: 20% of the rows are held out
# for testing, and random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [117]:
# Count how many training labels fall into each 'stroke' class
stroke_counts_train = y_train.value_counts()
# Print a header line followed by the class frequencies
print("Frekuensi nilai unik dalam kolom 'stroke' pada data pelatihan:", stroke_counts_train, sep="\n")

Frekuensi nilai unik dalam kolom 'stroke' pada data pelatihan:
stroke
0    2788
1    1828
Name: count, dtype: int64


In [118]:
# Specify input shape and number of classes based on the features and target
input_shape = X_train.shape[1:]  # per-sample shape, batch dimension dropped
num_classes = 2

# One-hot encode the labels for the softmax output layer.
# The labels are overwritten in place, so only encode while they are still a
# 1-D vector of class ids: re-running this cell would otherwise encode the
# already one-hot array again and produce a wrongly shaped target.
if y_train.ndim == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [119]:
# Make sure both feature sets are float32 before they are fed to the model
X_train, X_test = (features.astype('float32') for features in (X_train, X_test))


In [120]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are initialised, so that weight initialisation and the shuffling done during
# training are repeatable. 42 matches the random_state of the train/test split.
tf.keras.utils.set_random_seed(42)

# Create the MLP model
model = create_mlp_model(input_shape, num_classes)

# Compile the model with Focal Loss (default gamma/alpha), the Adam optimizer,
# and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss=focal_loss(),
    metrics=['accuracy'],
)

In [121]:
# Show the layer-by-layer architecture and parameter counts
model.summary()

# Fit the model, keeping 20% of the training data aside for validation
history = model.fit(
    X_train,
    y_train,
    epochs=60,
    batch_size=32,
    validation_split=0.2,
)

# Measure loss and accuracy on the held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_acc}')

# Recover integer class labels from the one-hot encoded test targets
y_true = y_test.argmax(axis=1)

# Predicted class = index of the highest output probability
y_pred_probabilities = model.predict(X_test)
y_pred = y_pred_probabilities.argmax(axis=1)

# Per-class precision, recall and F1-score
report = classification_report(y_true, y_pred)
print("Classification Report:\n", report)

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_9 (Flatten)         (None, 15)                0         
                                                                 
 dense_27 (Dense)            (None, 128)               2048      
                                                                 
 dense_28 (Dense)            (None, 64)                8256      
                                                                 
 dense_29 (Dense)            (None, 2)                 130       
                                                                 
Total params: 10434 (40.76 KB)
Trainable params: 10434 (40.76 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Test Loss: 0.02673451229929924, Test Accuracy: 0.8580086827278137
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.83      0.87       693
           1       0.78      0.90      0.84       462

    accuracy                       