In [13]:
# Import library yang diperlukan
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

In [15]:
# 1. Load the trained model and its metadata
print("=== Loading Model and Metadata ===")
# NOTE(review): joblib artifacts are pickle-based; only load files from a trusted source.
model = joblib.load('../data/models/optimized_stroke_model.joblib')
metadata = joblib.load('../data/models/model_metadata.joblib')
# Decision threshold stored in the metadata under 'optimized_performance'.
optimal_threshold = metadata['optimized_performance']['optimal_threshold']

# Load the processed dataset to recover the feature column order
# (every column except the 'stroke' target, in file order).
original_data = pd.read_csv('../data/processed/stroke_data_final.csv')
EXPECTED_COLUMNS = original_data.columns.drop('stroke').tolist()

print("Model dan metadata berhasil dimuat")
print(f"Jumlah fitur yang diharapkan: {len(EXPECTED_COLUMNS)}")
print("Fitur yang diharapkan:", EXPECTED_COLUMNS)

=== Loading Model and Metadata ===
Model dan metadata berhasil dimuat
Jumlah fitur yang diharapkan: 21
Fitur yang diharapkan: ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'Residence_type', 'avg_glucose_level', 'bmi', 'work_type_Govt_job', 'work_type_Never_worked', 'work_type_Private', 'work_type_Self-employed', 'work_type_children', 'smoking_status_Unknown', 'smoking_status_formerly smoked', 'smoking_status_never smoked', 'smoking_status_smokes', 'risk_factors', 'age_health_interaction', 'bmi_glucose_risk', 'age_lifestyle_risk']


In [16]:
# 2. Functions untuk preprocessing
def create_features(data_dict):
    """Return a copy of ``data_dict`` extended with the engineered features.

    The values under ``age``, ``bmi``, ``avg_glucose_level``, ``hypertension``
    and ``heart_disease`` are coerced to ``float`` and four derived entries
    are added:

    * ``age_health_interaction`` = age * (hypertension + heart_disease)
    * ``bmi_glucose_risk``       = bmi * avg_glucose_level
    * ``risk_factors``           = hypertension + heart_disease
                                   + (bmi >= 25) + (avg_glucose_level >= 200)
    * ``age_lifestyle_risk``     = age * risk_factors

    The caller's dictionary is never modified; all changes are made on a
    shallow copy, so the same input can be reused safely.

    Args:
        data_dict: Mapping holding at least the five keys listed above, with
            values that ``float()`` can convert.

    Returns:
        dict: A new dictionary with the coerced values and derived features.

    Raises:
        KeyError: If one of the five required keys is missing.
        ValueError, TypeError: If a required value cannot be converted to float.
    """
    # Work on a copy so the caller's dictionary is left untouched.
    features = dict(data_dict)

    # Coerce the numeric inputs to float before doing arithmetic on them
    for key in ('age', 'bmi', 'avg_glucose_level', 'hypertension', 'heart_disease'):
        features[key] = float(features[key])

    # Interaction features
    features['age_health_interaction'] = features['age'] * (
        features['hypertension'] + features['heart_disease']
    )
    features['bmi_glucose_risk'] = features['bmi'] * features['avg_glucose_level']

    # Count of risk factors: two clinical flags plus the BMI and glucose cut-offs
    high_bmi = 1 if features['bmi'] >= 25 else 0
    high_glucose = 1 if features['avg_glucose_level'] >= 200 else 0
    features['risk_factors'] = (features['hypertension'] +
                                features['heart_disease'] +
                                high_bmi + high_glucose)

    features['age_lifestyle_risk'] = features['age'] * features['risk_factors']

    return features

def prepare_input_data(data_dict):
    """Build the single-row DataFrame that is fed to the model.

    The input is first passed through ``create_features``; the result becomes
    a one-row frame. Any column listed in ``EXPECTED_COLUMNS`` that is absent
    is added with the value 0, and the frame is then restricted to
    ``EXPECTED_COLUMNS`` in that order (extra keys are dropped).

    Args:
        data_dict: Patient attributes keyed by feature name.

    Returns:
        pandas.DataFrame: One row whose columns are exactly ``EXPECTED_COLUMNS``.
    """
    # Derive the engineered features, then wrap the record as a one-row frame
    enriched = create_features(data_dict)
    frame = pd.DataFrame([enriched])

    # Fill in every expected column the record did not provide
    absent = [name for name in EXPECTED_COLUMNS if name not in frame.columns]
    for name in absent:
        frame[name] = 0

    # Select the columns in the order given by EXPECTED_COLUMNS
    return frame[EXPECTED_COLUMNS]

In [17]:
# 3. Function untuk prediksi
def predict_stroke(model, data, threshold):
    """Classify each row of ``data`` using a custom probability threshold.

    Args:
        model: Object exposing ``predict_proba(data)``; column 1 of its
            result is used as the positive-class probability.
        data: Feature rows passed straight to ``model.predict_proba``.
        threshold: A row is labelled 1 when its probability is >= this value.

    Returns:
        tuple: ``(predictions, proba)`` - the 0/1 integer labels and the
        positive-class probabilities they were derived from.
    """
    class_probabilities = model.predict_proba(data)
    positive_proba = class_probabilities[:, 1]
    labels = (positive_proba >= threshold).astype(int)
    return labels, positive_proba

In [18]:
# 4. Function untuk testing
def test_single_patient(model, threshold, data_dict):
    """Testing untuk satu pasien"""
    # Preprocessing data
    df = prepare_input_data(data_dict)
    
    # Prediksi
    pred, proba = predict_stroke(model, df, threshold)
    
    # Print hasil
    print("\nHasil Prediksi:")
    print(f"Probabilitas Stroke: {proba[0]:.2%}")
    print(f"Prediksi: {'Risiko Stroke' if pred[0] == 1 else 'Tidak Berisiko Stroke'}")
    
    # Tentukan tingkat kepercayaan
    confidence_margin = abs(proba[0] - 0.5)
    if confidence_margin > 0.3:
        confidence = "Tinggi"
    elif confidence_margin > 0.15:
        confidence = "Sedang"
    else:
        confidence = "Rendah"
    
    print(f"Confidence: {confidence}")
    
    # Print faktor risiko
    print("\nFaktor Risiko:")
    if data_dict['hypertension'] == 1:
        print("- Memiliki hipertensi")
    if data_dict['heart_disease'] == 1:
        print("- Memiliki penyakit jantung")
    if data_dict['bmi'] >= 25:
        print("- BMI tinggi")
    if data_dict['avg_glucose_level'] >= 200:
        print("- Kadar glukosa tinggi")
    
    return pred[0], proba[0]

In [19]:
# 5. Test Cases
print("\n=== Model Testing ===")

# 5.1 Test Case 1: high-risk patient
print("\nTest Case 1: Pasien Berisiko Tinggi")
high_risk_patient = {
    'age': 70,
    'hypertension': 1,
    'heart_disease': 1,
    'avg_glucose_level': 200,
    'bmi': 35,
    'gender': 1,
    'ever_married': 1,
    'work_type_Govt_job': 0,
    'work_type_Never_worked': 0,
    'work_type_Private': 1,
    'work_type_Self-employed': 0,
    'work_type_children': 0,
    'Residence_type': 1,
    'smoking_status_Unknown': 0,
    'smoking_status_formerly smoked': 1,
    'smoking_status_never smoked': 0,
    'smoking_status_smokes': 0
}

# A copy is passed so the fixture stays pristine even if preprocessing
# modifies the dictionary it receives; re-running the cell starts clean.
pred1, prob1 = test_single_patient(model, optimal_threshold, dict(high_risk_patient))
assert pred1 == 1, "high-risk patient should be flagged as at risk of stroke"

# 5.2 Test Case 2: low-risk patient
print("\nTest Case 2: Pasien Berisiko Rendah")
low_risk_patient = {
    'age': 30,
    'hypertension': 0,
    'heart_disease': 0,
    'avg_glucose_level': 90,
    'bmi': 22,
    'gender': 0,
    'ever_married': 0,
    'work_type_Govt_job': 0,
    'work_type_Never_worked': 0,
    'work_type_Private': 1,
    'work_type_Self-employed': 0,
    'work_type_children': 0,
    'Residence_type': 0,
    'smoking_status_Unknown': 0,
    'smoking_status_formerly smoked': 0,
    'smoking_status_never smoked': 1,
    'smoking_status_smokes': 0
}

pred2, prob2 = test_single_patient(model, optimal_threshold, dict(low_risk_patient))
assert pred2 == 0, "low-risk patient should not be flagged as at risk of stroke"

# Sanity check: the model must rank the high-risk profile above the low-risk one.
assert prob1 > prob2, "high-risk probability should exceed low-risk probability"


=== Model Testing ===

Test Case 1: Pasien Berisiko Tinggi

Hasil Prediksi:
Probabilitas Stroke: 56.00%
Prediksi: Risiko Stroke
Confidence: Rendah

Faktor Risiko:
- Memiliki hipertensi
- Memiliki penyakit jantung
- BMI tinggi
- Kadar glukosa tinggi

Test Case 2: Pasien Berisiko Rendah

Hasil Prediksi:
Probabilitas Stroke: 41.00%
Prediksi: Tidak Berisiko Stroke
Confidence: Rendah

Faktor Risiko:


In [20]:
# 6. Interactive Testing Function
def interactive_test():
    """Prompt for one patient's basic data on stdin and run a prediction.

    Age, gender, hypertension, heart disease, average glucose level and BMI
    are read with ``input()``. The remaining features are filled with fixed
    defaults (married, private work, residence type 0, never smoked) before
    the record is handed to ``test_single_patient`` together with the
    module-level ``model`` and ``optimal_threshold``.

    Returns:
        The ``(prediction, probability)`` tuple from ``test_single_patient``,
        or ``None`` when an answer cannot be parsed as a number.

    Raises:
        Any exception raised by ``test_single_patient`` propagates unchanged;
        only input-parsing errors are turned into the "invalid value" message.
    """
    print("\nInteractive Testing Mode")
    print("Enter patient information:")

    patient_data = {}

    # Guard only the parsing of user input. Keeping the prediction call
    # outside the try block prevents a ValueError raised during
    # preprocessing or prediction from being misreported as bad user input.
    try:
        patient_data['age'] = float(input("Age (years): "))
        patient_data['gender'] = int(input("Gender (0:Female, 1:Male): "))
        patient_data['hypertension'] = int(input("Hypertension (0:No, 1:Yes): "))
        patient_data['heart_disease'] = int(input("Heart Disease (0:No, 1:Yes): "))
        patient_data['avg_glucose_level'] = float(input("Average Glucose Level (mg/dL): "))
        patient_data['bmi'] = float(input("BMI: "))
    except ValueError:
        print("Error: Please enter valid numeric values")
        return None

    # Default values for the features that are not asked interactively
    patient_data.update({
        'ever_married': 1,
        'work_type_Govt_job': 0,
        'work_type_Never_worked': 0,
        'work_type_Private': 1,
        'work_type_Self-employed': 0,
        'work_type_children': 0,
        'Residence_type': 0,
        'smoking_status_Unknown': 0,
        'smoking_status_formerly smoked': 0,
        'smoking_status_never smoked': 1,
        'smoking_status_smokes': 0
    })

    return test_single_patient(model, optimal_threshold, patient_data)

In [22]:
# 7. Main Testing Interface
if __name__ == "__main__":
    print("\n=== Stroke Prediction Testing Interface ===")
    print("1. Run predefined test cases")
    print("2. Interactive testing")

    # Strip surrounding whitespace so an answer such as "2 " is still recognised.
    choice = input("\nSelect testing mode (1-2): ").strip()

    if choice == '1':
        # Nothing to do: the predefined cases were already run by an earlier cell.
        pass
    elif choice == '2':
        interactive_test()
    else:
        print("Invalid choice!")

print("\nModel testing completed!")


=== Stroke Prediction Testing Interface ===
1. Run predefined test cases
2. Interactive testing

Interactive Testing Mode
Enter patient information:
Error: Please enter valid numeric values

Model testing completed!
