In [1]:
# SVM (Support Vector Machine)
## Indah Wulandari

In [None]:
# Import library yang diperlukan
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Preprocessing dan Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC 
from sklearn.metrics import (
    accuracy_score, 
    classification_report, 
    confusion_matrix,
    roc_curve,
    auc
)


In [None]:
# 1. Load the dataset from the CSV file located next to the notebook.
df = pd.read_csv('K02_diabetes.csv')
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# 2. Exploratory Data Analysis
print("Informasi Dataset:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
df.info()

# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
print("\nStatistik Deskriptif:")
print(df.describe())


In [None]:
# Check for missing values: count the null entries in every column.
missing_per_column = df.isna().sum()
print("\nMissing Values:")
print(missing_per_column)


In [None]:
# 3. Data preprocessing
# Separate the feature columns from the target column ('smoking_history').
X = df.drop(columns='smoking_history')
y = df['smoking_history']

# Split the feature columns by dtype: object-typed columns are treated as
# categorical, every other column as numeric.
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(exclude=['object']).columns.tolist()

# Column-wise preprocessing that is later embedded in the model pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        # Standardise the numeric columns.
        ('num', StandardScaler(), numerical_cols),
        # One-hot encode the categorical columns. handle_unknown='ignore' makes
        # the encoder emit an all-zero row for a category that was not present
        # when it was fitted, instead of raising at predict time (the encoder
        # is fitted on the training split only, so a rare category may show up
        # exclusively in the test split).
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

In [None]:
# 4. Build the SVM model as a Pipeline: preprocessing first, then an RBF-kernel SVC.
# probability=True enables predict_proba, which the ROC section needs.
svm_model = Pipeline(
    [
        ('preprocessor', preprocessor),
        ('classifier', SVC(kernel='rbf', random_state=42, probability=True)),
    ]
)

In [None]:
# Train the model: fits the preprocessing steps and the classifier on the training split.
svm_model.fit(X=X_train, y=y_train)

In [None]:
# 5. Prediction: predicted class labels for the held-out test split.
y_pred = svm_model.predict(X=X_test)

In [None]:
# 6. Model evaluation: overall accuracy followed by the per-class report.
print("\nMetrik Evaluasi:")
test_accuracy = accuracy_score(y_test, y_pred)
print("Akurasi:", test_accuracy)

print("\nLaporan Klasifikasi:")
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
# 7. Confusion matrix visualisation
# Take the class labels from the fitted model rather than hard-coding them, and
# pass the same ordering to confusion_matrix so that every row/column of the
# heatmap is guaranteed to carry the label of the class it actually counts.
class_labels = list(svm_model.classes_)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_labels = [str(label) for label in class_labels]

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # cells are integer counts
    cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix SVM')
ax.set_xlabel('Prediksi')
ax.set_ylabel('Aktual')
plt.show()

In [None]:
# 8. ROC curve
# predict_proba returns one column per class, ordered like svm_model.classes_.
class_labels = svm_model.classes_
proba_matrix = svm_model.predict_proba(X_test)
y_pred_proba = proba_matrix[:, 1]  # probability of the second class (the positive class when binary)
y_true = np.asarray(y_test)

fig, ax = plt.subplots(figsize=(8, 6))
if len(class_labels) == 2:
    # Binary target: name the positive class explicitly. Without pos_label,
    # roc_curve raises for labels that are not {0, 1} / {-1, 1} (e.g. strings).
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba, pos_label=class_labels[1])
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
else:
    # Multiclass target: roc_curve only supports binary problems, so draw one
    # one-vs-rest curve per class (that class = 1, every other class = 0).
    for class_idx, class_label in enumerate(class_labels):
        is_class = (y_true == class_label).astype(int)
        fpr, tpr, _ = roc_curve(is_class, proba_matrix[:, class_idx], pos_label=1)
        roc_auc = auc(fpr, tpr)
        ax.plot(fpr, tpr, lw=2, label=f'{class_label} vs rest (AUC = {roc_auc:.2f})')

# Diagonal reference line: the expected curve of a random classifier.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()