In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, roc_curve, classification_report
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt

# Membaca file CSV
file_path = 'MaterialStrength.csv'
data = pd.read_csv(file_path)

# Menampilkan beberapa baris data untuk memahami strukturnya
print("=== Sample Data ===")
print(data.head())

# Menentukan kolom target (asumsi target adalah kolom terakhir)
target_column = data.columns[-1]

# Mengonversi target menjadi format numerik
data[target_column] = LabelEncoder().fit_transform(data[target_column])

# Membagi fitur dan target
X = data.drop(columns=[target_column])
y = data[target_column]

# Encoding data kategoris (jika ada)
for col in X.select_dtypes(include=['object']).columns:
    X[col] = LabelEncoder().fit_transform(X[col])

# Normalisasi fitur menggunakan StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Membagi data menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Melatih model SVM
svm_model = SVC(probability=True, kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

# Membuat prediksi
y_pred = svm_model.predict(X_test)
y_pred_prob = svm_model.predict_proba(X_test)

# Menghitung metrik evaluasi
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' untuk multiclass
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Menghitung ROC AUC (per kelas dan rata-rata 'macro')
try:
    auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr', average='macro')
except ValueError as e:
    auc = None  # Jika AUC tidak dapat dihitung

# Menampilkan hasil
print("\n=== Evaluation Metrics ===")
print(f"Accuracy : {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall   : {recall:.2f}")
print(f"F1 Score : {f1:.2f}")
if auc is not None:
    print(f"AUC      : {auc:.2f}")
else:
    print("AUC      : Tidak dapat dihitung untuk multiclass ROC AUC tanpa nilai probabilitas yang memadai.")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Membuat kurva ROC hanya jika dataset adalah multiclass
if auc is not None and y_pred_prob.shape[1] == len(np.unique(y)):
    plt.figure(figsize=(8, 6))
    for i in range(len(np.unique(y))):
        fpr, tpr, _ = roc_curve(y_test == i, y_pred_prob[:, i])
        plt.plot(fpr, tpr, label=f"Class {i} (AUC = {roc_auc_score(y_test == i, y_pred_prob[:, i]):.2f})")
    plt.plot([0, 1], [0, 1], color='red', linestyle='--', label='Random Guess')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve - Multiclass')
    plt.legend()
    plt.grid()
    plt.show()
else:
    print("Kurva ROC tidak dapat ditampilkan untuk multiclass tanpa nilai probabilitas per kelas.")


=== Sample Data ===
      x1     x2   x3     x4   x5      x6     x7   x8        x9    x10  \
0  540.0    0.0  0.0  162.0  2.5  1040.0  676.0   28  0.300000  540.0   
1  540.0    0.0  0.0  162.0  2.5  1055.0  676.0   28  0.300000  540.0   
2  332.5  142.5  0.0  228.0  0.0   932.0  594.0  270  0.685714  475.0   
3  332.5  142.5  0.0  228.0  0.0   932.0  594.0  365  0.685714  475.0   
4  198.6  132.4  0.0  192.0  0.0   978.4  825.5  360  0.966767  331.0   

        x11  x12       x13  x14  x15  target_feature  
0  1.538462    1  0.350044  YEs   NO           79.99  
1  1.560651    1  0.452416  yES  nOO           61.89  
2  1.569024    0  6.704743  yEs   NO           40.27  
3  1.569024    0  8.891596  yes  NOO           41.05  
4  1.185221    0  8.126411  YeS   no           44.30  

=== Evaluation Metrics ===
Accuracy : 0.05
Precision: 0.02
Recall   : 0.03
F1 Score : 0.03
AUC      : Tidak dapat dihitung untuk multiclass ROC AUC tanpa nilai probabilitas yang memadai.

Classification Report:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
