In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
# === 1. Baca data dari file Excel ===
df = pd.read_excel('data uji pemodelan.xlsx')  # ganti dengan nama file kamu
df.dropna(subset=['stem_clean', 'sentimen pakar', 'sentimen ai'], inplace=True)  # pastikan tidak ada nilai kosong

# === 2. TF-IDF Vectorization ===
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(df['stem_clean'])

# === 3. Analisis Sentimen dengan SVM berdasarkan Label Ahli Bahasa ===
y_ahli = df['sentimen pakar']

# Split data (bisa juga pakai cross-validation)
X_train_ahli, X_test_ahli, y_train_ahli, y_test_ahli = train_test_split(
    X_tfidf, y_ahli, test_size=0.2, random_state=42
)

# Buat dan latih model SVM
svm_ahli = SVC(kernel='linear')  # kernel bisa diubah ke 'rbf', 'poly', dsb.
svm_ahli.fit(X_train_ahli, y_train_ahli)

# Prediksi dan evaluasi
y_pred_ahli = svm_ahli.predict(X_test_ahli)
print("=== Evaluasi Model (Label Ahli) ===")
print("Akurasi:", accuracy_score(y_test_ahli, y_pred_ahli))
print(classification_report(y_test_ahli, y_pred_ahli))

# === 4. Analisis Sentimen dengan SVM berdasarkan Label AI ===
y_ai = df['sentimen ai']

# Split data
X_train_ai, X_test_ai, y_train_ai, y_test_ai = train_test_split(
    X_tfidf, y_ai, test_size=0.2, random_state=42
)

# Buat dan latih model SVM
svm_ai = SVC(kernel='linear')
svm_ai.fit(X_train_ai, y_train_ai)

# Prediksi dan evaluasi
y_pred_ai = svm_ai.predict(X_test_ai)
print("\n=== Evaluasi Model (Label AI) ===")
print("Akurasi:", accuracy_score(y_test_ai, y_pred_ai))
print(classification_report(y_test_ai, y_pred_ai))

=== Evaluasi Model (Label Ahli) ===
Akurasi: 0.75
              precision    recall  f1-score   support

     negatif       0.00      0.00      0.00         5
      netral       1.00      0.09      0.17        22
     positif       0.74      1.00      0.85        73

    accuracy                           0.75       100
   macro avg       0.58      0.36      0.34       100
weighted avg       0.76      0.75      0.66       100


=== Evaluasi Model (Label AI) ===
Akurasi: 0.89
              precision    recall  f1-score   support

      netral       0.89      1.00      0.94        89
     positif       0.00      0.00      0.00        11

    accuracy                           0.89       100
   macro avg       0.45      0.50      0.47       100
weighted avg       0.79      0.89      0.84       100



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
def analisis_confusion_matrix(y_true, y_pred, label_nama=""):
    cm = confusion_matrix(y_true, y_pred, labels=['positif', 'netral', 'negatif'])
    print(f"\n=== Confusion Matrix ({label_nama}) ===")
    print("Label urutan: [positif, netral, negatif]")
    print(cm)

    TP = cm[0, 0]
    TT = cm[1, 1]
    TN = cm[2, 2]

    FNP = cm[1, 0]  # netral diprediksi sebagai positif
    FNN = cm[1, 2]  # netral diprediksi sebagai negatif
    FP_Netral = cm[0, 1]  # positif diprediksi sebagai netral
    FN_Netral = cm[2, 1]  # negatif diprediksi sebagai netral
    FP = cm[2, 0]  # negatif diprediksi sebagai positif
    FN = cm[0, 2]  # positif diprediksi sebagai negatif

    print(f"""
    True Positif (positif → positif): {TP}
    True Netral (netral → netral): {TT}
    True Negatif (negatif → negatif): {TN}

    False Positif (negatif → positif): {FP}
    False Negatif (positif → negatif): {FN}

    False Positif Netral (positif → netral): {FP_Netral}
    False Negatif Netral (negatif → netral): {FN_Netral}

    False Netral Positif (netral → positif): {FNP}
    False Netral Negatif (netral → negatif): {FNN}
    """)

# === 4. SVM Model untuk Label Ahli ===
svm_ahli = SVC(kernel='linear')
svm_ahli.fit(X_tfidf, df['sentimen pakar'])  # latih dengan seluruh data
y_pred_ahli = svm_ahli.predict(X_tfidf)
analisis_confusion_matrix(df['sentimen pakar'], y_pred_ahli, label_nama="sentimen pakar")

# === 5. SVM Model untuk Label AI ===
svm_ai = SVC(kernel='linear')
svm_ai.fit(X_tfidf, df['sentimen ai'])
y_pred_ai = svm_ai.predict(X_tfidf)
analisis_confusion_matrix(df['sentimen ai'], y_pred_ai, label_nama="sentimen ai")


=== Confusion Matrix (sentimen pakar) ===
Label urutan: [positif, netral, negatif]
[[372   0   0]
 [ 55  40   0]
 [  7   0  23]]

    True Positif (positif → positif): 372
    True Netral (netral → netral): 40
    True Negatif (negatif → negatif): 23
    
    False Positif (negatif → positif): 7
    False Negatif (positif → negatif): 0
    
    False Positif Netral (positif → netral): 0
    False Negatif Netral (negatif → netral): 0
    
    False Netral Positif (netral → positif): 55
    False Netral Negatif (netral → negatif): 0
    

=== Confusion Matrix (sentimen ai) ===
Label urutan: [positif, netral, negatif]
[[ 15  34   0]
 [  0 439   0]
 [  0  11   1]]

    True Positif (positif → positif): 15
    True Netral (netral → netral): 439
    True Negatif (negatif → negatif): 1
    
    False Positif (negatif → positif): 0
    False Negatif (positif → negatif): 0
    
    False Positif Netral (positif → netral): 34
    False Negatif Netral (negatif → netral): 11
    
    False Netral