In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score

In [2]:
# Read the pipe-delimited tweet dataset into a DataFrame and preview
# the first rows; later cells use the "tweet" and "label" columns.
# NOTE(review): the rendered preview shows an "Unnamed: 0" column, which
# suggests the CSV was saved together with its index -- confirm, and
# consider index_col=0 if that column is not needed.
data = pd.read_csv(
    "data_traveloka_fix.csv",
    sep="|",
)
data.head(15)

Unnamed: 0,label,tweet
0,positive,Beli dear butter only 0 rupiah from #Traveloka...
1,positive,Super deals by @traveloka and they have a lot ...
2,negative,rip traveloka eats
3,negative,"harga shopeefood, grabfood, gofood, dan travel..."
4,negative,Traveloka eats diskonnya dikit doang sekarang ...
5,negative,Traveloka eats paling sadis bakar uangnya tapi...
6,negative,Travelokaeats dan shopeefood udah kembali ke n...
7,negative,"Pada ngerasa gak sih sekarang gofood, grabfood..."
8,negative,Harga ongkir ojol food termahal jatuh kpd @tra...
9,negative,@traveloka ni traveloka gada diskon buat pengg...


In [3]:
# Build the bag-of-words features and the target vector.
cv = CountVectorizer()

# Target: the sentiment label stored alongside each tweet.
y = data["label"]

# Features: token counts per tweet. fit_transform returns a sparse matrix;
# it is expanded to a dense array right away so that every model trained
# later receives the same plain ndarray.
X = cv.fit_transform(data["tweet"]).toarray()

In [22]:
# Create the SVM, Naive Bayes, and Logistic Regression models
SEED = 42  # fixed seed so repeated runs give the same SVM probabilities
POSITIVE_LABEL = 'positive'  # label treated as the positive class when scoring


def _positive_class_scores(model, features, positive_label=POSITIVE_LABEL):
    """Return one float32 ranking score per row for ``positive_label``.

    The score source is chosen in this order:

    1. ``predict_proba`` -- the probability column belonging to
       ``positive_label``, located through ``model.classes_`` instead of
       assuming it is column 1.
    2. ``decision_function`` (binary problems only) -- the raw margin,
       negated when ``positive_label`` is the first entry of ``classes_``
       so that larger always means "more positive". No min-max rescaling
       is applied: it is unnecessary for ranking metrics and would divide
       by zero when all margins are equal.
    3. Hard predictions -- 1.0 where the predicted label equals
       ``positive_label``, 0.0 elsewhere.

    Parameters
    ----------
    model : fitted classifier exposing ``classes_`` and ``predict``.
    features : feature matrix accepted by ``model``.
    positive_label : class label to score; defaults to ``POSITIVE_LABEL``.

    Returns
    -------
    numpy.ndarray of dtype float32 with one score per row of ``features``.

    Raises
    ------
    ValueError
        If ``positive_label`` is not one of ``model.classes_``.
    """
    classes = list(model.classes_)
    if positive_label not in classes:
        raise ValueError(
            f"positive label {positive_label!r} not found in model classes {classes!r}"
        )
    positive_index = classes.index(positive_label)

    if hasattr(model, "predict_proba"):
        scores = model.predict_proba(features)[:, positive_index]
    elif hasattr(model, "decision_function") and len(classes) == 2:
        scores = model.decision_function(features)
        if positive_index == 0:
            scores = -scores
    else:
        scores = model.predict(features) == positive_label
    return np.asarray(scores).astype('float32')


# probability=True makes SVC expose predict_proba; random_state pins the
# internal shuffling used to calibrate those probabilities.
svm_model = SVC(probability=True, random_state=SEED)
nb_model = GaussianNB()
lr_model = LogisticRegression()

# Train the models
svm_model.fit(X, y)
nb_model.fit(X, y)
lr_model.fit(X, y)

# Evaluate the models
# NOTE: predictions and scores are computed on the same X the models were
# fitted on, so every metric derived from them describes fit to the
# training data, not performance on unseen tweets.
svm_predictions = svm_model.predict(X)
nb_predictions = nb_model.predict(X)
lr_predictions = lr_model.predict(X)

# Continuous scores for the positive class, used for average precision
svm_scores = _positive_class_scores(svm_model, X)
nb_scores = _positive_class_scores(nb_model, X)
lr_scores = _positive_class_scores(lr_model, X)

In [29]:
# Report precision, recall, F1 and average precision for each model,
# always with 'positive' as the positive class. Each model's metrics
# are followed by an empty line.
for model_name, predicted_labels, positive_scores in (
    ("Logistic Regression", lr_predictions, lr_scores),
    ("SVM", svm_predictions, svm_scores),
    ("Naive Bayes", nb_predictions, nb_scores),
):
    # The first three metrics compare hard label predictions with y.
    print(model_name + " Precision:", precision_score(y, predicted_labels, pos_label='positive'))
    print(model_name + " Recall:", recall_score(y, predicted_labels, pos_label='positive'))
    print(model_name + " F1 Score:", f1_score(y, predicted_labels, pos_label='positive'))
    # Average precision is computed from the continuous scores instead.
    print(model_name + " Average Precision Score:", average_precision_score(y, positive_scores, pos_label='positive'))
    print("")

Logistic Regression Precision: 0.9828571428571429
Logistic Regression Recall: 0.9985486211901307
Logistic Regression F1 Score: 0.9906407487401008
Logistic Regression Average Precision Score: 0.999719443937999

SVM Precision: 0.9145527369826435
SVM Recall: 0.9941944847605225
SVM F1 Score: 0.952712100139082
SVM Average Precision Score: 0.9947039946067919

Naive Bayes Precision: 1.0
Naive Bayes Recall: 0.9216255442670537
Naive Bayes F1 Score: 0.959214501510574
Naive Bayes Average Precision Score: 0.9678616352201258

