In [None]:
import pandas as pd
# Load the dataset from an Excel workbook; the relative path is resolved
# against the notebook's current working directory. The file name suggests
# the text has already been preprocessed upstream (not verifiable from here).
df = pd.read_excel('Dataset preprocessed.xlsx')
# Bare last expression so the notebook renders the frame as the cell output.
df

In [None]:
from sklearn.model_selection import StratifiedKFold
import statistics
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.over_sampling import SMOTE 
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score

In [None]:
# Fixed seed so the shuffled fold assignment and SMOTE's synthetic samples
# are identical on every Restart & Run All; without it the reported averages
# change from run to run.
SEED = 42

# Feature column (passed to TfidfVectorizer downstream) and target labels.
X = df["Mentions"]
y = df["Sentiment"]

# Stratified 5-fold split: each fold keeps the class proportions of y.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# 'minority' resamples only the smallest class up to the majority count.
# NOTE(review): if Sentiment has more than two classes, the intermediate
# classes are left untouched -- confirm this is intended.
oversample = SMOTE(sampling_strategy='minority', random_state=SEED)

In [None]:
# Cross-validate a TF-IDF + SMOTE + linear-SVM classifier over the folds
# produced by `kf`, collecting one score per fold for each metric.
accuracy_list, precision_list, recall_list, f1_list = [], [], [], []

for train_index, test_index in kf.split(X, y):
    # kf.split yields *positional* indices, so select with .iloc. Plain
    # X[train_index] is label-based and only happens to work while the
    # Series carries a default RangeIndex (it breaks after any row filtering).
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A fresh vectorizer per fold: the vocabulary and IDF weights are learned
    # from the training split only, so nothing leaks from the test split.
    vect = TfidfVectorizer()
    X_train_dtm = vect.fit_transform(X_train)

    # Oversample the training split only; the test split keeps its original
    # class distribution so the scores reflect real data.
    X_over, y_over = oversample.fit_resample(X_train_dtm, y_train)
    X_test_dtm = vect.transform(X_test)

    model = SVC(kernel='linear')
    model.fit(X_over, y_over)
    y_pred_class = model.predict(X_test_dtm)

    # Accuracy is stored as a percentage (0-100); the other three metrics
    # stay as support-weighted fractions (0-1).
    accuracy = accuracy_score(y_test, y_pred_class) * 100
    precision = precision_score(y_test, y_pred_class, average='weighted')
    recall = recall_score(y_test, y_pred_class, average='weighted')
    f1 = f1_score(y_test, y_pred_class, average='weighted')

    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

# Report the mean of each metric across folds, rounded to 3 decimals.
print("Rata-rata Akurasi =", round(statistics.mean(accuracy_list), 3))
print("Rata-rata Precision =", round(statistics.mean(precision_list), 3))
print("Rata-rata Recall =", round(statistics.mean(recall_list), 3))
print("Rata-rata F1 Score =", round(statistics.mean(f1_list), 3))