In [19]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [13]:
df = pd.read_csv('data_tweet_clean.csv')
df.head()

Unnamed: 0,Tweet,Label,clean_tweet
0,RT @spardaxyz: Fadli Zon Minta Mendagri Segera...,Non_HS,fadli zon minta mendagri segera nonaktif ahok ...
1,RT @baguscondromowo: Mereka terus melukai aksi...,Non_HS,mereka terus luka aksi dalam rangka penjara ah...
2,Sylvi: bagaimana gurbernur melakukan kekerasan...,Non_HS,sylvi bagaimana gurbernur laku keras perempuan...
3,"Ahmad Dhani Tak Puas Debat Pilkada, Masalah Ja...",Non_HS,ahmad dhani tak puas debat pilkada masalah jal...
4,RT @lisdaulay28: Waspada KTP palsu.....kawal P...,Non_HS,waspada ktp palsu kawal pilkada


In [14]:
df.shape

(713, 3)

In [16]:
df['Label'].value_counts()

Non_HS    453
HS        260
Name: Label, dtype: int64

# Vectorization

In [17]:
tfidf_vectorizer = TfidfVectorizer()
X  = tfidf_vectorizer.fit_transform(df['clean_tweet']).toarray()

In [21]:
le = LabelEncoder()
y = le.fit_transform(df['Label'])

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [40]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

mlp = MLPClassifier(random_state=1, max_iter=300)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
accs = []
print("Report : \n", classification_report(y_test, y_pred))
print("Accuracy : ",accuracy_score(y_test,y_pred))
accs.append(accuracy_score(y_test, y_pred))

Report : 
               precision    recall  f1-score   support

           0       0.79      0.65      0.71        57
           1       0.79      0.88      0.84        86

    accuracy                           0.79       143
   macro avg       0.79      0.77      0.77       143
weighted avg       0.79      0.79      0.79       143

Accuracy :  0.7902097902097902


In [41]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train,y_train)
y_pred = gnb.predict(X_test)
print("Report : \n", classification_report(y_test, y_pred))
print("Accuracy : ",accuracy_score(y_test,y_pred))
accs.append(accuracy_score(y_test, y_pred))

Report : 
               precision    recall  f1-score   support

           0       0.70      0.65      0.67        57
           1       0.78      0.81      0.80        86

    accuracy                           0.75       143
   macro avg       0.74      0.73      0.73       143
weighted avg       0.75      0.75      0.75       143

Accuracy :  0.7482517482517482


In [42]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)
print("Report : \n", classification_report(y_test, y_pred))
print("Accuracy : ",accuracy_score(y_test,y_pred))
accs.append(accuracy_score(y_test, y_pred))

Report : 
               precision    recall  f1-score   support

           0       0.79      0.65      0.71        57
           1       0.79      0.88      0.84        86

    accuracy                           0.79       143
   macro avg       0.79      0.77      0.77       143
weighted avg       0.79      0.79      0.79       143

Accuracy :  0.7902097902097902


In [43]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(max_depth=10, random_state=1)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Report : \n", classification_report(y_test, y_pred))
print("Accuracy : ",accuracy_score(y_test,y_pred))
accs.append(accuracy_score(y_test, y_pred))

Report : 
               precision    recall  f1-score   support

           0       0.96      0.46      0.62        57
           1       0.73      0.99      0.84        86

    accuracy                           0.78       143
   macro avg       0.85      0.72      0.73       143
weighted avg       0.82      0.78      0.75       143

Accuracy :  0.7762237762237763


In [44]:
from sklearn.svm import SVC
svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
print("Report : \n", classification_report(y_test, y_pred))
print("Accuracy : ",accuracy_score(y_test,y_pred))
accs.append(accuracy_score(y_test, y_pred))

Report : 
               precision    recall  f1-score   support

           0       1.00      0.54      0.70        57
           1       0.77      1.00      0.87        86

    accuracy                           0.82       143
   macro avg       0.88      0.77      0.79       143
weighted avg       0.86      0.82      0.80       143

Accuracy :  0.8181818181818182


In [45]:
# Perbandingan Akurasi
models = ['Multi Layer Perceptron','Naive Bayes','K-Nearest Neighbor','Random Forest', 'Support Vector Machine']
result_df = pd.DataFrame(list(zip(models, accs)), columns =['Model', 'Accuracy']) 
result_df

Unnamed: 0,Model,Accuracy
0,Multi Layer Perceptron,0.79021
1,Naive Bayes,0.748252
2,K-Nearest Neighbor,0.79021
3,Random Forest,0.776224
4,Support Vector Machine,0.818182
