In [13]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from build_df import build_df
from sklearn import feature_extraction, model_selection, linear_model, pipeline, metrics, svm, naive_bayes

In [14]:
# Load the labelled corpus via the project helper and pull out the two
# columns used below: the raw text and the label column.
df = build_df()

X = df['text']
y = df['party_num_label']  # presumably a numeric party id - confirm in build_df

# Hold out 10% of the rows for evaluation. The fixed random_state makes the
# shuffle deterministic, so a Restart & Run All reproduces the same split and
# the scores of the models trained below stay comparable between runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [43]:
# Linear SVM trained with stochastic gradient descent: TF-IDF features feed an
# SGDClassifier using hinge loss.

svm_model = pipeline.Pipeline([
    ('tfidf', feature_extraction.text.TfidfVectorizer()),
    # SGD shuffles the training data each epoch; seeding it makes the fitted
    # model (and therefore the reported score) repeatable.
    ('clf', linear_model.SGDClassifier(loss='hinge', random_state=42))
])

svm_model.fit(X_train, y_train)

y_predict = svm_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order; keep it consistent
# with the classification_report call below.
accuracy = metrics.accuracy_score(y_test, y_predict)
print(f'SVM accuracy: {accuracy}')
print(metrics.classification_report(y_test, y_predict))

SVM accuracy: 0.7554744525547445
              precision    recall  f1-score   support

           0       0.75      0.76      0.75       136
           1       0.76      0.75      0.76       138

    accuracy                           0.76       274
   macro avg       0.76      0.76      0.76       274
weighted avg       0.76      0.76      0.76       274



In [41]:
# Linear support vector classifier (LinearSVC) on TF-IDF features.

svc_model = pipeline.Pipeline([
    ('tfidf', feature_extraction.text.TfidfVectorizer()),
    # random_state only matters when the dual solver is used, where
    # the data is shuffled; fixing it keeps the run repeatable either way.
    ('clf', svm.LinearSVC(random_state=42))
])

svc_model.fit(X_train, y_train)

y_predict = svc_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order; keep it consistent
# with the classification_report call below.
accuracy = metrics.accuracy_score(y_test, y_predict)
print(f'SVC accuracy: {accuracy}')
print(metrics.classification_report(y_test, y_predict))

SVC accuracy: 0.7408759124087592
              precision    recall  f1-score   support

           0       0.74      0.74      0.74       136
           1       0.74      0.74      0.74       138

    accuracy                           0.74       274
   macro avg       0.74      0.74      0.74       274
weighted avg       0.74      0.74      0.74       274



In [40]:
# Multinomial naive Bayes on TF-IDF features.

nb_model = pipeline.Pipeline([
    ('tfidf', feature_extraction.text.TfidfVectorizer()),
    ('clf', naive_bayes.MultinomialNB())
])

nb_model.fit(X_train, y_train)

y_predict = nb_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric so the value is unchanged, but the call now matches the documented
# signature and the classification_report call below.
accuracy = metrics.accuracy_score(y_test, y_predict)
print(f'NB accuracy: {accuracy}')
print(metrics.classification_report(y_test, y_predict))

NB accuracy: 0.6970802919708029
              precision    recall  f1-score   support

           0       0.72      0.64      0.68       136
           1       0.68      0.75      0.71       138

    accuracy                           0.70       274
   macro avg       0.70      0.70      0.70       274
weighted avg       0.70      0.70      0.70       274

