In [1]:
import pandas as pd
import os
import argparse
from torch.optim import AdamW

from torch import nn
import torch
import numpy as np
import logging
import time

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import classification_report

In [2]:
# Notebook-wide logging: INFO and above, with a timestamped record layout.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)
logger = logging.getLogger(__name__)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Integer class ids for the three values found in the `label` column.
word_to_idx = {"None" : 0, "Positive" : 1, "Negative" : 2}
data_dir = "data/"


def _load_split(file_name):
    """Read one CSV split from `data_dir` and encode its `label` column as ids.

    Args:
        file_name: CSV file name relative to `data_dir`.

    Returns:
        The loaded DataFrame, with `label` replaced by the integer ids
        from `word_to_idx`.

    Raises:
        ValueError: if any label is not a key of `word_to_idx`.
    """
    # keep_default_na=False: pandas >= 2.0 lists the string "None" among its
    # default NA markers, which would turn the "None" class into NaN before
    # it could be mapped to an id.
    frame = pd.read_csv(os.path.join(data_dir, file_name), keep_default_na=False)
    encoded = frame["label"].map(word_to_idx)
    # Series.map yields NaN for values missing from the dict; fail loudly here
    # instead of passing NaN labels on to the classifier.
    unknown = frame.loc[encoded.isna(), "label"].unique()
    if len(unknown) > 0:
        raise ValueError(
            "Unexpected label value(s) in %s: %s" % (file_name, list(unknown))
        )
    frame["label"] = encoded
    return frame


data = _load_split("new_train_QA_M.csv")
test_data = _load_split("new_test_QA_M.csv")


In [4]:
# Build TF-IDF feature vectors from the `sentence1` text column.
#   min_df=5        -> drop terms that occur in fewer than 5 documents
#   max_df=0.8      -> drop terms that occur in more than 80% of documents
#   sublinear_tf    -> use 1 + log(tf) instead of the raw term frequency
vectorizer = TfidfVectorizer(
    use_idf=True,
    sublinear_tf=True,
    max_df=0.8,
    min_df=5,
)

# The vocabulary and IDF weights are learned on the training split only;
# the test split is projected onto that same fitted vocabulary.
train_vectors = vectorizer.fit_transform(data['sentence1'])
test_vectors = vectorizer.transform(test_data['sentence1'])

In [5]:
# Sanity check: (number of training sentences, number of TF-IDF terms kept).
train_vectors.shape

(9144, 1947)

In [6]:
# Sanity check: the column count must match train_vectors, since both come
# from the same fitted vectorizer.
test_vectors.shape

(4652, 1947)

In [7]:
# Perform classification with SVM, kernel=rbf.
# probability=True enables predict_proba, which a later cell uses. scikit-learn
# fits those probability estimates with an internal cross-validation that
# shuffles the data, so random_state is pinned to keep the probabilities (and
# every metric derived from them) reproducible across runs.
classifier = svm.SVC(kernel='rbf', probability=True, random_state=0)

# perf_counter is monotonic, so the measured durations cannot be skewed by
# system clock adjustments.
t0 = time.perf_counter()
classifier.fit(train_vectors, data['label'])
t1 = time.perf_counter()
prediction = classifier.predict(test_vectors)
t2 = time.perf_counter()
time_train = t1 - t0
time_predict = t2 - t1

# results
print("Training time: %fs; Prediction time: %fs" % (time_train, time_predict))
# output_dict=True returns nested dicts keyed by the stringified class id.
report = classification_report(test_data['label'], prediction, output_dict=True)

Training time: 75.103540s; Prediction time: 5.280090s


In [8]:
# Show precision / recall / F1 / support for each class; the report is keyed
# by the stringified integer class id.
for heading, class_key in (('none: ', '0'), ('pos: ', '1'), ('neg: ', '2')):
    print(heading, report[class_key])

none:  {'precision': 0.9108335753703166, 'recall': 0.9126891734575088, 'f1-score': 0.911760430295101, 'support': 3436}
pos:  {'precision': 0.6247357293868921, 'recall': 0.7296296296296296, 'f1-score': 0.6731207289293849, 'support': 810}
neg:  {'precision': 0.7452471482889734, 'recall': 0.4827586206896552, 'f1-score': 0.5859491778774291, 'support': 406}


In [9]:
# Per-class probability estimates for every test example.
prob = classifier.predict_proba(test_vectors)
# Hard predictions for the same examples: reuse the array already computed
# right after fitting instead of running the prediction pass a second time.
pre = prediction

In [10]:
# Write one line per test example: the predicted class id followed by the
# class probabilities, separated by single spaces.
with open(os.path.join('data/', "svm.txt"), "w") as f_test:
    for pred_label, class_probs in zip(pre, prob):
        fields = [str(pred_label)] + [str(p) for p in class_probs]
        f_test.write(" ".join(fields) + "\n")

In [13]:
# Run the external evaluation script on the prediction file written to
# data/svm.txt by the previous cell.
!python3 evaluation.py --pred_data_dir data/svm.txt

aspect_strict_Acc = 0.6208082545141874
aspect_Macro_F1 = 0.7456001831019412
aspect_Macro_AUC = 0.9098300310949083
sentiment_Acc = 0.8231907894736842
sentiment_Macro_AUC = 0.8645656099754613
