In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Classifier and feature extractor for the first (SUG) experiment:
# an RBF-kernel SVM with balanced class weights on top of default TF-IDF features.
svc_classifier = SVC(class_weight="balanced", kernel="rbf")
vectorizer = TfidfVectorizer()

In [None]:
# Read the training CSV; the four column labels are supplied explicitly via `names=`.
df = pd.read_csv(
    "../data/App_Training.csv",
    names=["sno", "id", "text", "lbl"],
)

# Strip surrounding whitespace from every text, then learn the TF-IDF vocabulary
# on the training texts and vectorize them in one step.
Xtrain = vectorizer.fit_transform(df["text"].map(lambda doc: doc.strip()).tolist())

# Labels as a plain NumPy array for the classifier.
ytrain = df["lbl"].to_numpy()

In [None]:
# Train the SVM on the TF-IDF training matrix and its labels.
svc_classifier.fit(X=Xtrain, y=ytrain)

In [None]:
# Read the labelled test CSV with the same explicit column labels as the training file.
# NOTE: this rebinds `df`, so the training frame is no longer reachable under that name.
df = pd.read_csv(
    "../data/App_Test_Labeled.csv",
    names=["sno", "id", "text", "lbl"],
)

# Vectorize with the already-fitted vocabulary (`transform`, not `fit_transform`).
Xtest = vectorizer.transform(df["text"].map(lambda doc: doc.strip()).tolist())

# Ground-truth labels as a plain NumPy array.
ytest = df["lbl"].to_numpy()

In [None]:
# Predict on the held-out split and report metrics.
ypred = svc_classifier.predict(Xtest)

# f1_score is called with its default binary settings, i.e. it scores the class
# labelled 1 (the positive class).
print(f"test f1_score (SUG): {f1_score(ytest, ypred, zero_division=0):.4f}\n")

# classification_report assigns `target_names` to the labels in sorted order, so
# the name for the lower label must come first. The original order
# ['Positive', 'Negative'] named the lower label "Positive", contradicting the
# F1 printed above (which treats label 1 as positive).
# NOTE(review): assumes `lbl` holds 0 (negative) / 1 (positive) — confirm against the data.
print(classification_report(ytest, ypred, target_names=['Negative', 'Positive'], digits=4))

# Use NER Data and perform Suggestion Classification

In [None]:
# Fresh, unfitted classifier and feature extractor for the NER-data experiment.
# NOTE: these rebind `svc_classifier` and `vectorizer`, replacing any objects
# previously stored under those names.
svc_classifier = SVC(class_weight="balanced", kernel="rbf")
vectorizer = TfidfVectorizer()

In [None]:
# Read the space-separated, header-less NER training file into columns a / b / c.
# Columns 'a' and 'b' are parsed field-by-field with pd.eval.
# NOTE(review): pd.eval evaluates file contents as expressions; if the fields are
# plain Python literals, ast.literal_eval would be a safer converter — confirm
# the file format before switching.
df_ner = pd.read_csv(
    "../data/train_290818.txt",
    sep=' ',
    header=None,
    names=['a', 'b', 'c'],
    encoding="utf-8",
    converters=dict.fromkeys(['a', 'b'], pd.eval),
)

# Collapse column 'c' to a binary label: 1 for any truthy value, 0 otherwise.
df_ner['c'] = df_ner['c'].apply(lambda flag: 1 if flag else 0)

# Column 'a' holds a sequence of strings per row; join it into one space-separated text.
df_ner['a'] = df_ner['a'].apply(' '.join)

# Learn the TF-IDF vocabulary on the joined texts and vectorize them.
Xtrain_ner = vectorizer.fit_transform(df_ner['a'].tolist())
ytrain_ner = df_ner['c'].to_numpy()

In [None]:
# Train the SVM on the NER-derived TF-IDF matrix and its binary labels.
svc_classifier.fit(X=Xtrain_ner, y=ytrain_ner)

In [None]:
# Read the space-separated, header-less NER test file into columns a / b / c.
# This rebinds `df_ner`, so the training frame is no longer reachable under that name.
# NOTE(review): pd.eval evaluates file contents as expressions; if the fields are
# plain Python literals, ast.literal_eval would be a safer converter — confirm
# the file format before switching.
df_ner = pd.read_csv(
    "../data/test_290818.txt",
    sep=' ',
    header=None,
    names=['a', 'b', 'c'],
    encoding="utf-8",
    converters=dict.fromkeys(['a', 'b'], pd.eval),
)

# Collapse column 'c' to a binary label: 1 for any truthy value, 0 otherwise.
df_ner['c'] = df_ner['c'].apply(lambda flag: 1 if flag else 0)

# Column 'a' holds a sequence of strings per row; join it into one space-separated text.
df_ner['a'] = df_ner['a'].apply(' '.join)

# Vectorize with the already-fitted vocabulary (`transform`, not `fit_transform`).
Xtest_ner = vectorizer.transform(df_ner['a'].tolist())
ytest_ner = df_ner['c'].to_numpy()

In [None]:
# Predict on the NER test split and report metrics.
ypred_ner = svc_classifier.predict(Xtest_ner)

# f1_score is called with its default binary settings, i.e. it scores label 1.
print(f"test f1_score (NER): {f1_score(ytest_ner, ypred_ner, zero_division=0):.4f}\n")

# The loading cell encodes column 'c' as 0 (falsy) / 1 (truthy), so label 1 is the
# positive class. classification_report pairs `target_names` with `labels`
# positionally; the original ['Positive', 'Negative'] order therefore named
# class 0 "Positive". Pin the labels explicitly and list the names in matching order.
print(classification_report(ytest_ner, ypred_ner,
                            labels=[0, 1],
                            target_names=['Negative', 'Positive'],
                            digits=4))