In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron, LogisticRegressionCV
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score
import gensim.downloader as api
from tqdm import tqdm
from nltk import word_tokenize

from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer
import util.helpers as helper

In [2]:
data_path = "data/"

intent = pd.read_csv(data_path + "intent_Tweets.csv", index_col=0)

print("Number of Negative Reasons: ", intent.negativereason.count())
intent.negativereason.value_counts()

Number of Negative Reasons:  9178


Customer Service Issue         2910
Late Flight                    1665
Can't Tell                     1190
Cancelled Flight                847
Lost Luggage                    724
Bad Flight                      580
Flight Booking Problems         529
Flight Attendant Complaints     481
longlines                       178
Damaged Luggage                  74
Name: negativereason, dtype: int64

In [3]:
intent = intent[intent['negativereason'].notna()]
intent.reset_index(drop=True, inplace=True)
num_labels = len(intent.negativereason.unique())

rmap = {reason: i for i, reason in enumerate(intent.negativereason.unique())}
imap = {i: reason for reason, i in rmap.items()}

intent.negativereason = intent.negativereason.apply(lambda x: rmap[x])

In [4]:
vectorizer = TfidfVectorizer(min_df=0.0001, max_df=0.9999, ngram_range=(1,3), max_features=10000)
X = vectorizer.fit_transform(intent.text)

scaler = StandardScaler(with_mean=False)
scaler.fit(X)

xTrain, xTest, yTrain, yTest = train_test_split(X, intent.negativereason, train_size=0.8, stratify=intent.negativereason)

In [5]:
clf = OneVsRestClassifier(Perceptron(n_jobs=-1), n_jobs=-1)
clf.fit(scaler.transform(xTrain), yTrain)
# clf.fit(xTrain, yTrain)

yPred = clf.predict(xTest)

precisions = precision_score(yTest, yPred, average=None)
recalls = recall_score(yTest, yPred, average=None)
f1s = f1_score(yTest, yPred, average=None)

helper.evaluate_score(yTest, yPred)

Accuracy:	 0.5615468409586056
Precision:	 [0.43421053 0.36796537 0.65486726 0.59499264 0.57894737 0.62773723
 0.46153846 0.75757576 0.07692308 0.36363636]
Recall: 	 [0.28448276 0.35714286 0.66666667 0.69415808 0.31132075 0.59310345
 0.3125     0.73964497 0.33333333 0.22222222]
F1 scores:	 [0.34375    0.36247335 0.66071429 0.6407613  0.40490798 0.60992908
 0.37267081 0.74850299 0.125      0.27586207]
Average
	Precision: 0.5671425701633341
	Recall: 0.5615468409586056
	F1: 0.5580196026576756


In [6]:
clf = LogisticRegressionCV(cv=5, max_iter=5000)
clf.fit(xTrain, yTrain)

yPred = clf.predict(xTest)

helper.evaluate_score(yTest, yPred)

Accuracy:	 0.599128540305011
Precision:	 [0.52631579 0.41921397 0.66386555 0.56947891 0.55555556 0.73282443
 0.58       0.8137931  1.         0.66666667]
Recall: 	 [0.25862069 0.40336134 0.71171171 0.78865979 0.28301887 0.66206897
 0.30208333 0.69822485 0.06666667 0.11111111]
F1 scores:	 [0.34682081 0.4111349  0.68695652 0.66138329 0.375      0.69565217
 0.39726027 0.75159236 0.125      0.19047619]
Average
	Precision: 0.6049504216536128
	Recall: 0.599128540305011
	F1: 0.5807574713402027


In [7]:
clf = LinearSVC()
clf.fit(xTrain, yTrain)

yPred = clf.predict(xTest)

helper.evaluate_score(yTest, yPred)

Accuracy:	 0.5996732026143791
Precision:	 [0.48192771 0.42222222 0.65730337 0.59803922 0.49350649 0.71328671
 0.53731343 0.76923077 1.         0.5       ]
Recall: 	 [0.34482759 0.39915966 0.7027027  0.73367698 0.35849057 0.70344828
 0.375      0.71005917 0.2        0.16666667]
F1 scores:	 [0.40201005 0.41036717 0.67924528 0.65895062 0.41530055 0.70833333
 0.44171779 0.73846154 0.33333333 0.25      ]
Average
	Precision: 0.5956719502388577
	Recall: 0.5996732026143791
	F1: 0.5892884478077615


In [8]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", Truncation=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=num_labels)

In [9]:
data_og = pd.concat([intent.text, intent.negativereason], axis=1)

data = []
for t in data_og.iterrows():
    temp = {}
    temp["text"] = t[1].text
    temp["label"] = t[1].negativereason
    temp.update(tokenizer(t[1].text))
    data.append(temp)

train, test = train_test_split(data, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

In [None]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=num_labels)

training_args = TrainingArguments(
    output_dir="./intent_results",
    learning_rate=2e-5,
    evaluation_strategy = "steps",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
)

trainer_tuned = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train,
    eval_dataset=val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

In [None]:
best_run = trainer_tuned.hyperparameter_search(n_trials=10, direction="maximize")

In [None]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer_tuned.args, n, v)

trainer_tuned.train()

In [None]:
preds = trainer_tuned.predict(test_dict)

***** Running Prediction *****
  Num examples = 1836
  Batch size = 8
The following columns in the test set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.


In [None]:
yTrue = [t["label"] for t in test_dict]
yPred = [p.argmax() for p in preds[0]]
helper.evaluate_score(yTrue, yPred)

Accuracy:	 0.35185185185185186
Precision:	 [0.         0.         0.33707865 0.35436584 0.         0.
 0.         0.         0.         0.        ]
Recall: 	 [0.         0.         0.27607362 0.95697074 0.         0.
 0.         0.         0.         0.        ]
F1 scores:	 [0.         0.         0.30354132 0.5172093  0.         0.
 0.         0.         0.         0.        ]
Average
	Precision: 0.17199030086783854
	Recall: 0.35185185185185186
	F1: 0.21756703347160183


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
