# FastText Pre-Trained Model Testing

In [1]:
# data manipulation
import pandas as pd
from sklearn.model_selection import train_test_split # train-test split

# model
import fasttext

# model metrics
from sklearn.metrics import f1_score, precision_score, recall_score 

In [2]:
# load test datasets
test_filenames = ['data/all_test.csv', 'data/news_test.csv', 'data/reddit_test.csv', 'data/twitter_test.csv']

## Amazon Polarity Review Model

In [3]:
# load model
model_amazon = fasttext.load_model("utils/fasttext/amazon_review_polarity.ftz")



In [4]:
print("amazon reviews fasttext model")
print("----------------------------------------------------------------------")
for i in range(len(test_filenames)):
    print("results for:", test_filenames[i][5:])
    
    test_df = pd.read_csv(test_filenames[i], header=0)
    X_test = test_df["text"]
    y_actual = test_df["label"]

    y_pred = [model_amazon.predict(x.replace("\n", ""))[0][0][-1] for x in X_test]
    y_pred = [1 if (x=='1') else 0 for x in y_pred]

    precision = precision_score(y_actual, y_pred, average="binary", pos_label=1)
    recall = recall_score(y_actual, y_pred, average="binary", pos_label=1)
    f1 = f1_score(y_actual, y_pred, average="binary", pos_label=1)

    print("precision:", precision)
    print("recall:", recall)
    print("f1:", f1)
    print("----------------------------------------------------------------------")

amazon reviews fasttext model
----------------------------------------------------------------------
results for: all_test.csv
precision: 0.43033509700176364
recall: 0.6931818181818182
f1: 0.5310119695321002
----------------------------------------------------------------------
results for: news_test.csv
precision: 0.27235772357723576
recall: 0.5677966101694916
f1: 0.3681318681318681
----------------------------------------------------------------------
results for: reddit_test.csv
precision: 0.5416666666666666
recall: 0.7814207650273224
f1: 0.6398210290827739
----------------------------------------------------------------------
results for: twitter_test.csv
precision: 0.5964912280701754
recall: 0.6666666666666666
f1: 0.6296296296296297
----------------------------------------------------------------------


## Yelp Polarity Review Model 

In [6]:
# load model
model_yelp = fasttext.load_model("utils/fasttext/yelp_review_polarity.ftz")



In [7]:
print("yelp reviews fasttext model")
print("----------------------------------------------------------------------")

for i in range(len(test_filenames)):
    print("results for:", test_filenames[i][5:])
    
    test_df = pd.read_csv(test_filenames[i], header=0)
    X_test = test_df["text"]
    y_actual = test_df["label"]

    y_pred = [model_yelp.predict(x.replace("\n", ""))[0][0][-1] for x in X_test]
    y_pred = [1 if (x=='1') else 0 for x in y_pred]

    precision = precision_score(y_actual, y_pred, average="binary", pos_label=1)
    recall = recall_score(y_actual, y_pred, average="binary", pos_label=1)
    f1 = f1_score(y_actual, y_pred, average="binary", pos_label=1)

    print("precision:", precision)
    print("recall:", recall)
    print("f1:", f1)
    print("----------------------------------------------------------------------")

yelp reviews fasttext model
----------------------------------------------------------------------
results for: all_test.csv
precision: 0.36005089058524176
recall: 0.8039772727272727
f1: 0.4973637961335677
----------------------------------------------------------------------
results for: news_test.csv
precision: 0.2446043165467626
recall: 0.864406779661017
f1: 0.3813084112149533
----------------------------------------------------------------------
results for: reddit_test.csv
precision: 0.4576271186440678
recall: 0.7377049180327869
f1: 0.5648535564853557
----------------------------------------------------------------------
results for: twitter_test.csv
precision: 0.6216216216216216
recall: 0.9019607843137255
f1: 0.736
----------------------------------------------------------------------
