# Task 8 of Semeval 2020: Memotion analysis
## Models training and evaluation
This task is divided into three subtasks, which are detailed below.

In [None]:
import pandas as pd
from joblib import dump, load
from sklearn.linear_model import LogisticRegressionCV
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from src.models.ordinal_regression import OrdinalClassifier
from src.utils.files import load_dfs
from src.utils.embeddings import retrieve_all_embeds
from src.utils.reports import generate_report

In [None]:
# Directory in which evaluate() builds the per-embedding ``.joblib`` model file paths.
model_path = "data/models/custom"

In [None]:
def evaluate(clf, embeds, y_train, y_dev, filename, multitask=False):
    """Train (or load) one model per embedding configuration and report dev scores.

    Args:
        clf: Estimator exposing ``fit`` and ``predict``. When ``None``, the model
            previously saved at ``{model_path}/{filename}_{embedding}.joblib`` is
            loaded instead of training a new one.
        embeds: Mapping from an embedding name to a ``(X_train, X_dev, X_test)`` triple.
        y_train: Training targets handed to ``fit``.
        y_dev: Dev targets the dev predictions are scored against.
        filename: Prefix of the joblib file used to save / load the model.
        multitask: When true, ``y_dev`` and the predictions are sliced column by
            column and one classification report is produced per column.

    Returns:
        dict: For every embedding name, a dict with the keys ``"pred_cls_dev"``,
        ``"report_str"`` (a string, or a list of strings when ``multitask``) and
        ``"pred_cls_test"``.
    """
    res = {}
    for item, (X_train, X_dev, X_test) in embeds.items():
        jobfile = "{}/{}_{}.joblib".format(model_path, filename, "_".join(item.split()))
        print("############### Embeddings: {} ####################".format(item))
        if clf is None:
            # Bind the loaded estimator to its own name: the previous code dropped the
            # result of load(), and rebinding ``clf`` would make the next embedding take
            # the training branch and overwrite its saved model.
            model = load(jobfile)
        else:
            model = clf
            model.fit(X_train, y_train)
            dump(model, jobfile)
        y_pred_dev = model.predict(X_dev)
        y_pred_test = model.predict(X_test)
        if not multitask:
            rep = classification_report(y_dev, y_pred_dev)
            print(rep)
        else:
            # One report per target column.
            rep = [
                classification_report(y_dev[:, col], y_pred_dev[:, col])
                for col in range(y_dev.shape[1])
            ]
            cols = ["Humour", "Sarcasm", "Offense", "Motivation"]
            for c, r in zip(cols, rep):
                print("results for class {}:\n{}".format(c, r))
        res[item] = {"pred_cls_dev": y_pred_dev, "report_str": rep, "pred_cls_test": y_pred_test}
    return res

In [None]:
# Train and dev annotation tables.
df_train, df_dev = load_dfs([
    "data/train_cleaned_final.csv",
    "data/dev_cleaned_final.csv",
])

# One pair of feature files per split, in the order train / dev / test
# (presumably sentence features first, image features second -- confirm in
# retrieve_all_embeds).
embed = retrieve_all_embeds([
    ("data/features/use.pkl.train", "data/features/xception.pkl.train"),
    ("data/features/use.pkl.dev", "data/features/xception.pkl.dev"),
    ("data/features/use.pkl.test", "data/features/xception.pkl.test"),
])

## Task A: sentiment polarity detection
Classify memes as negative, neutral or positive. More details here: https://competitions.codalab.org/competitions/20629
We compare ordinal classifiers built on logistic regression, k-nearest neighbours, Gaussian naive Bayes, AdaBoost, random forest and a multi-layer perceptron.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [None]:
# Integer codes of the categorical overall-sentiment column, for train and dev.
y_train_a, y_dev_a = (
    frame["Overall_sentiment"].cat.codes for frame in (df_train, df_dev)
)

In [None]:
# Bare expression: in a notebook cell this displays the encoded dev labels.
y_dev_a

In [None]:
# Task A, logistic regression. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
lr_oc = None
# lr_oc = OrdinalClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6))

res_a_lr = evaluate(
    clf=lr_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_lr",
)

In [None]:
# Task A, k-nearest neighbours. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
knn_oc = None
# knn_oc = OrdinalClassifier(KNeighborsClassifier(n_jobs=6))

res_a_knn = evaluate(
    clf=knn_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_knn",
)

In [None]:
# Task A, Gaussian naive Bayes. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
gnb_oc = None
# gnb_oc = OrdinalClassifier(GaussianNB())

res_a_gnb = evaluate(
    clf=gnb_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_gnb",
)

In [None]:
# Task A, AdaBoost. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
abc_oc = None
# abc_oc = OrdinalClassifier(AdaBoostClassifier(n_estimators=100, random_state=0))

res_a_abc = evaluate(
    clf=abc_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_abc",
)

In [None]:
# Task A, random forest. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
rf_oc = None
# rf_oc = OrdinalClassifier(RandomForestClassifier(random_state=0, oob_score=True, n_jobs=6))

res_a_rf = evaluate(
    clf=rf_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_rf",
)

In [None]:
# Task A, multi-layer perceptron. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
mlp_oc = None
# mlp_oc = OrdinalClassifier(MLPClassifier(max_iter=1000))

res_a_mlp = evaluate(
    clf=mlp_oc,
    embeds=embed,
    y_train=y_train_a,
    y_dev=y_dev_a,
    filename="task_a_mlp",
)

## Task B: Multilabel sentiment detection
Classify memes as humorous, sarcastic, offensive and/or motivational. One meme can have multiple sentiments.
More details here: https://competitions.codalab.org/competitions/20629
We compare one-vs-rest classifiers built on logistic regression, k-nearest neighbours, Gaussian naive Bayes, AdaBoost, random forest and a multi-layer perceptron.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [None]:
# Binary label matrix (one column per sentiment), cast to int, for train and dev.
y_train_b, y_dev_b = (
    frame[["Humour_bin", "Sarcasm_bin", "Offense_bin", "Motivation_bin"]]
    .to_numpy()
    .astype(int)
    for frame in (df_train, df_dev)
)

In [None]:
# Bare expression: in a notebook cell this displays the dev label matrix.
y_dev_b

In [None]:
# Task B, logistic regression. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
lr_ovc = None
# lr_ovc = OneVsRestClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6))

res_b_lr = evaluate(
    clf=lr_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_lr",
)

In [None]:
# Task B, k-nearest neighbours. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
knn_ovc = None
# knn_ovc = OneVsRestClassifier(KNeighborsClassifier(n_jobs=6))

res_b_knn = evaluate(
    clf=knn_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_knn",
)

In [None]:
# Task B, Gaussian naive Bayes. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
gnb_ovc = None
# gnb_ovc = OneVsRestClassifier(GaussianNB())

res_b_gnb = evaluate(
    clf=gnb_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_gnb",
)

In [None]:
# Task B, AdaBoost. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
abc_ovc = None
# abc_ovc = OneVsRestClassifier(AdaBoostClassifier(n_estimators=100, random_state=0))

res_b_abc = evaluate(
    clf=abc_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_abc",
)

In [None]:
# Task B, random forest. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
rf_ovc = None
# rf_ovc = OneVsRestClassifier(RandomForestClassifier(random_state=0,oob_score=True, n_jobs=6))

res_b_rf = evaluate(
    clf=rf_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_rf",
)

In [None]:
# Task B, multi-layer perceptron. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
mlp_ovc = None
# mlp_ovc = OneVsRestClassifier(MLPClassifier(max_iter=1000))

res_b_mlp = evaluate(
    clf=mlp_ovc,
    embeds=embed,
    y_train=y_train_b,
    y_dev=y_dev_b,
    filename="task_b_mlp",
)

## Task C: Multilabel sentiment intensity detection
Classify the degree of humour, sarcasm, offense and motivation of each meme. 
One meme can have multiple sentiments of different intensities. Each sentiment intensity is ranked from 0 (not at all) 
to 5 (very much).
More details here: https://competitions.codalab.org/competitions/20629
We compare multi-output ordinal classifiers built on logistic regression, k-nearest neighbours, Gaussian naive Bayes, AdaBoost, random forest and a multi-layer perceptron.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [None]:
# Intensity targets: one column of category codes per sentiment, for train and dev.
cols = ["Humour", "Sarcasm", "Offense", "Motivation"]
y_train_c, y_dev_c = (
    pd.concat([frame[name].cat.codes for name in cols], axis=1).to_numpy()
    for frame in (df_train, df_dev)
)

In [None]:
def generate_reports(res_a, res_b, res_c, model_name):
    """Call ``generate_report`` once per embedding configuration for one model.

    Args:
        res_a: Results for task A, indexed as ``res_a[config]["pred_cls_test"]``.
        res_b: Results for task B, same layout.
        res_c: Results for task C, same layout.
        model_name: Short model tag inserted into the archive name
            ``res_{model_name}_{config}.zip``.

    Returns:
        The value returned by ``generate_report`` for the last configuration
        ("concatenated").
    """
    configs = ["text only", "image only", "concatenated"]
    for c in configs:
        # Read from the arguments: the previous code read the global
        # logistic-regression results here regardless of what was passed in.
        task_a = res_a[c]["pred_cls_test"]
        task_b = res_b[c]["pred_cls_test"]
        task_c = res_c[c]["pred_cls_test"]
        r = generate_report(
            task_a,
            task_b,
            task_c,
            zipname="res_{}_{}.zip".format(model_name, "_".join(c.split())),
        )
    return r

In [None]:
# Task C, logistic regression. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
lr_ovc_oc = None
# lr_ovc_oc = MultiOutputClassifier(OrdinalClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6)))

res_c_lr = evaluate(
    clf=lr_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_lr",
    multitask=True,
)

In [7]:
# Task C, k-nearest neighbours. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
knn_ovc_oc = None
# knn_ovc_oc = MultiOutputClassifier(OrdinalClassifier(KNeighborsClassifier(n_jobs=6)))

res_c_knn = evaluate(
    clf=knn_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_knn",
    multitask=True,
)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.19      0.26      0.22       227
           1       0.33      0.40      0.36       343
           2       0.31      0.22      0.26       341
           3       0.04      0.01      0.02        89

    accuracy                           0.27      1000
   macro avg       0.22      0.22      0.21      1000
weighted avg       0.26      0.27      0.26      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.30      0.29      0.29       249
           1       0.50      0.64      0.56       491
           2       0.17      0.10      0.13       214
           3       0.00      0.00      0.00        46

    accuracy                           0.41      1000
   macro avg       0.24      0.26      0.25      1000
weighted avg       0.36      0.41      0.38      1000

results for class

In [9]:
# Task C, Gaussian naive Bayes. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
gnb_ovc_oc = None
# gnb_ovc_oc = MultiOutputClassifier(OrdinalClassifier(GaussianNB()))

res_c_gnb = evaluate(
    clf=gnb_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_gnb",
    multitask=True,
)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.23      0.77      0.36       227
           1       0.38      0.10      0.15       343
           2       0.38      0.11      0.16       341
           3       0.10      0.07      0.08        89

    accuracy                           0.25      1000
   macro avg       0.27      0.26      0.19      1000
weighted avg       0.32      0.25      0.20      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.24      0.71      0.36       249
           1       0.46      0.12      0.19       491
           2       0.22      0.09      0.13       214
           3       0.03      0.02      0.02        46

    accuracy                           0.26      1000
   macro avg       0.24      0.24      0.18      1000
weighted avg       0.33      0.26      0.21      1000

results for class

In [10]:
# Task C, AdaBoost. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
abc_ovc_oc = None
# abc_ovc_oc = MultiOutputClassifier(OrdinalClassifier(AdaBoostClassifier(n_estimators=100, random_state=0)))

res_c_abc = evaluate(
    clf=abc_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_abc",
    multitask=True,
)

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.23      0.83      0.35       227
           1       0.00      0.00      0.00       343
           2       0.00      0.00      0.00       341
           3       0.12      0.21      0.15        89

    accuracy                           0.21      1000
   macro avg       0.09      0.26      0.13      1000
weighted avg       0.06      0.21      0.09      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.25      0.90      0.39       249
           1       0.00      0.00      0.00       491
           2       0.00      0.00      0.00       214
           3       0.02      0.04      0.03        46

    accuracy                           0.23      1000
   macro avg       0.07      0.23      0.10      1000
weighted avg       0.06      0.23      0.10      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.26      0.91      0.40       227
           1       0.00      0.00      0.00       343
           2       0.00      0.00      0.00       341
           3       0.21      0.48      0.30        89

    accuracy                           0.25      1000
   macro avg       0.12      0.35      0.17      1000
weighted avg       0.08      0.25      0.12      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.27      0.96      0.42       249
           1       0.00      0.00      0.00       491
           2       0.00      0.00      0.00       214
           3       0.17      0.43      0.25        46

    accuracy                           0.26      1000
   macro avg       0.11      0.35      0.17      1000
weighted avg       0.07      0.26      0.12      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# Task C, random forest. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
rf_ovc_oc = None
# rf_ovc_oc = MultiOutputClassifier(OrdinalClassifier(RandomForestClassifier(random_state=0, oob_score=True, n_jobs=6)))

res_c_rf = evaluate(
    clf=rf_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_rf",
    multitask=True,
)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.19      0.20      0.20       227
           1       0.36      0.39      0.37       343
           2       0.33      0.30      0.32       341
           3       0.11      0.10      0.11        89

    accuracy                           0.29      1000
   macro avg       0.25      0.25      0.25      1000
weighted avg       0.29      0.29      0.29      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.30      0.23      0.26       249
           1       0.50      0.57      0.53       491
           2       0.20      0.17      0.18       214
           3       0.00      0.00      0.00        46

    accuracy                           0.38      1000
   macro avg       0.25      0.24      0.24      1000
weighted avg       0.36      0.38      0.37      1000

results for class

In [None]:
# Task C, multi-layer perceptron. None selects the pretrained model;
# uncomment the assignment below to train from scratch instead.
mlp_ovc_oc = None
# mlp_ovc_oc = MultiOutputClassifier(OrdinalClassifier(MLPClassifier(max_iter=1000)))

res_c_mlp = evaluate(
    clf=mlp_ovc_oc,
    embeds=embed,
    y_train=y_train_c,
    y_dev=y_dev_c,
    filename="task_c_mlp",
    multitask=True,
)

############### Embeddings: image only ####################




## Generating report
We take the results of the best classifier for each task (here the random forest) and generate a report following the guidelines provided here: https://competitions.codalab.org/competitions/20629#learn_the_details-submission-guidelines. The cells below also generate the reports for the other classifiers.

In [None]:
# Reports for the logistic-regression results (archives named res_lr_<config>.zip).
generate_reports(res_a=res_a_lr, res_b=res_b_lr, res_c=res_c_lr, model_name="lr")

In [None]:
# Reports for the k-nearest-neighbours results (archives named res_knn_<config>.zip).
generate_reports(res_a=res_a_knn, res_b=res_b_knn, res_c=res_c_knn, model_name="knn")

In [None]:
# Reports for the Gaussian naive Bayes results (archives named res_gnb_<config>.zip).
generate_reports(res_a=res_a_gnb, res_b=res_b_gnb, res_c=res_c_gnb, model_name="gnb")

In [None]:
# Reports for the AdaBoost results (archives named res_abc_<config>.zip).
generate_reports(res_a=res_a_abc, res_b=res_b_abc, res_c=res_c_abc, model_name="abc")

In [None]:
# Reports for the random-forest results (archives named res_rf_<config>.zip).
generate_reports(res_a=res_a_rf, res_b=res_b_rf, res_c=res_c_rf, model_name="rf")

In [None]:
# Reports for the multi-layer perceptron results (archives named res_mlp_<config>.zip).
generate_reports(res_a=res_a_mlp, res_b=res_b_mlp, res_c=res_c_mlp, model_name="mlp")