# Task 8 of Semeval 2020: Memotion analysis
## Models training and evaluation
This task is divided into 3 subtasks which are detailed below

In [71]:
import pandas as pd
from joblib import dump, load
from sklearn.linear_model import LogisticRegressionCV
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from src.models.ordinal_regression import OrdinalClassifier
from src.utils.files import load_dfs
from src.utils.embeddings import retrieve_all_embeds
from src.utils.reports import generate_report

In [72]:
model_path = "data/models/custom"

In [73]:
def evaluate(clf, embeds, y_train, y_dev, filename, multitask=False):
    res = {}
    pretrained = True if clf is None else False
    for item, (X_train, X_dev, X_test) in embeds.items():
        jobfile = "{}/{}_{}.joblib".format(model_path, filename, "_".join(item.split()))
        print("############### Embeddings: {} ####################".format(item))
        if pretrained:
            clf = load(jobfile)
        else:
            clf.fit(X_train, y_train)
            dump(clf, jobfile) 
        y_pred_dev = clf.predict(X_dev)
        y_pred_test = clf.predict(X_test)
        if not multitask:
            rep = classification_report(y_dev, y_pred_dev)
            print(rep)
        else:
            rep = [classification_report(y_dev[:,col], y_pred_dev[:,col]) for col in range(y_dev.shape[1])]
            cols = ["Humour", "Sarcasm", "Offense", "Motivation"]
            for c, r in list(zip(cols, rep)):
                print("results for class {}:\n{}".format(c, r))
        res[item] = {"pred_cls_dev": y_pred_dev, "report_str": rep, "pred_cls_test": y_pred_test}
        
    return res

In [74]:
df_train, df_dev = load_dfs(["data/train_cleaned_final.csv", "data/dev_cleaned_final.csv"])
embed = retrieve_all_embeds([("data/features/use.pkl.train", "data/features/xception.pkl.train", "data/features/dcca.pkl.train"), 
                             ("data/features/use.pkl.dev","data/features/xception.pkl.dev", "data/features/dcca.pkl.dev"),
                             ("data/features/use.pkl.test", "data/features/xception.pkl.test", "data/features/dcca.pkl.test")])

## Task A: sentiment polarity detection
Classify memes as negative, neutral or positive. More details here: https://competitions.codalab.org/competitions/20629
We compare the results of Ordinal classifier with logistic regression, SVM and random forest.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [75]:
y_train_a = df_train["Overall_sentiment"].cat.codes
y_dev_a = df_dev["Overall_sentiment"].cat.codes

In [76]:
y_dev_a

0      2
1      0
2      1
3      2
4      1
      ..
995    2
996    2
997    2
998    2
999    2
Length: 1000, dtype: int8

In [77]:
lr_oc = None
# Uncomment for training the model instead of using the pretrained one
# lr_oc = OrdinalClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6))

res_a_lr = evaluate(lr_oc, embed, y_train_a, y_dev_a, "task_a_lr")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        80
           1       0.00      0.00      0.00       302
           2       0.62      1.00      0.76       618

    accuracy                           0.62      1000
   macro avg       0.21      0.33      0.25      1000
weighted avg       0.38      0.62      0.47      1000

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        80
           1       0.00      0.00      0.00       302
           2       0.62      1.00      0.76       618

    accuracy                           0.62      1000
   macro avg       0.21      0.33      0.25      1000
weighted avg       0.38      0.62      0.47      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [78]:
knn_oc = None
# Uncomment for training the model instead of using the pretrained one
# knn_oc = OrdinalClassifier(KNeighborsClassifier(n_jobs=6))

res_a_knn = evaluate(knn_oc, embed, y_train_a, y_dev_a, "task_a_knn")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.04      0.03      0.03        80
           1       0.30      0.30      0.30       302
           2       0.61      0.64      0.63       618

    accuracy                           0.49      1000
   macro avg       0.32      0.32      0.32      1000
weighted avg       0.47      0.49      0.48      1000

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.24      0.16      0.19        80
           1       0.47      0.40      0.43       302
           2       0.70      0.77      0.73       618

    accuracy                           0.61      1000
   macro avg       0.47      0.45      0.45      1000
weighted avg       0.59      0.61      0.60      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

      

In [79]:
gnb_oc = None
# Uncomment for training the model instead of using the pretrained one
# gnb_oc = OrdinalClassifier(GaussianNB())

res_a_gnb = evaluate(gnb_oc, embed, y_train_a, y_dev_a, "task_a_gnb")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.08      0.65      0.14        80
           1       0.27      0.15      0.19       302
           2       0.62      0.16      0.26       618

    accuracy                           0.20      1000
   macro avg       0.32      0.32      0.20      1000
weighted avg       0.47      0.20      0.23      1000

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.17      0.14      0.15        80
           1       0.40      0.23      0.29       302
           2       0.65      0.80      0.72       618

    accuracy                           0.58      1000
   macro avg       0.41      0.39      0.39      1000
weighted avg       0.54      0.58      0.54      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

      

In [80]:
abc_oc = None
# Uncomment for training the model instead of using the pretrained one
# abc_oc = OrdinalClassifier(AdaBoostClassifier(n_estimators=100, random_state=0))

res_a_abc = evaluate(abc_oc, embed, y_train_a, y_dev_a, "task_a_abc")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.11      0.03      0.04        80
           1       0.00      0.00      0.00       302
           2       0.62      0.98      0.76       618

    accuracy                           0.61      1000
   macro avg       0.24      0.34      0.27      1000
weighted avg       0.39      0.61      0.47      1000

############### Embeddings: text only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.57      0.10      0.17        80
           1       0.00      0.00      0.00       302
           2       0.63      1.00      0.77       618

    accuracy                           0.62      1000
   macro avg       0.40      0.37      0.31      1000
weighted avg       0.43      0.62      0.49      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

           0       0.30      0.04      0.07        80
           1       0.00      0.00      0.00       302
           2       0.62      0.99      0.76       618

    accuracy                           0.62      1000
   macro avg       0.31      0.34      0.28      1000
weighted avg       0.41      0.62      0.48      1000

############### Embeddings: concatenated ####################
              precision    recall  f1-score   support

           0       0.36      0.05      0.09        80
         

  _warn_prf(average, modifier, msg_start, len(result))


In [81]:
rf_oc = None
# Uncomment for training the model instead of using the pretrained one
# rf_oc = OrdinalClassifier(RandomForestClassifier(random_state=0, oob_score=True, n_jobs=6))

res_a_rf = evaluate(rf_oc, embed, y_train_a, y_dev_a, "task_a_rf")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.07      0.07      0.07        80
           1       0.30      0.25      0.27       302
           2       0.63      0.68      0.65       618

    accuracy                           0.50      1000
   macro avg       0.33      0.33      0.33      1000
weighted avg       0.48      0.50      0.49      1000

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.95      0.76      0.85        80
           1       0.98      0.76      0.86       302
           2       0.87      0.99      0.93       618

    accuracy                           0.90      1000
   macro avg       0.93      0.84      0.88      1000
weighted avg       0.91      0.90      0.90      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

      

In [82]:
mlp_oc = None
# Uncomment for training the model instead of using the pretrained one
# mlp_oc = OrdinalClassifier(MLPClassifier(max_iter=1000))

res_a_mlp = evaluate(mlp_oc, embed, y_train_a, y_dev_a, "task_a_mlp")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.09      0.09      0.09        80
           1       0.29      0.27      0.28       302
           2       0.62      0.64      0.63       618

    accuracy                           0.48      1000
   macro avg       0.33      0.33      0.33      1000
weighted avg       0.48      0.48      0.48      1000

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.92      0.76      0.84        80
           1       0.83      0.82      0.82       302
           2       0.90      0.92      0.91       618

    accuracy                           0.88      1000
   macro avg       0.88      0.83      0.86      1000
weighted avg       0.88      0.88      0.88      1000

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

      

  _warn_prf(average, modifier, msg_start, len(result))


## Task B: Multilabel sentiment detection
Classify memes as Humourous, sarcastics, offensive and/or motivationnal. One meme can have multiple sentiments.
More details here: https://competitions.codalab.org/competitions/20629
We compare the results of OneVsRest classifier with logistic regression, SVM and random forest.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [83]:
y_train_b = df_train[["Humour_bin", "Sarcasm_bin", "Offense_bin", "Motivation_bin"]].to_numpy().astype(int)
y_dev_b = df_dev[["Humour_bin", "Sarcasm_bin", "Offense_bin", "Motivation_bin"]].to_numpy().astype(int)

In [84]:
y_dev_b

array([[1, 1, 1, 0],
       [1, 1, 0, 1],
       [0, 0, 0, 0],
       ...,
       [1, 1, 1, 0],
       [1, 1, 1, 0],
       [1, 1, 1, 0]])

In [85]:
lr_ovc = None
# Uncomment for training the model instead of using the pretrained one
# lr_ovc = OneVsRestClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6))

res_b_lr = evaluate(lr_ovc, embed, y_train_b, y_dev_b, "task_b_lr")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      1.00      0.87       773
           1       0.75      1.00      0.86       751
           2       0.60      1.00      0.75       601
           3       0.00      0.00      0.00       366

   micro avg       0.71      0.85      0.77      2491
   macro avg       0.53      0.75      0.62      2491
weighted avg       0.61      0.85      0.71      2491
 samples avg       0.71      0.80      0.72      2491

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.77      1.00      0.87       773
           1       0.75      1.00      0.86       751
           2       0.60      1.00      0.75       601
           3       0.00      0.00      0.00       366

   micro avg       0.71      0.85      0.77      2491
   macro avg       0.53      0.75      0.62      2491
weighted avg       0.61      0.85      0.71      2491
 samples avg      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [86]:
knn_ovc = None
# Uncomment for training the model instead of using the pretrained one
# knn_ovc = OneVsRestClassifier(KNeighborsClassifier(n_jobs=6))

res_b_knn = evaluate(knn_ovc, embed, y_train_b, y_dev_b, "task_b_knn")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      0.90      0.83       773
           1       0.76      0.93      0.84       751
           2       0.62      0.70      0.66       601
           3       0.32      0.22      0.26       366

   micro avg       0.69      0.76      0.72      2491
   macro avg       0.62      0.69      0.65      2491
weighted avg       0.66      0.76      0.71      2491
 samples avg       0.69      0.71      0.66      2491

############### Embeddings: text only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.80      0.95      0.86       773
           1       0.78      0.96      0.86       751
           2       0.69      0.80      0.74       601
           3       0.56      0.34      0.43       366

   micro avg       0.74      0.83      0.78      2491
   macro avg       0.71      0.76      0.72      2491
weighted avg       0.73      0.83      0.77      2491
 samples avg       0.74      0.78      0.72      2491

############### Embeddings: deep cca ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.80      0.94      0.86       773
           1       0.77      0.94      0.85       751
           2       0.69      0.76      0.72       601
           3       0.58      0.40      0.48       366

   micro avg       0.74      0.82      0.78      2491
   macro avg       0.71      0.76      0.73      2491
weighted avg       0.73      0.82      0.77      2491
 samples avg       0.74      0.77      0.71      2491

############### Embeddings: concatenated ####################
              precision    recall  f1-score   support

           0       0.80      0.95      0.87       773
           1       0.78      0.97      0.87       751
           2       0.69      0.82      0.75       601
           3       0.58      0.34      0.43       366

   micro avg       0.75      0.84      0.79      2491
   macro avg       0.71      0.77      0.73      2491
weighted avg       0.74      0.84      0.78      2491
 samples avg   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [87]:
gnb_ovc = None
# Uncomment for training the model instead of using the pretrained one
# gnb_ovc = OneVsRestClassifier(GaussianNB())

res_b_gnb = evaluate(gnb_ovc, embed, y_train_b, y_dev_b, "task_b_gnb")

############### Embeddings: image only ####################
              precision    recall  f1-score   support

           0       0.79      0.23      0.36       773
           1       0.73      0.24      0.36       751
           2       0.61      0.19      0.29       601
           3       0.38      0.80      0.51       366

   micro avg       0.53      0.31      0.39      2491
   macro avg       0.63      0.37      0.38      2491
weighted avg       0.67      0.31      0.37      2491
 samples avg       0.45      0.28      0.31      2491

############### Embeddings: text only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.79      0.85      0.82       773
           1       0.77      0.80      0.79       751
           2       0.66      0.66      0.66       601
           3       0.44      0.41      0.42       366

   micro avg       0.71      0.73      0.72      2491
   macro avg       0.66      0.68      0.67      2491
weighted avg       0.70      0.73      0.71      2491
 samples avg       0.68      0.68      0.64      2491

############### Embeddings: deep cca ####################
              precision    recall  f1-score   support

           0       0.77      0.87      0.82       773
           1       0.75      0.96      0.84       751
           2       0.57      0.34      0.43       601
           3       0.32      0.17      0.22       366

   micro avg       0.70      0.67      0.68      2491
   macro avg       0.60      0.59      0.58      2491
weighted avg       0.65      0.67      0.64      2491
 samples avg       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [88]:
abc_ovc = None
# Uncomment for training the model instead of using the pretrained one
# abc_ovc = OneVsRestClassifier(AdaBoostClassifier(n_estimators=100, random_state=0))

res_b_abc = evaluate(abc_ovc, embed, y_train_b, y_dev_b, "task_b_abc")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      0.95      0.85       773
           1       0.75      0.97      0.84       751
           2       0.60      0.80      0.69       601
           3       0.32      0.11      0.17       366

   micro avg       0.70      0.80      0.74      2491
   macro avg       0.61      0.71      0.64      2491
weighted avg       0.66      0.80      0.71      2491
 samples avg       0.70      0.75      0.69      2491

############### Embeddings: text only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.79      0.96      0.87       773
           1       0.77      0.97      0.86       751
           2       0.66      0.87      0.75       601
           3       0.60      0.28      0.39       366

   micro avg       0.74      0.84      0.79      2491
   macro avg       0.71      0.77      0.72      2491
weighted avg       0.73      0.84      0.77      2491
 samples avg       0.73      0.79      0.72      2491

############### Embeddings: deep cca ####################


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.78      0.97      0.86       773
           1       0.76      0.98      0.85       751
           2       0.61      0.89      0.72       601
           3       0.52      0.11      0.18       366

   micro avg       0.71      0.83      0.77      2491
   macro avg       0.67      0.74      0.66      2491
weighted avg       0.69      0.83      0.73      2491
 samples avg       0.71      0.78      0.71      2491

############### Embeddings: concatenated ####################
              precision    recall  f1-score   support

           0       0.78      0.95      0.86       773
           1       0.76      0.96      0.85       751
           2       0.65      0.83      0.72       601
           3       0.49      0.25      0.34       366

   micro avg       0.72      0.82      0.77      2491
   macro avg       0.67      0.75      0.69      2491
weighted avg       0.70      0.82      0.75      2491
 samples avg   

  _warn_prf(average, modifier, msg_start, len(result))


In [89]:
rf_ovc = None
# Uncomment for training the model instead of using the pretrained one
# rf_ovc = OneVsRestClassifier(RandomForestClassifier(random_state=0,oob_score=True, n_jobs=6))

res_b_rf = evaluate(rf_ovc, embed, y_train_b, y_dev_b, "task_b_rf")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.76      0.80      0.78       773
           1       0.76      0.85      0.80       751
           2       0.61      0.68      0.64       601
           3       0.37      0.30      0.33       366

   micro avg       0.68      0.71      0.69      2491
   macro avg       0.62      0.66      0.64      2491
weighted avg       0.67      0.71      0.69      2491
 samples avg       0.65      0.67      0.61      2491

############### Embeddings: text only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.95      0.99      0.97       773
           1       0.93      0.99      0.96       751
           2       0.87      0.98      0.92       601
           3       0.98      0.80      0.88       366

   micro avg       0.92      0.96      0.94      2491
   macro avg       0.93      0.94      0.93      2491
weighted avg       0.93      0.96      0.94      2491
 samples avg       0.87      0.89      0.87      2491

############### Embeddings: deep cca ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      0.99      0.87       773
           1       0.75      0.99      0.86       751
           2       0.63      0.94      0.75       601
           3       0.58      0.09      0.16       366

   micro avg       0.72      0.85      0.78      2491
   macro avg       0.68      0.75      0.66      2491
weighted avg       0.70      0.85      0.73      2491
 samples avg       0.71      0.80      0.72      2491

############### Embeddings: concatenated ####################
              precision    recall  f1-score   support

           0       0.78      1.00      0.87       773
           1       0.75      1.00      0.86       751
           2       0.63      0.94      0.75       601
           3       0.68      0.08      0.14       366

   micro avg       0.72      0.85      0.78      2491
   macro avg       0.71      0.75      0.66      2491
weighted avg       0.72      0.85      0.73      2491
 samples avg   

  _warn_prf(average, modifier, msg_start, len(result))


In [90]:
mlp_ovc = None
# Uncomment for training the model instead of using the pretrained one
# mlp_ovc = OneVsRestClassifier(MLPClassifier(max_iter=1000))

res_b_mlp = evaluate(mlp_ovc, embed, y_train_b, y_dev_b, "task_b_mlp")

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      0.79      0.78       773
           1       0.76      0.84      0.80       751
           2       0.61      0.70      0.65       601
           3       0.37      0.31      0.33       366

   micro avg       0.68      0.71      0.69      2491
   macro avg       0.63      0.66      0.64      2491
weighted avg       0.67      0.71      0.69      2491
 samples avg       0.65      0.67      0.62      2491

############### Embeddings: text only ####################
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       773
           1       0.93      0.97      0.95       751
           2       0.90      0.91      0.90       601
           3       0.87      0.84      0.86       366

   micro avg       0.92      0.93      0.93      2491
   macro avg       0.91      0.92      0.92      2491
weighted avg       0.92      0.93      0.93      2491
 samples avg      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Task C: Multilabel sentiment intensity detection
Classify the degree of humour, sarcasm, offense and motivation of each meme. 
One meme can have multiple sentiments of different intensities. Each sentiment intensity is ranked from 0 (not at all) 
to 5 (very much).
More details here: https://competitions.codalab.org/competitions/20629
We compare the results of OneVsRest Ordinal classifier with logistic regression, SVM and random forest.
To investigate how each modality contributes to the detection, we test these models with embeddings of sentences only, 
images only and both concatenated.

In [91]:
cols = ["Humour", "Sarcasm", "Offense", "Motivation"]
y_train_c = pd.concat([df_train[name].cat.codes for name in cols], axis=1).to_numpy()
y_dev_c = pd.concat([df_dev[name].cat.codes for name in cols], axis=1).to_numpy()

In [92]:
lr_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# lr_ovc_oc = MultiOutputClassifier(OrdinalClassifier(LogisticRegressionCV(cv=5, random_state=0, solver="saga", max_iter=10000, n_jobs=6)))

res_c_lr = evaluate(lr_ovc_oc, embed, y_train_c, y_dev_c, "task_c_lr", multitask=True)

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       227
           1       0.34      1.00      0.51       343
           2       0.00      0.00      0.00       341
           3       0.00      0.00      0.00        89

    accuracy                           0.34      1000
   macro avg       0.09      0.25      0.13      1000
weighted avg       0.12      0.34      0.18      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       249
           1       0.49      0.99      0.66       491
           2       0.20      0.01      0.02       214
           3       0.00      0.00      0.00        46

    accuracy                           0.49      1000
   macro avg       0.17      0.25      0.17      1000
weighted avg       0.28      0.49      0.33      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


In [93]:
knn_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# knn_ovc_oc = MultiOutputClassifier(OrdinalClassifier(KNeighborsClassifier(n_jobs=6)))

res_c_knn = evaluate(knn_ovc_oc, embed, y_train_c, y_dev_c, "task_c_knn", multitask=True)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.19      0.26      0.22       227
           1       0.33      0.40      0.36       343
           2       0.31      0.22      0.26       341
           3       0.04      0.01      0.02        89

    accuracy                           0.27      1000
   macro avg       0.22      0.22      0.21      1000
weighted avg       0.26      0.27      0.26      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.30      0.29      0.29       249
           1       0.50      0.64      0.56       491
           2       0.17      0.10      0.13       214
           3       0.00      0.00      0.00        46

    accuracy                           0.41      1000
   macro avg       0.24      0.26      0.25      1000
weighted avg       0.36      0.41      0.38      1000

results for class

In [94]:
gnb_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# gnb_ovc_oc = MultiOutputClassifier(OrdinalClassifier(GaussianNB()))

res_c_gnb = evaluate(gnb_ovc_oc, embed, y_train_c, y_dev_c, "task_c_gnb", multitask=True)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.23      0.77      0.36       227
           1       0.38      0.10      0.15       343
           2       0.38      0.11      0.16       341
           3       0.10      0.07      0.08        89

    accuracy                           0.25      1000
   macro avg       0.27      0.26      0.19      1000
weighted avg       0.32      0.25      0.20      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.24      0.71      0.36       249
           1       0.46      0.12      0.19       491
           2       0.22      0.09      0.13       214
           3       0.03      0.02      0.02        46

    accuracy                           0.26      1000
   macro avg       0.24      0.24      0.18      1000
weighted avg       0.33      0.26      0.21      1000

results for class

In [95]:
abc_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# abc_ovc_oc = MultiOutputClassifier(OrdinalClassifier(AdaBoostClassifier(n_estimators=100, random_state=0)))

res_c_abc = evaluate(abc_ovc_oc, embed, y_train_c, y_dev_c, "task_c_abc", multitask=True)

############### Embeddings: image only ####################


  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.23      0.83      0.35       227
           1       0.00      0.00      0.00       343
           2       0.00      0.00      0.00       341
           3       0.12      0.21      0.15        89

    accuracy                           0.21      1000
   macro avg       0.09      0.26      0.13      1000
weighted avg       0.06      0.21      0.09      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.25      0.90      0.39       249
           1       0.00      0.00      0.00       491
           2       0.00      0.00      0.00       214
           3       0.02      0.04      0.03        46

    accuracy                           0.23      1000
   macro avg       0.07      0.23      0.10      1000
weighted avg       0.06      0.23      0.10      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.26      0.91      0.40       227
           1       0.00      0.00      0.00       343
           2       0.00      0.00      0.00       341
           3       0.21      0.48      0.30        89

    accuracy                           0.25      1000
   macro avg       0.12      0.35      0.17      1000
weighted avg       0.08      0.25      0.12      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.27      0.96      0.42       249
           1       0.00      0.00      0.00       491
           2       0.00      0.00      0.00       214
           3       0.17      0.43      0.25        46

    accuracy                           0.26      1000
   macro avg       0.11      0.35      0.17      1000
weighted avg       0.07      0.26      0.12      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.24      0.93      0.39       227
           1       0.00      0.00      0.00       343
           2       0.00      0.00      0.00       341
           3       0.14      0.20      0.17        89

    accuracy                           0.23      1000
   macro avg       0.10      0.28      0.14      1000
weighted avg       0.07      0.23      0.10      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.25      0.95      0.40       249
           1       0.00      0.00      0.00       491
           2       0.00      0.00      0.00       214
           3       0.10      0.13      0.11        46

    accuracy                           0.24      1000
   macro avg       0.09      0.27      0.13      1000
weighted avg       0.07      0.24      0.10      1000

results for class Offense:
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))


In [96]:
rf_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# rf_ovc_oc = MultiOutputClassifier(OrdinalClassifier(RandomForestClassifier(random_state=0, oob_score=True, n_jobs=6)))

res_c_rf = evaluate(rf_ovc_oc, embed, y_train_c, y_dev_c, "task_c_rf", multitask=True)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.19      0.20      0.20       227
           1       0.36      0.39      0.37       343
           2       0.33      0.30      0.32       341
           3       0.11      0.10      0.11        89

    accuracy                           0.29      1000
   macro avg       0.25      0.25      0.25      1000
weighted avg       0.29      0.29      0.29      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.30      0.23      0.26       249
           1       0.50      0.57      0.53       491
           2       0.20      0.17      0.18       214
           3       0.00      0.00      0.00        46

    accuracy                           0.38      1000
   macro avg       0.25      0.24      0.24      1000
weighted avg       0.36      0.38      0.37      1000

results for class

In [97]:
mlp_ovc_oc = None
# Uncomment for training the model instead of using the pretrained one
# mlp_ovc_oc = MultiOutputClassifier(OrdinalClassifier(MLPClassifier(max_iter=1000)))

res_c_mlp = evaluate(mlp_ovc_oc, embed, y_train_c, y_dev_c, "task_c_mlp", multitask=True)

############### Embeddings: image only ####################
results for class Humour:
              precision    recall  f1-score   support

           0       0.18      0.16      0.17       227
           1       0.34      0.36      0.35       343
           2       0.33      0.33      0.33       341
           3       0.09      0.09      0.09        89

    accuracy                           0.28      1000
   macro avg       0.23      0.24      0.24      1000
weighted avg       0.28      0.28      0.28      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.27      0.18      0.22       249
           1       0.49      0.59      0.54       491
           2       0.19      0.18      0.18       214
           3       0.00      0.00      0.00        46

    accuracy                           0.37      1000
   macro avg       0.24      0.24      0.24      1000
weighted avg       0.35      0.37      0.36      1000

results for class

  _warn_prf(average, modifier, msg_start, len(result))


results for class Humour:
              precision    recall  f1-score   support

           0       0.79      0.60      0.69       227
           1       0.62      0.74      0.68       343
           2       0.67      0.70      0.68       341
           3       0.88      0.64      0.74        89

    accuracy                           0.69      1000
   macro avg       0.74      0.67      0.70      1000
weighted avg       0.70      0.69      0.69      1000

results for class Sarcasm:
              precision    recall  f1-score   support

           0       0.73      0.53      0.62       249
           1       0.70      0.83      0.76       491
           2       0.65      0.64      0.64       214
           3       0.96      0.59      0.73        46

    accuracy                           0.70      1000
   macro avg       0.76      0.65      0.69      1000
weighted avg       0.71      0.70      0.70      1000

results for class Offense:
              precision    recall  f1-score   supp

## Generating report
We take the results of the best classifier for each task, here the random forest and generate a report following the guidelines provided here: https://competitions.codalab.org/competitions/20629#learn_the_details-submission-guidelines

In [63]:
from src.utils.reports import generate_reports

In [65]:
print(generate_reports(res_a_lr, res_b_lr, res_c_lr, "lr"))

1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1200
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_1100
1_11

In [66]:
print(generate_reports(res_a_knn, res_b_knn, res_c_knn, "knn"))

0_0000_0000
1_1110_2100
1_1010_1000
1_1110_0100
-1_1110_1200
0_1110_1110
0_1110_1100
1_1101_0101
1_1000_1000
1_1110_2120
1_1100_2100
-1_1110_0110
1_1111_2101
0_1100_0100
1_1010_0000
1_1110_1010
0_1000_0000
0_1111_1001
0_1110_1110
1_1111_1201
1_1110_0120
1_0000_0000
1_1110_2210
1_1100_2100
1_1111_1111
1_1110_1100
1_0100_0000
0_0111_0111
1_1110_2220
0_1110_1000
1_1110_2110
1_1111_0101
0_1100_0000
0_1110_1210
1_0110_0000
0_0100_0100
1_0111_0101
1_1100_2100
1_1111_1101
1_1001_2001
1_1111_1211
-1_1101_2201
1_1110_2100
1_1111_0211
0_1110_1110
-1_1110_0020
0_1110_0110
0_1110_0110
1_1110_2020
1_1111_1101
0_1111_1111
1_1100_2000
1_1110_1000
1_1111_1211
0_1110_1100
-1_0100_0000
1_1100_1100
1_1100_1100
1_1110_1120
0_1110_0110
0_1111_2101
0_1100_0100
1_1110_1120
0_1110_1100
1_1000_2000
-1_1110_2220
0_1111_2111
0_1110_1110
1_1110_2110
0_1100_2100
0_1110_0000
1_1100_1100
1_1110_2100
1_1110_0110
1_1111_1201
1_1111_2321
1_1110_1100
1_1110_1100
1_1110_3000
1_1110_1100
0_1111_1111
0_1001_0001
1_1111_211

In [67]:
print(generate_reports(res_a_gnb, res_b_gnb, res_c_gnb, "gnb"))

1_1110_0200
0_1111_2211
1_1100_1100
1_1100_1300
1_1110_2210
1_1100_1300
0_1111_2211
1_1100_1100
1_1100_1100
0_0011_2011
1_1100_1100
1_1100_1100
1_1111_2211
1_1100_1100
1_1100_1300
1_1100_1100
0_0110_2110
1_1100_1300
1_1100_1100
1_1110_2210
1_1100_1300
1_1111_2201
-1_1110_2210
0_0111_0211
1_1100_1300
1_1111_2211
1_1100_1300
0_1111_2211
1_1100_1300
1_1100_1100
1_1111_2211
1_1110_2210
-1_1111_2211
1_1100_1100
1_1100_1100
1_1100_1100
1_1100_1100
1_1100_1300
1_1100_1300
1_1100_1100
1_1100_1300
1_1110_1100
1_1100_1100
1_1100_1100
1_1101_1201
1_1100_1300
1_1110_1100
0_0111_0111
1_1100_1100
1_1100_1300
1_1100_1100
1_1100_1300
1_1100_1300
1_1110_1120
1_1100_1100
1_1100_1100
1_1111_2211
1_1100_1300
0_1111_2211
1_1100_1000
1_1110_2100
1_1100_1300
1_1100_1300
1_1100_1200
1_1100_1300
0_1110_2010
1_1100_1100
1_1100_1100
1_1100_1300
1_1100_1300
1_1110_2100
1_1100_1100
0_0110_2110
1_1100_1100
1_1100_1100
1_1100_1100
1_1100_1300
1_1100_1100
1_1100_1100
0_0110_0210
1_1110_1100
1_1100_1300
1_1100_1300
1_

In [68]:
print(generate_reports(res_a_abc, res_b_abc, res_c_abc, "abc"))

1_1110_0000
1_1110_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_1110_3000
1_1110_0000
1_1110_0000
1_1110_0000
1_1111_0031
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1111_0001
1_1111_0001
1_0111_0001
1_1110_0000
1_1110_3000
1_1110_0300
1_1100_0000
1_1110_0000
1_1110_3000
1_1110_0000
1_0110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1010_0000
1_1110_3000
1_1110_0000
1_1110_3000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0300
1_1100_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_0110_0000
1_1110_3000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_3000
1_1100_0000
1_1111_0001
1_1110_0000
1_1110_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_1111_0001
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_3000
1_1110_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1100_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_1000_0000
1_1110_0000
1_1110_0000
1_1110_0000
1_11

In [69]:
print(generate_reports(res_a_rf, res_b_rf, res_c_rf, "rf"))

1_1110_0010
1_1111_0001
1_1110_1110
1_1110_1000
1_1110_0120
1_1110_1100
1_1100_1100
1_1110_2100
1_1100_2100
1_1110_0000
1_1110_0100
1_1110_0000
1_1110_2100
1_1110_2100
1_1100_1100
0_1110_2100
1_1100_2200
1_1110_2100
1_1110_1100
1_1110_0100
1_1110_1200
1_1110_1210
1_1110_2100
1_1110_0000
1_1110_1100
1_1110_1110
1_1110_2100
1_1110_0100
1_1110_1100
1_1110_2100
1_1110_3000
1_1110_0100
1_1111_0101
1_1110_1100
1_1110_1100
1_1110_2100
1_1110_1110
1_1110_2110
1_1110_0100
1_1110_2100
1_1110_1100
1_1110_2100
1_1100_0100
1_1110_2100
1_1110_1220
1_1110_1000
1_1110_0110
1_1110_0000
1_1110_2110
1_1110_1100
1_1110_1100
1_1110_1100
1_1110_0110
1_1110_0000
1_1110_1110
1_1110_0100
1_1111_0101
1_1100_1100
1_1110_2110
1_1110_2100
1_1110_2100
1_1110_1100
1_1110_1100
1_1110_2000
1_1110_2100
1_1110_0000
1_1110_2100
1_1110_2110
1_1110_1110
1_1110_1110
1_1110_2110
1_1110_1100
1_1110_0000
1_1110_1110
1_1110_1100
1_1110_1200
1_1110_1100
1_1110_1110
1_1110_1100
1_1010_0000
1_1110_0100
1_1110_1100
1_1110_2100
1_11

In [70]:
print(generate_reports(res_a_mlp, res_b_mlp, res_c_mlp, "mlp"))

1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_1100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_1100
1_1110_0100
1_1100_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_1110_0100
1_11