# Impact of image and text features on classification
The trained models consistently showed worse or equivalent results when image or image + text features were used for prediction compared to text only. The aim of this section is to check whether image and text are failing on the same predictions (i.e., image features are not helpful for sentiment analysis in memes) or whether images can be more helpful than text in some cases (i.e., image and text features are complementary, but images are less often a strong predictor).

In [1]:
import pandas as pd
import numpy as np
from joblib import dump, load
from src.utils.files import load_dfs, load_clfs
from src.utils.embeddings import retrieve_all_embeds
from src.utils.reports import generate_reports
from src.models.voting import soft_transform, hard_transform

In [2]:
def evaluate(clfs, embeds, y_dev, voting="soft", multitask=False):
    """Score a voting ensemble on the dev split and predict the test split.

    Args:
        clfs: Classifiers handed to ``soft_transform`` / ``hard_transform``.
        embeds: Indexable collection of embeddings; index 1 is used for the
            dev predictions and index 2 for the test predictions.
        y_dev: Dev-split ground truth. When ``multitask`` is true it must
            support 2-D indexing (``y_dev[:, col]``) and expose ``shape``.
        voting: ``"soft"`` selects ``soft_transform``; any other value
            selects ``hard_transform``.
        multitask: If true, build one classification report per label column
            instead of a single report for the whole target.

    Returns:
        A dict with a single entry keyed by ``voting``. Its value holds the
        dev predictions (``"pred_cls_dev"``), the report string or list of
        report strings (``"report_str"``) and the test predictions
        (``"pred_cls_test"``).
    """
    # NOTE(review): ``classification_report`` is not imported in the visible
    # cells; presumably ``sklearn.metrics.classification_report`` -- confirm
    # and import it next to the other notebook imports.

    # Use the function's own arguments: the previous version read the global
    # ``embed`` and an undefined ``clf`` for the test-split predictions.
    transform = soft_transform if voting == "soft" else hard_transform
    y_pred_dev = transform(clfs, embeds[1])
    y_pred_test = transform(clfs, embeds[2])

    if not multitask:
        rep = classification_report(y_dev, y_pred_dev)
        print(rep)
    else:
        # One report per label column.
        rep = [
            classification_report(y_dev[:, col], y_pred_dev[:, col])
            for col in range(y_dev.shape[1])
        ]
        cols = ["Humour", "Sarcasm", "Offense", "Motivation"]
        for c, r in zip(cols, rep):
            print("results for class {}:\n{}".format(c, r))

    # NOTE(review): the result used to be stored under an undefined name
    # (``item``), which raised NameError. The voting strategy is used as the
    # key here -- confirm this matches what ``generate_reports`` expects.
    return {
        voting: {
            "pred_cls_dev": y_pred_dev,
            "report_str": rep,
            "pred_cls_test": y_pred_test,
        }
    }

In [3]:
def evaluate_all_tasks(model_name, embeds, y_devs):
    """Evaluate the saved classifiers of one model family on every task.

    For each task, the three classifiers stored under ``data/models/custom``
    (one per embedding type: text only, image only, concatenated) are loaded
    and passed to ``evaluate`` together with that task's dev labels. The
    per-task results are then handed to ``generate_reports``.

    Args:
        model_name: Model identifier used in the classifier file names
            (``<task>_<model_name>_<embed_type>.joblib``).
        embeds: Embeddings forwarded unchanged to ``evaluate``.
        y_devs: Dev-split labels indexed by task position
            (``task_a``, ``task_b``, ``task_c``).

    Returns:
        A list with one ``evaluate`` result per task, in task order.
    """
    tasks = ["task_a", "task_b", "task_c"]
    embed_type = ["text_only", "image_only", "concatenated"]
    model_path = "data/models/custom"
    res = []

    for i, task in enumerate(tasks):
        clf_names = [
            "{}/{}_{}_{}.joblib".format(model_path, task, model_name, e)
            for e in embed_type
        ]
        clfs_task = load_clfs(clf_names)
        # Only the third task is reported column by column. The flag was
        # previously computed but a hard-coded True was passed instead.
        multitask = i == 2
        # Pick this task's labels; the previous version referenced an
        # undefined ``y_dev`` here.
        res.append(evaluate(clfs_task, embeds, y_devs[i], multitask=multitask))
    generate_reports(*res, model_name)
    return res

In [5]:
# Feature pickles passed to retrieve_all_embeds as (first, second) path pairs,
# listed in train / dev / test order.
feature_paths = [
    ("data/features/use.pkl.train", "data/features/xception.pkl.train"),
    ("data/features/use.pkl.dev", "data/features/xception.pkl.dev"),
    ("data/features/use.pkl.test", "data/features/xception.pkl.test"),
]

df_train, df_dev = load_dfs(["data/train_cleaned_final.csv", "data/dev_cleaned_final.csv"])
cols = ["Humour", "Sarcasm", "Offense", "Motivation"]

# Dev-split targets, one entry per task, in the order they are indexed later.
sentiment_codes = df_dev["Overall_sentiment"].cat.codes
binary_targets = (
    df_dev[["Humour_bin", "Sarcasm_bin", "Offense_bin", "Motivation_bin"]]
    .to_numpy()
    .astype(int)
)
category_codes = pd.concat(
    [df_dev[name].cat.codes for name in cols], axis=1
).to_numpy()
y_devs = [sentiment_codes, binary_targets, category_codes]

# Transpose the values of the returned mapping so that position i groups the
# i-th element of every value (evaluate reads position 1 for the dev
# predictions and position 2 for the test predictions).
embed = list(zip(*retrieve_all_embeds(feature_paths).values()))

In [None]:
# Evaluate the saved "lr" classifiers on every task and generate their reports.
evaluate_all_tasks("lr", embed, y_devs)

In [None]:
# Evaluate the saved "knn" classifiers on every task and generate their reports.
evaluate_all_tasks("knn", embed, y_devs)

In [None]:
# Evaluate the saved "gnb" classifiers on every task and generate their reports.
evaluate_all_tasks("gnb", embed, y_devs)

In [None]:
# Evaluate the saved "abc" classifiers on every task and generate their reports.
evaluate_all_tasks("abc", embed, y_devs)

In [None]:
# Evaluate the saved "mlp" classifiers on every task and generate their reports.
evaluate_all_tasks("mlp", embed, y_devs)

In [None]:
# Evaluate the saved "rf" classifiers on every task and generate their reports.
evaluate_all_tasks("rf", embed, y_devs)