In [None]:
import glob
import os

import numpy as np
import pandas as pd

# Submission Format

We will use the following to convert csv files to the json format used for the evaluation.

We first transform the test set (the validation set is used in its place until the test solutions are released).

For the evaluation soft labels, we use the mean of the annotator aggregations, with possible values (0, 1/3, 2/3, 1).

In [None]:
def soft_dict_t1(stereo):
    """Build the task 1 soft-label dictionary for one item.

    ``stereo`` is the soft score assigned to the "Stereotype" class; the
    "NoStereotype" class receives the complement ``1 - stereo``.
    """
    no_stereo = 1 - stereo
    return dict(Stereotype=stereo, NoStereotype=no_stereo)


def soft_dict_t2(row, stereo="stereotype", imp="implicit"):
    """Build the task 2 soft-label dictionary for one row.

    ``row[stereo]`` is split between "Implicit" and "Explicit" in proportion
    to ``row[imp]``; whatever is left over from ``row[stereo]`` goes to
    "NoStereotype".

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys ``stereo`` and ``imp``.
    stereo, imp : str
        Keys under which the stereotype and implicit scores are stored.
    """
    stereo_score = row[stereo]
    implicit_share = row[imp]
    return {
        "Implicit": stereo_score * implicit_share,
        "Explicit": stereo_score * (1 - implicit_share),
        "NoStereotype": 1 - stereo_score,
    }

In [None]:
def test_to_json(test, name="test"):
    """Write the labels in ``test`` as evaluation JSON files under ``data/``.

    Every file is a list of ``test_case`` / ``id`` / ``value`` records:

    * ``data/{name}_t1_hard.json``: "Stereotype" where ``stereotype == 1``,
      otherwise "NoStereotype".
    * ``data/{name}-detests_t1_hard.json`` and
      ``data/{name}-stereohoax_t1_hard.json``: the same T1 hard labels
      restricted to one value of the ``source`` column. Only written when
      ``name == "test"``.
    * ``data/{name}_t2_hard.json``: "Implicit" where ``implicit == 1``,
      else "Explicit" where ``stereotype == 1``, else "NoStereotype".
    * ``data/{name}_t1_soft.json``: ``soft_dict_t1`` applied to the mean of
      ``stereotype_a1``..``stereotype_a3``.
    * ``data/{name}_t2_soft.json``: ``soft_dict_t2`` applied row-wise to the
      ``stereotype_soft`` and ``implicit_soft`` columns.

    Parameters
    ----------
    test : pandas.DataFrame
        Must contain ``id``, ``stereotype``, ``implicit`` and the annotator
        columns ``stereotype_a1``..``stereotype_a3``; ``source`` is needed
        when ``name == "test"``. ``stereotype_soft`` / ``implicit_soft`` are
        used for T2 soft when present and otherwise derived from the
        annotator columns (``implicit_a1``..``implicit_a3`` are then needed).
        The frame is not modified.
    name : str
        Prefix of the output file names.
    """
    out_columns = ["test_case", "id", "value"]
    stereo_annotators = ["stereotype_a1", "stereotype_a2", "stereotype_a3"]
    implicit_annotators = ["implicit_a1", "implicit_a2", "implicit_a3"]

    def _write(frame, path):
        # All outputs share the same record layout and JSON formatting.
        frame[out_columns].to_json(path, orient="records", indent=4)

    # Work on a copy so the caller's DataFrame does not gain a `test_case` column.
    base = test.copy()
    base["test_case"] = "DETESTS-Dis"

    # T1 HARD
    t1_hard = base.assign(value=np.where(base["stereotype"] == 1, "Stereotype", "NoStereotype"))
    _write(t1_hard, f"data/{name}_t1_hard.json")

    if name == "test":
        # T1 HARD per source corpus
        for source in ("detests", "stereohoax"):
            _write(t1_hard[t1_hard["source"] == source], f"data/{name}-{source}_t1_hard.json")

    # T2 HARD -- np.select takes the first matching condition, so "Implicit" wins.
    t2_hard = base.assign(
        value=np.select(
            [base["implicit"] == 1, base["stereotype"] == 1],
            ["Implicit", "Explicit"],
            default="NoStereotype",
        )
    )
    _write(t2_hard, f"data/{name}_t2_hard.json")

    # T1 SOFT -- always derived from the annotator columns.
    stereotype_soft = base[stereo_annotators].mean(axis=1)
    t1_soft = base.assign(value=stereotype_soft.apply(soft_dict_t1))
    _write(t1_soft, f"data/{name}_t1_soft.json")

    # T2 SOFT -- honour precomputed soft columns, but do not require them.
    t2_soft = base.copy()
    if "stereotype_soft" not in t2_soft.columns:
        t2_soft["stereotype_soft"] = stereotype_soft
    if "implicit_soft" not in t2_soft.columns:
        t2_soft["implicit_soft"] = t2_soft[implicit_annotators].mean(axis=1)
    t2_soft["value"] = t2_soft.apply(soft_dict_t2, args=("stereotype_soft", "implicit_soft"), axis=1)
    _write(t2_soft, f"data/{name}_t2_soft.json")

We create a validation partition. This is just a random partition: unlike the train/test partition, it does not take the thread structure into account.

In [None]:
# Load the full training data, then draw a reproducible 80% random sample from it.
df = pd.read_csv("data/train.csv")
train = df.sample(frac=0.8, random_state=42)
# Rows that were not sampled into the new training split form the validation split.
validation = df.drop(index=train.index)

validation.to_csv("data/validation.csv", index=False)
train.to_csv("data/train_val.csv", index=False)

In [None]:
# Soft labels are the row-wise mean of the three annotator columns.
val = pd.read_csv("data/validation.csv").assign(
    stereotype_soft=lambda d: d[["stereotype_a1", "stereotype_a2", "stereotype_a3"]].mean(axis=1),
    implicit_soft=lambda d: d[["implicit_a1", "implicit_a2", "implicit_a3"]].mean(axis=1),
)

test_to_json(val, name="validation")

In [None]:
# Soft labels are the row-wise mean of the three annotator columns.
test = pd.read_csv("data/test_solutions.csv").assign(
    stereotype_soft=lambda d: d[["stereotype_a1", "stereotype_a2", "stereotype_a3"]].mean(axis=1),
    implicit_soft=lambda d: d[["implicit_a1", "implicit_a2", "implicit_a3"]].mean(axis=1),
)

test_to_json(test, name="test")

The baselines may be converted to json as follows.

You can use the same functions for your results.

In [None]:
def json_t1_hard(file):
    """Convert a task 1 hard-label CSV to the evaluation JSON format.

    Rows whose ``stereotype`` column equals 1 are labelled "Stereotype",
    all others "NoStereotype". The ``test_case`` / ``id`` / ``value`` records
    are written next to the input, with its extension replaced by ``.json``.

    Parameters
    ----------
    file : str or os.PathLike
        Path to a CSV file with at least the columns ``id`` and ``stereotype``.
    """
    df = pd.read_csv(file)
    df["test_case"] = "DETESTS-Dis"
    df["value"] = np.where(df["stereotype"] == 1, "Stereotype", "NoStereotype")
    # splitext replaces the actual extension instead of blindly dropping four characters.
    out_path = os.path.splitext(file)[0] + ".json"
    df[["test_case", "id", "value"]].to_json(out_path, orient="records", indent=4)


def json_t2_hard(file):
    """Convert a task 2 hard-label CSV to the evaluation JSON format.

    Each row is labelled "Implicit" when ``implicit == 1``, otherwise
    "Explicit" when ``stereotype == 1``, otherwise "NoStereotype". The
    ``test_case`` / ``id`` / ``value`` records are written next to the input,
    with its extension replaced by ``.json``.

    Parameters
    ----------
    file : str or os.PathLike
        Path to a CSV file with at least the columns ``id``, ``stereotype``
        and ``implicit``.
    """
    df = pd.read_csv(file)
    df["test_case"] = "DETESTS-Dis"
    # np.select uses the first condition that holds, so "Implicit" takes precedence.
    df["value"] = np.select(
        [df["implicit"] == 1, df["stereotype"] == 1], ["Implicit", "Explicit"], default="NoStereotype"
    )
    # splitext replaces the actual extension instead of blindly dropping four characters.
    out_path = os.path.splitext(file)[0] + ".json"
    df[["test_case", "id", "value"]].to_json(out_path, orient="records", indent=4)


def json_t1_soft(file):
    """Convert a task 1 soft-label CSV to the evaluation JSON format.

    The ``stereotype`` column of each row is passed to ``soft_dict_t1`` to
    build the ``value`` dictionary. The ``test_case`` / ``id`` / ``value``
    records are written next to the input, with its extension replaced by
    ``.json``.

    Parameters
    ----------
    file : str or os.PathLike
        Path to a CSV file with at least the columns ``id`` and ``stereotype``.
    """
    df = pd.read_csv(file)
    df["test_case"] = "DETESTS-Dis"
    df["value"] = df["stereotype"].apply(soft_dict_t1)
    # splitext replaces the actual extension instead of blindly dropping four characters.
    out_path = os.path.splitext(file)[0] + ".json"
    df[["test_case", "id", "value"]].to_json(out_path, orient="records", indent=4)


def json_t2_soft(file):
    """Convert a task 2 soft-label CSV to the evaluation JSON format.

    Each row is passed to ``soft_dict_t2`` with its default column names
    (``stereotype`` and ``implicit``) to build the ``value`` dictionary. The
    ``test_case`` / ``id`` / ``value`` records are written next to the input,
    with its extension replaced by ``.json``.

    Parameters
    ----------
    file : str or os.PathLike
        Path to a CSV file with at least the columns ``id``, ``stereotype``
        and ``implicit``.
    """
    df = pd.read_csv(file)
    df["test_case"] = "DETESTS-Dis"
    df["value"] = df.apply(soft_dict_t2, axis=1)
    # splitext replaces the actual extension instead of blindly dropping four characters.
    out_path = os.path.splitext(file)[0] + ".json"
    df[["test_case", "id", "value"]].to_json(out_path, orient="records", indent=4)

In [None]:
# (glob pattern, converter) pairs, processed in the order listed.
first_pass = [
    ("baselines/validation/*_t1_hard.csv", json_t1_hard),
    ("baselines/validation/*_t2_hard.csv", json_t2_hard),
    ("baselines/validation/*_t1_soft.csv", json_t1_soft),
    ("baselines/validation/*_t2_soft.csv", json_t2_soft),
    ("baselines/test/*_t1_hard.csv", json_t1_hard),
]
for pattern, convert in first_pass:
    for file in glob.glob(pattern):
        convert(file)

# The per-source files do not end in "_t1_hard.csv", so the glob above does
# not pick them up; convert them explicitly.
for path in (
    "baselines/test/beto_t1_hard_detests.csv",
    "baselines/test/beto_t1_hard_stereohoax.csv",
):
    json_t1_hard(path)

second_pass = [
    ("baselines/test/*_t2_hard.csv", json_t2_hard),
    ("baselines/test/*_t1_soft.csv", json_t1_soft),
    ("baselines/test/*_t2_soft.csv", json_t2_soft),
]
for pattern, convert in second_pass:
    for file in glob.glob(pattern):
        convert(file)

# Evaluation

In [None]:
from evaluation import evaluate, main

Here we provide an example of the metrics for the 4 tasks

In [None]:
# Run the example entry point of the local `evaluation` module imported above.
main()

You may try them with the baselines or your own models

In [None]:
# Task 1, hard labels: compare a baseline prediction file with the validation
# gold file that test_to_json(val, "validation") writes.
# NOTE(review): the prediction JSON exists only if the matching CSV was
# converted by json_t1_hard in the baseline-conversion cell -- confirm.
pred = "baselines/validation/tfidf_svc_t1_hard.json"
gold = "data/validation_t1_hard.json"
evaluate(pred, gold)

In [None]:
# Task 2, hard labels: compare a baseline prediction file with the validation
# gold file that test_to_json(val, "validation") writes.
# NOTE(review): the prediction JSON exists only if the matching CSV was
# converted by json_t2_hard in the baseline-conversion cell -- confirm.
pred = "baselines/validation/tfidf_svc_t2_hard.json"
gold = "data/validation_t2_hard.json"
evaluate(pred, gold)