# Create Figures for Paper

This notebook loads the topic-classification metrics and creates the figures and tables for the paper.

In [1]:
# ---------------------------- PREPARING NOTEBOOK ---------------------------- #
# Autoreload
%load_ext autoreload
%autoreload 2

# Random seed
import numpy as np
np.random.seed(42)

# External modules
import os
from IPython.display import display, Markdown, Latex, clear_output
from tqdm import notebook as tqdm

# Set global log level
import logging
logging.basicConfig(level=logging.INFO)
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Define PWD as the current git repository
import git
repo = git.Repo('.', search_parent_directories=True)
pwd = repo.working_dir
os.chdir(pwd)

## Loading Data

### Zero-shot

In [2]:
import pandas as pd

# Zero-shot FlauBERT: table of accuracy@1/3/5 per topic
path = os.path.join(pwd, "results", "topic_classification", "ZeroShot")
path = os.path.join(path, "flaubert_metrics.csv")
flaubert_metrics = pd.read_csv(path)
flaubert_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Travel,0.15625,0.78125,0.96875
1,Culture,0.212121,0.666667,0.757576
2,World,0.4375,0.84375,0.875
3,Science,0.545455,0.818182,0.909091
4,Health,0.545455,0.69697,0.848485
5,Business,0.5625,0.90625,0.9375
6,Politics,0.65625,0.78125,0.78125
7,Food,0.666667,0.878788,0.909091
8,Music,0.757576,0.878788,0.939394
9,Sport,0.787879,0.878788,0.909091


In [3]:
import pandas as pd

# Zero-shot FlauBERT: overall accuracy / precision / recall / f1
path = os.path.join(pwd, "results", "topic_classification", "ZeroShot")
path = os.path.join(path, "Flaubert-pretrained_classification_metrics.csv")
flaubert_classification_metrics = pd.read_csv(path)
flaubert_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,Flaubert-pretrained,0.557103,0.557103,0.557103,0.557103


In [4]:
import pandas as pd

# Zero-shot mDeBERTa: table of accuracy@1/3/5 per topic
path = os.path.join(pwd, "results", "topic_classification", "ZeroShot")
path = os.path.join(path, "mDeBERTa-v3-base-mnli-xnli_metrics.csv")
mDeBERTa_metrics = pd.read_csv(path)
mDeBERTa_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,World,0.125,0.65625,0.84375
1,Science,0.242424,0.484848,0.636364
2,Sport,0.30303,0.787879,0.939394
3,Culture,0.30303,0.727273,0.818182
4,Health,0.393939,0.757576,0.878788
5,Business,0.4375,0.84375,0.96875
6,Travel,0.4375,0.875,0.9375
7,Technology,0.454545,0.666667,0.787879
8,Food,0.575758,0.909091,0.939394
9,Music,0.818182,0.969697,0.969697


In [5]:
import pandas as pd

# Zero-shot mDeBERTa: overall accuracy / precision / recall / f1
path = os.path.join(pwd, "results", "topic_classification", "ZeroShot")
path = os.path.join(path, "mDeBERTa-v3-base-mnli-xnli_classification_metrics.csv")
mDeBERTa_classification_metrics = pd.read_csv(path)
mDeBERTa_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,mDeBERTa-v3-base-mnli-xnli,0.451253,0.451253,0.451253,0.451253


### OpenAI Topic Classifier

In [6]:
import pandas as pd

# davinci-002 (zero-shot): table of accuracy@1/3/5 per topic
path = os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification")
path = os.path.join(path, "ZeroShot_davinci-002_metrics.csv")
davinci_metrics = pd.read_csv(path)
davinci_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Sport,0.0,0.0,0.060606
1,Politics,0.0,0.0,0.0
2,Music,0.0,1.0,1.0
3,Business,0.0,0.0,1.0
4,Food,0.0,0.0,0.030303
5,Science,0.0,0.0,0.0
6,Health,0.0,0.0,0.030303
7,Culture,0.0,0.0,1.0
8,Travel,0.0,1.0,1.0
9,Technology,0.0,0.0,0.0


In [7]:
import pandas as pd

# davinci-002 (zero-shot): overall accuracy / precision / recall / f1
path = os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification")
path = os.path.join(path, "ZeroShot_davinci-002_classification_metrics.csv")
davinci_classification_metrics = pd.read_csv(path)
davinci_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,ZeroShot_davinci-002,0.089136,0.089136,0.089136,0.089136


In [8]:
import pandas as pd

# gpt-3.5-turbo-1106 (zero-shot): table of accuracy@1/3/5 per topic
path = os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification")
path = os.path.join(path, "ZeroShot_gpt-3.5-turbo-1106_metrics.csv")
gpt_metrics = pd.read_csv(path)
gpt_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,World,0.21875,0.9375,1.0
1,Science,0.333333,0.727273,0.848485
2,Technology,0.424242,0.939394,0.969697
3,Business,0.46875,0.90625,1.0
4,Travel,0.53125,0.84375,0.96875
5,Culture,0.545455,0.878788,0.969697
6,Sport,0.69697,0.969697,0.969697
7,Food,0.69697,0.909091,0.939394
8,Music,0.757576,0.939394,1.0
9,Health,0.787879,0.909091,0.939394


In [9]:
import pandas as pd

# gpt-3.5-turbo-1106 (zero-shot): overall accuracy / precision / recall / f1
path = os.path.join(pwd, "results", "topic_classification", "OpenAiTopicClassification")
path = os.path.join(path, "ZeroShot_gpt-3.5-turbo-1106_classification_metrics.csv")
gpt_classification_metrics = pd.read_csv(path)
gpt_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,ZeroShot_gpt-3.5-turbo-1106,0.579387,0.579387,0.579387,0.579387


### Flaubert fine-tuned

In [10]:
import pandas as pd

# Fine-tuned FlauBERT: table of accuracy@1/3/5 per topic
path = os.path.join(pwd, "results", "topic_classification", "FlaubertFineTuned")
path = os.path.join(path, "flaubert_fine_tuned_metrics.csv")
flaubert_fine_tuned_metrics = pd.read_csv(path)
flaubert_fine_tuned_metrics

Unnamed: 0.1,Unnamed: 0,accuracy@1,accuracy@3,accuracy@5
0,Science,0.454545,0.939394,1.0
1,Politics,0.5,0.75,0.8125
2,Business,0.625,0.90625,0.9375
3,Health,0.636364,0.878788,0.878788
4,Culture,0.757576,0.969697,1.0
5,World,0.8125,0.96875,0.96875
6,Travel,0.8125,0.96875,1.0
7,Sport,0.818182,0.848485,0.939394
8,Technology,0.878788,0.939394,0.939394
9,Music,0.939394,1.0,1.0


In [11]:
import pandas as pd

# Fine-tuned FlauBERT: overall accuracy / precision / recall / f1
path = os.path.join(pwd, "results", "topic_classification", "FlaubertFineTuned")
path = os.path.join(path, "flaubert_fine_tuned_classification_metrics.csv")
flaubert_fine_tuned_classification_metrics = pd.read_csv(path)
flaubert_fine_tuned_classification_metrics

Unnamed: 0.1,Unnamed: 0,accuracy,precision,recall,f1
0,FlaubertFineTuned,0.743733,0.743733,0.743733,0.743733


## Figures Creation

In [23]:
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Render with the PGF backend: the figure is saved as a .pgf file below.
matplotlib.use("pgf")
matplotlib.rcParams.update(
    {
        "pgf.texsystem": "pdflatex",
        "font.family": "serif",
        "font.size": 12,
        "text.usetex": True,
        "pgf.rcfonts": False,
    }
)

# Per-topic accuracy@k tables, keyed by the model name shown in each panel title.
metrics = {
    "Flaubert-pretrained": flaubert_metrics.copy(),
    "Davinci-002": davinci_metrics.copy(),
    "GPT-3.5-turbo-1106": gpt_metrics.copy(),
    "mDeBERTa": mDeBERTa_metrics.copy(),
    "Flaubert-fine-tuned": flaubert_fine_tuned_metrics.copy(),
}

# Format all tables the same way (the first CSV column holds the topic label).
for key in metrics:
    metrics[key].columns = [
        "topic",
        "accuracy@1",
        "accuracy@3",
        "accuracy@5",
    ]

# Sort models by their best per-topic accuracy@1.
# `.max()` replaces the former `.iloc[0]`: the loaded tables are ordered by
# ascending accuracy@1, so the first row is the worst topic, not the best one.
sorted_keys = sorted(
    metrics, key=lambda name: metrics[name]["accuracy@1"].max(), reverse=True
)
metrics = {key: metrics[key].reset_index(drop=True) for key in sorted_keys}

# Remove Davinci-002
metrics.pop("Davinci-002")

# One panel per remaining model; (10, 6) inches is the final figure size.
fig, axs = plt.subplots(2, 2, figsize=(10, 6), sharey=True)
# Initial spacing only: tight_layout() below recomputes the subplot parameters.
fig.subplots_adjust(wspace=0.25, hspace=0.5)
FONTSIZE = 8

# The x axis is labelled in percent. Integer tick values avoid the float
# truncation of `int(x * 100)` (e.g. int(0.29 * 100) == 28).
percent_ticks = range(0, 101, 10)

# Applied once instead of on every loop iteration; it is still called after the
# axes are created, exactly as before.
# NOTE(review): because the theme changes global rcParams after the axes exist,
# a first run in a fresh kernel and a re-run may not look identical — consider
# moving this call above plt.subplots() if that matters.
sns.set_theme(style="whitegrid")

for ax, (key, df) in zip(axs.flat, metrics.items()):
    top_1 = df["accuracy@1"]
    top_3 = df["accuracy@3"]
    top_5 = df["accuracy@5"]

    # Stacked horizontal bars: each series only draws its gain over the previous
    # one, starting where the previous series ends.
    acc_1 = ax.barh(
        df["topic"],
        top_1,
        label="Top 1 Accuracy",
        height=0.6,
        color="#1f77b4",
    )
    acc_2 = ax.barh(
        df["topic"],
        top_3 - top_1,
        label="Top 3 Accuracy",
        height=0.5,
        left=top_1,
        color="#ff7f0e",
    )
    acc_3 = ax.barh(
        df["topic"],
        top_5 - top_3,
        label="Top 5 Accuracy",
        height=0.4,
        left=top_3,
        color="#2ca02c",
    )

    ax.set_title(
        f"{key} (Best : {round(top_1.max() * 100, 2)}%)",
        fontsize=FONTSIZE + 2,
    )
    ax.set_xlabel("Accuracy (%)", fontsize=FONTSIZE + 2)
    ax.set_ylabel("Label", fontsize=FONTSIZE + 2)
    ax.grid(False)

    # Emphasise the last row of the table (hatched, thicker bars). The bars are
    # taken from the containers returned by barh(); the former hard-coded
    # get_children()[11/23/35] only hit the last bar of each series when a
    # table had exactly 12 rows.
    for bars, emphasised_height in ((acc_1, 1), (acc_2, 0.9), (acc_3, 0.8)):
        bars.patches[-1].set(
            hatch="//", alpha=0.8, edgecolor="white", height=emphasised_height
        )

    # Ticks every 10 %, labelled with integers
    ax.set_xticks([percent / 100 for percent in percent_ticks])
    ax.set_xticklabels([str(percent) for percent in percent_ticks])

    # Set both axis font size
    ax.tick_params(axis="both", which="major", labelsize=FONTSIZE)

# Add a single legend for the whole figure (handles of the last panel)
fig.legend(
    [acc_1, acc_2, acc_3],
    ["Top 1 Accuracy", "Top 3 Accuracy", "Top 5 Accuracy"],
    loc="lower center",
    ncol=3,
    bbox_to_anchor=(0.5, -0.05),
)

fig.tight_layout()

# Save figure BEFORE showing it: with GUI/inline backends show() can release
# the figure, which would leave savefig() writing an empty canvas.
path = os.path.join(
    pwd,
    "figures",
    "topic_classification",
    "accuracy@n.pgf",
)
os.makedirs(os.path.dirname(path), exist_ok=True)
fig.savefig(path, bbox_inches="tight")

plt.show()

  plt.show()


In [13]:
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)

# Overall classification scores, keyed by the model name shown in the table.
metrics = {
    "Flaubert-pretrained": flaubert_classification_metrics.copy(),
    "Davinci-002": davinci_classification_metrics.copy(),
    "GPT-3.5-turbo-1106": gpt_classification_metrics.copy(),
    "mDeBERTa": mDeBERTa_classification_metrics.copy(),
    "Flaubert-fine-tuned": flaubert_fine_tuned_classification_metrics.copy(),
}

# Format all tables the same way.
# The columns are mapped BY NAME: the CSV files store them in the order
# accuracy, precision, recall, f1, so assigning the display names positionally
# (accuracy, f1, precision, recall) labelled precision as f1, recall as
# precision and f1 as recall.
column_labels = {
    "accuracy": "accuracy",
    "f1": "f1 (micro)",
    "precision": "precision (micro)",
    "recall": "recall (micro)",
}
metrics = {
    name: table.rename(columns={table.columns[0]: "model", **column_labels})[
        ["model", *column_labels.values()]
    ]
    for name, table in metrics.items()
}

# Concatenate all metrics, one row per model, labelled with the display name
df = pd.concat(list(metrics.values()), axis=0, ignore_index=True)
df["model"] = list(metrics)

# Index by model and sort by accuracy (best first)
df = df.set_index("model").sort_values("accuracy", ascending=False)


# Style dataframe
# Bold best results
def highlight_best(x):
    """Build the CSS overlay that marks the best value of every metric column.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame containing the columns "accuracy", "f1 (micro)",
        "precision (micro)" and "recall (micro)".

    Returns
    -------
    pandas.DataFrame
        Frame with the same index and columns as ``x`` whose cells are CSS
        strings: empty everywhere except on the row holding the maximum of
        each metric column (first occurrence when there are ties).
    """
    # Start from an all-empty style frame aligned with the data
    styles = pd.DataFrame("", index=x.index, columns=x.columns)

    for metric in ["accuracy", "f1 (micro)", "precision (micro)", "recall (micro)"]:
        # Single .loc[row, column] assignment: the chained form
        # `frame[col].loc[row] += ...` writes to a temporary object under
        # pandas copy-on-write and is deprecated.
        styles.loc[x[metric].idxmax(), metric] = "font-weight: bold; color: #FF9999;"

    return styles


# Green gradient per column, best scores emphasised, bold headers, and
# numbers formatted with a decimal comma and two digits.
styled_df = (
    df.style.background_gradient(cmap=sns.light_palette("green", as_cmap=True))
    .apply(highlight_best, axis=None)
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
    .applymap_index(lambda v: "font-weight: bold;", axis="rows")
    .format(decimal=",", thousands=".", precision=2)
)
display(styled_df)

path = os.path.join(
    pwd, "figures", "topic_classification", "classification_metrics.tex"
)
os.makedirs(os.path.dirname(path), exist_ok=True)

latex = styled_df.to_latex(
    caption="Metrics for topic classification models.",
    clines="skip-last;data",
    convert_css=True,
    position_float="centering",
    multicol_align="|c|",
    hrules=True,
)

# Add \begin{adjustbox}{center}
latex = latex.replace(
    "\\begin{tabular}", "\\begin{adjustbox}{center}\n\\begin{tabular}"
).replace("\\end{tabular}", "\\end{tabular}\n\\end{adjustbox}")

# Add comment under the table.
# The separator is \hrule: \hline is only valid inside a tabular/array and
# triggers "Misplaced \noalign" when used in a minipage.
table_notes = "\n".join(
    [
        "\\end{adjustbox}",
        "\\begin{minipage}{12cm}",
        "\\vspace{0.1cm}",
        "\\hrule",
        "\\vspace{0.1cm}",
        "\\begin{itemize}",
        "\\item The best results have been highlighted in bold and light red.",
        "\\item The color gradient is more intense for the best results.",
        "\\item The models are sorted by accuracy",
        "\\end{itemize}",
        "\\end{minipage}",
    ]
)
latex = latex.replace("\\end{adjustbox}", table_notes)

# Force position of table
latex = latex.replace("\\begin{table}", "\\begin{table}[!h]")

# Persist the table: `path` and its directory were prepared above, but the
# LaTeX source was previously only printed and never written to the file.
with open(path, "w", encoding="utf-8") as tex_file:
    tex_file.write(latex)

print(latex)

Unnamed: 0_level_0,accuracy,f1 (micro),precision (micro),recall (micro)
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Flaubert-fine-tuned,74,74,74,74
GPT-3.5-turbo-1106,58,58,58,58
Flaubert-pretrained,56,56,56,56
mDeBERTa,45,45,45,45
Davinci-002,9,9,9,9


\begin{table}[!h]
\centering
\caption{Metrics for topic classification models.}
\begin{adjustbox}{center}
\begin{tabular}{lrrrr}
\toprule
 & \bfseries accuracy & \bfseries f1 (micro) & \bfseries precision (micro) & \bfseries recall (micro) \\
model &  &  &  &  \\
\midrule
\bfseries Flaubert-fine-tuned & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,74 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,74 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,74 & {\cellcolor[HTML]{008000}} \color[HTML]{F1F1F1} \bfseries \color[HTML]{FF9999} 0,74 \\
\bfseries GPT-3.5-turbo-1106 & {\cellcolor[HTML]{3B9D3B}} \color[HTML]{F1F1F1} 0,58 & {\cellcolor[HTML]{3B9D3B}} \color[HTML]{F1F1F1} 0,58 & {\cellcolor[HTML]{3B9D3B}} \color[HTML]{F1F1F1} 0,58 & {\cellcolor[HTML]{3B9D3B}} \color[HTML]{F1F1F1} 0,58 \\
\bfseries Flaubert-pretrained & {\cellcolor[HTML]{42A042}} \color[HTML]{F1F1F1} 0,56 & {\cellcolor