In [None]:
!pip install mlflow dagshub --quiet

In [None]:
import os
import mlflow
from mlflow.tracking import MlflowClient
import dagshub
from google.colab import userdata
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
import matplotlib.font_manager as fm

In [None]:
# Register the presentation font from its .ttf file so matplotlib can resolve it by family name.
# Adjust the path below to the exact location of the font file.
font_path = "/content/drive/MyDrive/Colab Notebooks/fonts/Exo2-VariableFont_wght.ttf"
fm.fontManager.addfont(font_path)

# Make the registered family the global default for every figure.
rcParams.update({"font.family": "Exo 2"})

# Two colours chosen to match the presentation: a light blue and its RGB complement,
# plus a dark blue given as a hex string.
bleuclair = (0.15, 0.55, 0.82)
couleur_complementaire = tuple(1 - channel for channel in bleuclair)
bleufonce = "#073642"

# Directory that receives the exported figures.
imgPrezPath = '/content/drive/MyDrive/Colab Notebooks/Projet_07/presentationimg'

In [None]:
# Read the DagsHub token from the Colab secrets store instead of hard-coding it.
dagshub_token = userdata.get('DAGSHUB_TOKEN')

# Register the token with the DagsHub client.
dagshub.auth.add_app_token(dagshub_token)

# Bind this session to the DagsHub repository with the MLflow integration enabled.
repo_coordinates = {"repo_owner": 'fabiencappelli', "repo_name": 'Projet_07'}
dagshub.init(mlflow=True, **repo_coordinates)

# Point MLflow at the repository's tracking endpoint on DagsHub.
tracking_uri = 'https://dagshub.com/fabiencappelli/Projet_07.mlflow'
mlflow.set_tracking_uri(tracking_uri)

In [None]:
# MLflow client handle, created with default arguments; the cells below use it
# to query experiments, runs and artifacts.
client = MlflowClient()

In [None]:
# Fetch the experiments returned by the client; later cells iterate over this collection.
experiments = client.search_experiments()

In [None]:
def get_artifact_size_bytes(run_id, path="model", mlflow_client=None):
    """Return the total size, in bytes, of every artifact file under ``path`` for a run.

    Sub-directories are walked in full. Entries whose ``file_size`` is missing
    (``None``) contribute 0 to the total.

    Args:
        run_id: Identifier of the run whose artifacts are listed.
        path: Artifact directory to start from (defaults to ``"model"``).
        mlflow_client: Optional object exposing ``list_artifacts(run_id, path=...)``.
            When omitted, the notebook-level ``client`` is used, so existing
            calls keep working unchanged.

    Returns:
        The sum of the ``file_size`` values of all files found.
    """
    # Resolve the client lazily so the global is only required when no client is passed in.
    api = client if mlflow_client is None else mlflow_client

    total = 0
    # Explicit stack instead of recursion: one listing call per directory,
    # with no dependence on the interpreter's recursion limit.
    pending = [path]
    while pending:
        current = pending.pop()
        for info in api.list_artifacts(run_id, path=current):
            if info.is_dir:
                pending.append(info.path)
            else:
                total += info.file_size or 0
    return total

In [None]:
# Print one bullet per experiment with its name and identifier.
for experiment in experiments:
    name, exp_id = experiment.name, experiment.experiment_id
    print(f"- {name}  (ID: {exp_id})")

In [None]:
# Experiments that are no longer relevant for this comparison.
EXCLUDED_EXPERIMENTS = {"DISTILBERT-testregistering", "BERT"}

# Output column -> candidate metric keys, tried in order. The fallbacks are
# needed because the DistilBERT metrics were logged under different names;
# ideally every experiment would log the same names.
METRIC_ALIASES = {
    "f1_score": ("test_f1", "f1_score"),
    "accuracy": ("test_accuracy", "accuracy"),
    "roc_auc": ("test_roc_auc", "roc_auc"),
    "inf_time_ms": ("test_inference_time_ms_per_sample", "inference_time_ms_per_sample"),
}
SUMMARY_COLUMNS = ["experiment_name", *METRIC_ALIASES, "model_size_mb"]


def _first_metric(metrics, names):
    """Return the value of the first key of ``names`` present in ``metrics``, else None."""
    for name in names:
        if name in metrics:
            return metrics[name]
    return None


rows = []
for exp in experiments:
    # Skip excluded experiments up front so no run or artifact requests are
    # issued for data that would be discarded anyway.
    if exp.name in EXCLUDED_EXPERIMENTS:
        continue

    # Take the most recent run that finished successfully.
    runs = client.search_runs(
        experiment_ids=[exp.experiment_id],
        filter_string="attributes.status = 'FINISHED'",
        order_by=["attributes.end_time DESC"],
        max_results=1
    )
    if not runs:
        continue
    run = runs[0]

    data = run.data.metrics

    size_bytes = get_artifact_size_bytes(run.info.run_id, path="model")
    size_mb = size_bytes / (1024 * 1024)

    row = {"experiment_name": exp.name}
    row.update({column: _first_metric(data, names) for column, names in METRIC_ALIASES.items()})
    row["model_size_mb"] = size_mb
    rows.append(row)

# Passing the columns explicitly keeps the frame well-formed (and the column
# lookups below valid) even when no experiment produced a row.
df = pd.DataFrame(rows, columns=SUMMARY_COLUMNS)

# Shorter experiment labels for the charts.
rename_map = {
    "TFIDF_LogisticRegression_with_Metrics": "TFIDF LR",
    "TFIDF_LogisticRegression_with_Metrics_Spacy_Tokens": "TFIDF LR Tokenizé",
    "Embedding_Glove.twitter.27B.200d": "Glove",
    "Embedding_GoogleNews-vectors-negative300": "GoogleNews",
    "DISTILBERT": "DistilBERT",
}

df['experiment_name'] = df['experiment_name'].replace(rename_map)

# Display order of the experiments in the charts.
order = [
    "TFIDF LR",
    "TFIDF LR Tokenizé",
    "RN_Brut",
    "RN_Preproc",
    "RN_Preproc_Lemm",
    "Glove",
    "GoogleNews",
    "BLSTM",
    "DistilBERT"
]

# Rank lookup instead of repeated list.index scans; names missing from `order`
# get rank 999 and therefore go last. The stable sort keeps their retrieval order.
rank = {name: position for position, name in enumerate(order)}
df = (
    df.assign(sort_order=df['experiment_name'].map(rank).fillna(999))
      .sort_values('sort_order', kind='stable')
      .drop(columns='sort_order')
)

# Bare last expression: rendered as a rich table by the notebook.
df

In [None]:
# Scores (left axis) and inference time (right axis) per model, exported as Perfs.svg.
palette = sns.color_palette("Set2", 4)
# seaborn's theme setter overwrites rcParams["font.family"] with its `font`
# argument (default "sans-serif"), which would silently discard the Exo 2 font
# configured for the presentation; pass the family explicitly to keep it.
sns.set_theme(style='white', palette=palette, font="Exo 2")

INFERENCE_COLOR = '#E377C2'

fig, ax1 = plt.subplots(figsize=(12, 8))

# Secondary y-axis for the inference time, which is on a different scale than the scores.
ax2 = ax1.twinx()
ln_inf, = ax2.plot(
    df['experiment_name'], df['inf_time_ms'],
    label='Temps d’inférence (ms)', marker='X', color=INFERENCE_COLOR, linewidth=2
)
ax2.set_ylabel('Temps d’inférence (ms)', color=INFERENCE_COLOR)
ax2.tick_params(axis='y', labelcolor=INFERENCE_COLOR)

# The three score curves share the primary y-axis.
ln_f1, = ax1.plot(df['experiment_name'], df['f1_score'], label='F1 Score', marker='o', linewidth=2, color=palette[0])
ln_acc, = ax1.plot(df['experiment_name'], df['accuracy'], label='Accuracy', marker='s', linewidth=2, color=palette[1])
ln_roc, = ax1.plot(df['experiment_name'], df['roc_auc'], label='ROC AUC', marker='^', linewidth=2, color=palette[2])

ax1.set_xlabel('Modèle')
ax1.set_ylabel('Scores')
ax1.tick_params(axis='y')

# Single legend merging the lines drawn on both axes.
lines = [ln_inf, ln_f1, ln_acc, ln_roc]
labels = [line.get_label() for line in lines]
ax1.legend(lines, labels, loc='upper left', frameon=True, fancybox=True, shadow=True)

plt.setp(ax1.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
# Explicit axes method rather than the pyplot "current axes" state.
ax1.set_title('Comparaison des performances des modèles', fontsize=14)
fig.tight_layout()
fig.savefig(os.path.join(imgPrezPath, "Perfs.svg"), format="svg", bbox_inches="tight", pad_inches=0.1)


In [None]:
# Line chart of the model artifact size (MB) per experiment, exported as Tailles.svg.
fig, ax = plt.subplots(figsize=(10, 5))

line_style = dict(marker='D', linewidth=2, color="#1177BB")
sns.lineplot(data=df, x='experiment_name', y='model_size_mb', ax=ax, **line_style)

# Axis labels and title.
ax.set_xlabel("Modèle")
ax.set_ylabel("Taille du modèle (Mo)")
ax.set_title("Taille des modèles en mémoire", fontsize=14)

# Slanted x tick labels, anchored on their right end, so long names do not overlap.
ax.tick_params(axis='x')
x_tick_labels = ax.get_xticklabels()
plt.setp(x_tick_labels, rotation=45, ha="right", rotation_mode="anchor")

plt.tight_layout()
sizes_svg = os.path.join(imgPrezPath, "Tailles.svg")
plt.savefig(sizes_svg, format="svg", bbox_inches="tight", pad_inches=0.1)


In [None]:
# Render the summary DataFrame as a styled table image, exported as Tableaures.svg.
fig, ax = plt.subplots(figsize=(16, len(df)*0.7 + 1.5))
ax.axis('off')

# Every column gets an equal share of the axes width.
ncol = len(df.columns)
col_width = 1.0 / ncol

rounded_values = df.round(3).values
table = ax.table(
    cellText=rounded_values,
    colLabels=df.columns,
    cellLoc='center',
    loc='center'
)

# Fixed font size instead of matplotlib's automatic fitting.
table.auto_set_font_size(False)
table.set_fontsize(14)

header_face = '#1177BB'
stripe_faces = ("#f0f7fa", "#e5ecf6")  # even rows, odd rows

for (row_idx, _col_idx), cell in table.get_celld().items():
    cell.set_height(0.08)
    cell.set_width(col_width)

    if row_idx == 0:
        # Header row: bold white text on the blue background.
        cell.set_text_props(weight='bold', color='white')
        face = header_face
    else:
        # Body rows alternate between the two stripe colours.
        face = stripe_faces[row_idx % 2]
    cell.set_facecolor(face)

    cell.set_edgecolor("#BBBBBB")
    cell.set_linewidth(1.0)

plt.tight_layout()
table_svg = os.path.join(imgPrezPath, "Tableaures.svg")
plt.savefig(table_svg, format="svg", bbox_inches="tight", pad_inches=0.1)
