In [1]:

# 1) INSTALLATION & IMPORTS


# Sur Kaggle (ou Colab), installez si besoin :
!pip install -q torch accelerate transformers datasets evaluate scikit-learn plotly
!pip install -q huggingface_hub peft bitsandbytes
!pip install -q loguru

import os
import numpy as np
import pandas as pd

# Pour les modèles scikit
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Plotly pour graphes interactifs
import plotly.express as px
import plotly.figure_factory as ff

# Transformers
import torch
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, 
                          Trainer, TrainingArguments, DataCollatorWithPadding)
import evaluate

# LoRA
from peft import LoraConfig, get_peft_model, TaskType

from loguru import logger

print("=== Librairies importées ===")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h=== Librairies importées ===


In [2]:

# 2) CHARGEMENT DU DATASET


# 2 options : 
# (A) Charger depuis "FinGPT/fingpt-sentiment-train" :
try:
    from datasets import load_dataset
    ds_raw = load_dataset("FinGPT/fingpt-sentiment-train", split="train")
    df = ds_raw.to_pandas()
except:
    # (B) Sinon, on suppose un DataFrame local 'df'
    pass

print(df.head(5))
print("Taille du DataFrame:", df.shape)

# df contient 3 colonnes : ["input", "output", "instruction"] ou juste ["input","label"] ?

# Si c’est ["output"], on la convertit en label
if "output" in df.columns:
    # V1 : On fera la version 3 classes
    def simplify_label_3(lbl):
        # negative, neutral, positive
        lbl_low = lbl.lower()
        if "negative" in lbl_low:
            return "negative"
        elif "positive" in lbl_low:
            return "positive"
        else:
            return "neutral"
    df["label_3"] = df["output"].apply(simplify_label_3)

   
    def label_9(lbl):
        return lbl.lower().strip()  
    df["label_9"] = df["output"].apply(label_9)
    
    # On a 2 colonnes label_3 et label_9
    # On concentre le "input" juste sur la col "input"
    df = df[["input","label_3","label_9"]].reset_index(drop=True)


README.md:   0%|          | 0.00/529 [00:00<?, ?B/s]

(…)-00000-of-00001-dabab110260ac909.parquet:   0%|          | 0.00/6.42M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/76772 [00:00<?, ? examples/s]

                                               input               output  \
0  Teollisuuden Voima Oyj , the Finnish utility k...              neutral   
1  Sanofi poaches AstraZeneca scientist as new re...              neutral   
2  Starbucks says the workers violated safety pol...  moderately negative   
3                      $brcm raises revenue forecast             positive   
4  Google parent Alphabet Inc. reported revenue a...  moderately negative   

                                         instruction  
0  What is the sentiment of this news? Please cho...  
1  What is the sentiment of this news? Please cho...  
2  What is the sentiment of this news? Please cho...  
3  What is the sentiment of this tweet? Please ch...  
4  What is the sentiment of this news? Please cho...  
Taille du DataFrame: (76772, 3)


In [3]:

# 3) EXPLORER LES DISTRIBUTIONS


# Distribution 3 classes
fig3 = px.histogram(df, x="label_3", title="Répartition (3 classes)", 
                    color="label_3")
fig3.show()

# Distribution 9 classes (selon dataset exact)
fig9 = px.histogram(df, x="label_9", title="Répartition (9 classes)", 
                    color="label_9")
fig9.show()


In [4]:

# 4) SCENARIO A : 3 classes (neg/neu/pos)


df_3 = df[["input","label_3"]].rename(columns={"label_3":"label"}).copy()
df_3.dropna(subset=["label"], inplace=True)
print("=== Scenario 3 classes ===")
print(df_3["label"].value_counts())

# SPLIT
train_3, temp_3 = train_test_split(df_3, test_size=0.30, random_state=42, stratify=df_3["label"])
val_3, test_3 = train_test_split(temp_3, test_size=0.66, random_state=42, stratify=temp_3["label"])
print(f"Train={len(train_3)} | Val={len(val_3)} | Test={len(test_3)}")


# 4A) BASELINES (3 classes)


# Vectorisation TF-IDF
vectorizer_3 = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
X_train_3 = vectorizer_3.fit_transform(train_3["input"])
y_train_3 = train_3["label"]

X_val_3   = vectorizer_3.transform(val_3["input"])
y_val_3   = val_3["label"]

X_test_3  = vectorizer_3.transform(test_3["input"])
y_test_3  = test_3["label"]

# LogReg
params_lr = {
    "C":[0.01,0.1,1,10],
    "solver":["lbfgs","liblinear"]
}
lr_3 = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    params_lr,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1
)
lr_3.fit(X_train_3, y_train_3)
best_lr_3 = lr_3.best_estimator_

val_pred_lr_3 = best_lr_3.predict(X_val_3)
acc_lr_val_3 = accuracy_score(y_val_3, val_pred_lr_3)
f1_lr_val_3  = f1_score(y_val_3, val_pred_lr_3, average='macro')

test_pred_lr_3 = best_lr_3.predict(X_test_3)
acc_lr_test_3 = accuracy_score(y_test_3, test_pred_lr_3)
f1_lr_test_3  = f1_score(y_test_3, test_pred_lr_3, average='macro')

print("=== LogReg (3 classes) ===")
print(f"Best params: {lr_3.best_params_}, best CV f1={lr_3.best_score_:.3f}")
print(f"Val : Acc={acc_lr_val_3:.3f}, F1={f1_lr_val_3:.3f}")
print(f"Test: Acc={acc_lr_test_3:.3f}, F1={f1_lr_test_3:.3f}\n")

# RF
params_rf = {
    "n_estimators":[50,100,200],
    "max_depth":[None,10,20]
}
rf_3 = GridSearchCV(
    RandomForestClassifier(random_state=42),
    params_rf,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1
)
rf_3.fit(X_train_3, y_train_3)
best_rf_3 = rf_3.best_estimator_

val_pred_rf_3 = best_rf_3.predict(X_val_3)
acc_rf_val_3 = accuracy_score(y_val_3, val_pred_rf_3)
f1_rf_val_3  = f1_score(y_val_3, val_pred_rf_3, average='macro')

test_pred_rf_3 = best_rf_3.predict(X_test_3)
acc_rf_test_3 = accuracy_score(y_test_3, test_pred_rf_3)
f1_rf_test_3  = f1_score(y_test_3, test_pred_rf_3, average='macro')

print("=== RandomForest (3 classes) ===")
print(f"Best params: {rf_3.best_params_}, best CV f1={rf_3.best_score_:.3f}")
print(f"Val : Acc={acc_rf_val_3:.3f}, F1={f1_rf_val_3:.3f}")
print(f"Test: Acc={acc_rf_test_3:.3f}, F1={f1_rf_test_3:.3f}\n")


=== Scenario 3 classes ===
label
positive    30510
neutral     29215
negative    17047
Name: count, dtype: int64
Train=53740 | Val=7830 | Test=15202


In [5]:

# 4A) suite


# Vectorisation TF-IDF
vectorizer_3 = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
X_train_3 = vectorizer_3.fit_transform(train_3["input"])
y_train_3 = train_3["label"]

X_val_3   = vectorizer_3.transform(val_3["input"])
y_val_3   = val_3["label"]

X_test_3  = vectorizer_3.transform(test_3["input"])
y_test_3  = test_3["label"]

# LogReg
params_lr = {
    "C":[0.01,0.1,1,10],
    "solver":["lbfgs","liblinear"]
}
lr_3 = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    params_lr,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1
)
lr_3.fit(X_train_3, y_train_3)
best_lr_3 = lr_3.best_estimator_

val_pred_lr_3 = best_lr_3.predict(X_val_3)
acc_lr_val_3 = accuracy_score(y_val_3, val_pred_lr_3)
f1_lr_val_3  = f1_score(y_val_3, val_pred_lr_3, average='macro')

test_pred_lr_3 = best_lr_3.predict(X_test_3)
acc_lr_test_3 = accuracy_score(y_test_3, test_pred_lr_3)
f1_lr_test_3  = f1_score(y_test_3, test_pred_lr_3, average='macro')

print("=== LogReg (3 classes) ===")
print(f"Best params: {lr_3.best_params_}, best CV f1={lr_3.best_score_:.3f}")
print(f"Val : Acc={acc_lr_val_3:.3f}, F1={f1_lr_val_3:.3f}")
print(f"Test: Acc={acc_lr_test_3:.3f}, F1={f1_lr_test_3:.3f}\n")

# RF
params_rf = {
    "n_estimators":[50,100,200],
    "max_depth":[None,10,20]
}
rf_3 = GridSearchCV(
    RandomForestClassifier(random_state=42),
    params_rf,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1
)
rf_3.fit(X_train_3, y_train_3)
best_rf_3 = rf_3.best_estimator_

val_pred_rf_3 = best_rf_3.predict(X_val_3)
acc_rf_val_3 = accuracy_score(y_val_3, val_pred_rf_3)
f1_rf_val_3  = f1_score(y_val_3, val_pred_rf_3, average='macro')

test_pred_rf_3 = best_rf_3.predict(X_test_3)
acc_rf_test_3 = accuracy_score(y_test_3, test_pred_rf_3)
f1_rf_test_3  = f1_score(y_test_3, test_pred_rf_3, average='macro')

print("=== RandomForest (3 classes) ===")
print(f"Best params: {rf_3.best_params_}, best CV f1={rf_3.best_score_:.3f}")
print(f"Val : Acc={acc_rf_val_3:.3f}, F1={f1_rf_val_3:.3f}")
print(f"Test: Acc={acc_rf_test_3:.3f}, F1={f1_rf_test_3:.3f}\n")


=== LogReg (3 classes) ===
Best params: {'C': 10, 'solver': 'lbfgs'}, best CV f1=0.870
Val : Acc=0.891, F1=0.886
Test: Acc=0.890, F1=0.884

=== RandomForest (3 classes) ===
Best params: {'max_depth': None, 'n_estimators': 200}, best CV f1=0.864
Val : Acc=0.898, F1=0.890
Test: Acc=0.898, F1=0.888



In [6]:

# 4B) MODELE FINBERT (LoRA) (3 classes)


label_list_3 = ["negative","neutral","positive"]
label2id_3 = {lbl:i for i,lbl in enumerate(label_list_3)}
id2label_3 = {i:lbl for i,lbl in enumerate(label_list_3)}

def map_label_3(ex):
    ex["labels"] = label2id_3[ex["label"]]
    return ex

train_3_ds = Dataset.from_pandas(train_3.reset_index(drop=True)).map(map_label_3)
val_3_ds   = Dataset.from_pandas(val_3.reset_index(drop=True)).map(map_label_3)
test_3_ds  = Dataset.from_pandas(test_3.reset_index(drop=True)).map(map_label_3)

model_name_3 = "ProsusAI/finbert"
tokenizer_3  = AutoTokenizer.from_pretrained(model_name_3)
model_3base  = AutoModelForSequenceClassification.from_pretrained(
    model_name_3,
    num_labels=len(label_list_3),
    id2label=id2label_3,
    label2id=label2id_3
)

if tokenizer_3.pad_token is None:
    tokenizer_3.add_special_tokens({"pad_token":"[PAD]"})
    model_3base.resize_token_embeddings(len(tokenizer_3))

def tok_map_3(ex):
    return tokenizer_3(
        ex["input"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_3_tok = train_3_ds.map(tok_map_3, batched=True)
val_3_tok   = val_3_ds.map(tok_map_3, batched=True)
test_3_tok  = test_3_ds.map(tok_map_3, batched=True)

train_3_tok = train_3_tok.remove_columns(["input","label"])
val_3_tok   = val_3_tok.remove_columns(["input","label"])
test_3_tok  = test_3_tok.remove_columns(["input","label"])

collator_3 = DataCollatorWithPadding(tokenizer=tokenizer_3)

# LoRA
peft_config_3 = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query","value"]
)
model_3 = get_peft_model(model_3base, peft_config_3)

def compute_metrics_3(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc  = accuracy_score(labels, preds)
    f1   = f1_score(labels, preds, average='macro')
    prec = precision_score(labels, preds, average='macro')
    rec  = recall_score(labels, preds, average='macro')
    return {"accuracy": acc, "f1": f1, "precision":prec, "recall":rec}

training_args_3 = TrainingArguments(
    output_dir="./finbert_3classes_lora",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_steps=100,
    fp16=True,
    report_to="none"
)

trainer_3 = Trainer(
    model=model_3,
    args=training_args_3,
    train_dataset=train_3_tok,
    eval_dataset=val_3_tok,
    tokenizer=tokenizer_3,
    data_collator=collator_3,
    compute_metrics=compute_metrics_3
)

trainer_3.train()
trainer_3.save_model("./finbert_3_best")
tokenizer_3.save_pretrained("./finbert_3_best")

test_results_3 = trainer_3.evaluate(test_3_tok)
print("=== FinBERT-lora (3 classes) sur Test ===")
print(test_results_3)


Map:   0%|          | 0/53740 [00:00<?, ? examples/s]

Map:   0%|          | 0/7830 [00:00<?, ? examples/s]

Map:   0%|          | 0/15202 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Map:   0%|          | 0/53740 [00:00<?, ? examples/s]

Map:   0%|          | 0/7830 [00:00<?, ? examples/s]

Map:   0%|          | 0/15202 [00:00<?, ? examples/s]


`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead


`tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4623,0.411472,0.846616,0.845496,0.844584,0.846719
2,0.4066,0.382048,0.867305,0.866409,0.865554,0.868851
3,0.3635,0.354737,0.875223,0.874613,0.873751,0.875759


=== FinBERT-lora (3 classes) sur Test ===
{'eval_loss': 0.3562515676021576, 'eval_accuracy': 0.8746217602946981, 'eval_f1': 0.8730099265324366, 'eval_precision': 0.8730121247818358, 'eval_recall': 0.8737594904752118, 'eval_runtime': 73.107, 'eval_samples_per_second': 207.942, 'eval_steps_per_second': 26.003, 'epoch': 3.0}


In [7]:

# 4C) GRAPHIQUES INTERACTIFS : Matrices & Courbes


# 1) Matrice de confusion (Plotly) - ex FinBERT-lora
preds_fin_3 = []
test_3_labels = test_3_tok["labels"]
for i in range(len(test_3_tok)):
    inp_ids = test_3_tok[i]["input_ids"]
    attn_m = test_3_tok[i]["attention_mask"]
    ipt = {
        "input_ids": torch.tensor([inp_ids], dtype=torch.long, device=model_3.device),
        "attention_mask": torch.tensor([attn_m], dtype=torch.long, device=model_3.device),
    }
    with torch.no_grad():
        out_ = model_3(**ipt)
    preds_fin_3.append(out_.logits.argmax(dim=1).item())

cm_fin_3 = confusion_matrix(test_3_labels, preds_fin_3, labels=[0,1,2]) 
z_text = [[str(y) for y in x] for x in cm_fin_3]
fig_cm = ff.create_annotated_heatmap(
    z=cm_fin_3, 
    x=["negative","neutral","positive"],
    y=["negative","neutral","positive"],
    annotation_text=z_text,
    colorscale="Blues"
)
fig_cm.update_layout(title="FinBERT-lora (3 classes) Confusion Matrix (Plotly)")
fig_cm.show()

# 2) Log d'entraînement
logs_3 = trainer_3.state.log_history
loss_list, val_f1_list, ep_list = [], [], []

for entry in logs_3:
    if "loss" in entry and "epoch" in entry:
        loss_list.append((entry["epoch"], entry["loss"]))
    if "eval_f1" in entry:
        val_f1_list.append((entry["epoch"], entry["eval_f1"]))

fig_loss = px.line(
    x=[x for (x,_) in loss_list],
    y=[y for (_,y) in loss_list],
    markers=True,
    title="Évolution Loss (FinBERT-lora) vs. Epoch"
)
fig_loss.update_xaxes(title="Epoch")
fig_loss.update_yaxes(title="Training Loss")
fig_loss.show()

if val_f1_list:
    fig_f1 = px.line(
        x=[x for (x,_) in val_f1_list],
        y=[y for (_,y) in val_f1_list],
        markers=True,
        title="F1 Validation vs. Epoch (FinBERT-lora)"
    )
    fig_f1.update_xaxes(title="Epoch")
    fig_f1.update_yaxes(title="F1 (macro)")
    fig_f1.show()


In [9]:

# 5) SCENARIO B : 9 CLASSES


print("\n=== SCENARIO 9 CLASSES (optionnel) ===")

# On suppose la colonne label_9 correspond à 9 labels
df_9 = df[["input","label_9"]].rename(columns={"label_9":"label"}).copy()
df_9.dropna(subset=["label"], inplace=True)

print(df_9["label"].value_counts())
fig9b = px.histogram(df_9, x="label", title="Répartition (9 classes)", color="label")
fig9b.show()

train_9, temp_9 = train_test_split(df_9, test_size=0.30, random_state=42, stratify=df_9["label"])
val_9, test_9 = train_test_split(temp_9, test_size=0.66, random_state=42, stratify=temp_9["label"])

print(f"Train={len(train_9)}, Val={len(val_9)}, Test={len(test_9)}")

# 5A) Baseline scikit-learn (LogReg / RF)
vec_9 = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
X_train_9 = vec_9.fit_transform(train_9["input"])
y_train_9 = train_9["label"]

X_val_9   = vec_9.transform(val_9["input"])
y_val_9   = val_9["label"]

X_test_9  = vec_9.transform(test_9["input"])
y_test_9  = test_9["label"]

# LogReg
params_lr9 = {
    "C":[0.01,0.1,1,10],
    "solver":["lbfgs","liblinear"]
}
lr9 = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    params_lr9,
    scoring="f1_macro",
    cv=3,
    n_jobs=-1
)
lr9.fit(X_train_9, y_train_9)
best_lr_9 = lr9.best_estimator_
val_pred_lr_9 = best_lr_9.predict(X_val_9)
test_pred_lr_9= best_lr_9.predict(X_test_9)

acc_lr_val_9 = accuracy_score(y_val_9, val_pred_lr_9)
f1_lr_val_9  = f1_score(y_val_9, val_pred_lr_9, average='macro')

acc_lr_test_9 = accuracy_score(y_test_9, test_pred_lr_9)
f1_lr_test_9  = f1_score(y_test_9, test_pred_lr_9, average='macro')

print("\n=== LogReg 9 classes ===")
print(f"Val  : Acc={acc_lr_val_9:.3f} F1={f1_lr_val_9:.3f}")
print(f"Test : Acc={acc_lr_test_9:.3f} F1={f1_lr_test_9:.3f}")

# 5B) FinBERT-lora (9 classes)
# On map label -> entiers
label9_list = sorted(df_9["label"].unique().tolist())  # ex. ["mildly negative","moderately negative","strong negative", "neutral", "mildly positive","moderately positive","strong positive", ...]
label2id_9 = {lbl:i for i,lbl in enumerate(label9_list)}
id2label_9 = {i:lbl for i,lbl in enumerate(label9_list)}

def map_label_9(ex):
    ex["labels"] = label2id_9[ex["label"]]
    return ex

train_9_ds = Dataset.from_pandas(train_9.reset_index(drop=True)).map(map_label_9)
val_9_ds   = Dataset.from_pandas(val_9.reset_index(drop=True)).map(map_label_9)
test_9_ds  = Dataset.from_pandas(test_9.reset_index(drop=True)).map(map_label_9)

tok_9 = AutoTokenizer.from_pretrained(model_name_3)
m9base= AutoModelForSequenceClassification.from_pretrained(
    model_name_3,
    num_labels=len(label9_list),
    id2label=id2label_9,
    label2id=label2id_9,
    ignore_mismatched_sizes=True  
)


if tok_9.pad_token is None:
    tok_9.add_special_tokens({"pad_token":"[PAD]"})
    m9base.resize_token_embeddings(len(tok_9))

def tok_map_9(ex):
    return tok_9(
        ex["input"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_9_tok = train_9_ds.map(tok_map_9, batched=True)
val_9_tok   = val_9_ds.map(tok_map_9, batched=True)
test_9_tok  = test_9_ds.map(tok_map_9, batched=True)

train_9_tok = train_9_tok.remove_columns(["input","label"])
val_9_tok   = val_9_tok.remove_columns(["input","label"])
test_9_tok  = test_9_tok.remove_columns(["input","label"])

coll_9 = DataCollatorWithPadding(tokenizer=tok_9)

peft_9 = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query","value"]
)
m9 = get_peft_model(m9base, peft_9)

def compute_9(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc  = accuracy_score(labels, preds)
    f1   = f1_score(labels, preds, average='macro')
    return {"accuracy":acc, "f1":f1}

args_9 = TrainingArguments(
    output_dir="./finbert_9_lora",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_steps=100,
    fp16=True,
    report_to="none"
)

trainer_9 = Trainer(
    model=m9,
    args=args_9,
    train_dataset=train_9_tok,
    eval_dataset=val_9_tok,
    tokenizer=tok_9,
    data_collator=coll_9,
    compute_metrics=compute_9
)

trainer_9.train()
trainer_9.save_model("./finbert_9_best")
tok_9.save_pretrained("./finbert_9_best")

res_9 = trainer_9.evaluate(test_9_tok)
print("\n=== FinBERT-lora (9 classes) sur Test ===")
print(res_9)



=== SCENARIO 9 CLASSES (optionnel) ===
label
neutral                29215
positive               21588
negative               11749
moderately positive     6163
moderately negative     2972
mildly positive         2548
mildly negative         2108
strong negative          218
strong positive          211
Name: count, dtype: int64


Train=53740, Val=7830, Test=15202

=== LogReg 9 classes ===
Val  : Acc=0.849 F1=0.480
Test : Acc=0.842 F1=0.470


Map:   0%|          | 0/53740 [00:00<?, ? examples/s]

Map:   0%|          | 0/7830 [00:00<?, ? examples/s]

Map:   0%|          | 0/15202 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ProsusAI/finbert and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([9, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([9]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/53740 [00:00<?, ? examples/s]

Map:   0%|          | 0/7830 [00:00<?, ? examples/s]

Map:   0%|          | 0/15202 [00:00<?, ? examples/s]


`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead


`tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6306,0.618932,0.787101,0.420636
2,0.526,0.550363,0.810473,0.434179
3,0.5671,0.526883,0.818135,0.442408



=== FinBERT-lora (9 classes) sur Test ===
{'eval_loss': 0.5240683555603027, 'eval_accuracy': 0.8248914616497829, 'eval_f1': 0.447739472860885, 'eval_runtime': 73.0743, 'eval_samples_per_second': 208.035, 'eval_steps_per_second': 26.015, 'epoch': 3.0}


In [10]:
########################################
# 6) SAUVEGARDE DES MEILLEURS MODELES
########################################

# scikit-learn -> ex. joblib
import joblib
joblib.dump(best_lr_3, "best_lr_3cls.pkl")
joblib.dump(best_rf_3, "best_rf_3cls.pkl")
# On a déjà trainer_3.save_model("./finbert_3_best") pour FinBERT-lora

# Idem pour scenario 9 classes
joblib.dump(best_lr_9, "best_lr_9cls.pkl")
# plus trainer_9.save_model("./finbert_9_best")

print("=== Sauvegarde terminée. ===")


=== Sauvegarde terminée. ===


In [11]:

# 7) CONCLUSION / COMPARAISON


print("\n=== SCENARIO 3 CLASSES ===")
print(f"LogReg test : Acc={acc_lr_test_3:.3f}, F1={f1_lr_test_3:.3f}")
print(f"RF     test : Acc={acc_rf_test_3:.3f}, F1={f1_rf_test_3:.3f}")
print(f"FinBERT-lora test (extrait) : Acc={test_results_3['eval_accuracy']:.3f}, F1={test_results_3['eval_f1']:.3f}")

print("\n=== SCENARIO 9 CLASSES ===")
print(f"LogReg test (macro) : ??? (voir f1_lr_test_9,acc_lr_test_9)") 
print(f"FinBERT-lora test : {res_9}")

print("""
Observations :
1) Baselines scikit-learn peuvent être très forts (TF-IDF + LR ou RF).
2) FinBERT-lora se situe parfois légèrement en-dessous si epochs ou LR pas ajustés.
3) L'approche 9 classes est plus complexe => F1 plus faible.
)



=== SCENARIO 3 CLASSES ===
LogReg test : Acc=0.890, F1=0.884
RF     test : Acc=0.898, F1=0.888
FinBERT-lora test (extrait) : Acc=0.875, F1=0.873

=== SCENARIO 9 CLASSES ===
LogReg test (macro) : ??? (voir f1_lr_test_9,acc_lr_test_9)
FinBERT-lora test : {'eval_loss': 0.5240683555603027, 'eval_accuracy': 0.8248914616497829, 'eval_f1': 0.447739472860885, 'eval_runtime': 73.0743, 'eval_samples_per_second': 208.035, 'eval_steps_per_second': 26.015, 'epoch': 3.0}

Observations :
1) Baselines scikit-learn peuvent être très forts (TF-IDF + LR ou RF).
2) FinBERT-lora se situe parfois légèrement en-dessous si epochs ou LR pas ajustés.
3) L'approche 9 classes est plus complexe => F1 plus faible.

Vous pouvez affiner hyperparamètres, nb epochs, batch_size, ou la config LoRA 
pour tenter de dépasser la baseline scikit sur 3 classes ou 9 classes.

