In [6]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import unicodedata
import string
import re
import spacy
from datasets import load_from_disk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from sklearn.utils import resample
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import time
import utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
wikiqa_data = load_from_disk("wikiqa")
test_data_set = wikiqa_data["test"]
train_data_set = wikiqa_data["train"]
validation_data_set = wikiqa_data["validation"]


def preprocess_examples(examples):
    examples['question'] = [utils.preprocess(q) for q in examples['question']]
    examples['answer'] = [utils.preprocess(a) for a in examples['answer']]
    return examples

train_data_set = train_data_set.map(preprocess_examples, batched=True)
validation_data_set = validation_data_set.map(preprocess_examples, batched=True)
test_data_set = test_data_set.map(preprocess_examples, batched=True)

# Convertir en DataFrame
train_df = pd.DataFrame({
    'question': train_data_set['question'],
    'answer': train_data_set['answer'],
    'label': train_data_set['label']
})

validation_df = pd.DataFrame({
    'question': validation_data_set['question'],
    'answer': validation_data_set['answer'],
    'label': validation_data_set['label']
})

test_df = pd.DataFrame({
    'question': test_data_set['question'],
    'answer': test_data_set['answer'],
    'label': test_data_set['label']
})

In [4]:
# Définir le pipeline de transformation des colonnes
preprocess_pipeline = ColumnTransformer(
    transformers=[
        ('question', TfidfVectorizer(), 'question'),
        ('answer', TfidfVectorizer(), 'answer'),
    ]
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocess_pipeline),
    ('classifier', LogisticRegression(solver='liblinear'))
])

# Entraîner le modèle avec les données équilibrées
start_time = time.time()
pipeline.fit(train_df[['question', 'answer']], train_df['label'])
end_time = time.time()
actual_training_time = end_time - start_time
print(f"Temps d'entraînement réel: {actual_training_time:.2f} secondes")

# Fonction pour calculer MAP, MRR, S@1
def compute_metrics(df, predictions, probabilities):
    grouped = df.groupby('question')
    ap_sum = 0
    rr_sum = 0
    success_at_1 = 0
    for name, group in grouped:
        group_probs = probabilities[group.index, 1]  # Probabilités de la classe 1
        group_labels = group['label'].values
        sorted_indices = np.argsort(-group_probs)  # Trier les indices par probabilité décroissante
        sorted_labels = group_labels[sorted_indices]
        
        ap = 0
        correct_count = 0
        for rank, label in enumerate(sorted_labels, 1):
            if label == 1:
                correct_count += 1
                ap += correct_count / rank
                if correct_count == 1:
                    rr_sum += 1 / rank
        ap /= max(correct_count, 1)
        ap_sum += ap
        
        if sorted_labels[0] == 1:
            success_at_1 += 1
    
    n = len(grouped)
    map_score = ap_sum / n
    mrr_score = rr_sum / n
    success_at_1_score = success_at_1 / n
    
    return map_score, mrr_score, success_at_1_score

Temps d'entraînement réel: 0.46 secondes


In [7]:
# Évaluer le modèle sur les données de validation
validation_predictions = pipeline.predict(balanced_validation_df[['question', 'answer']])
validation_probabilities = pipeline.predict_proba(balanced_validation_df[['question', 'answer']])
map_score, mrr_score, success_at_1_score = compute_metrics(balanced_validation_df, validation_predictions, validation_probabilities)
print(f"\nValidation Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Calculer les métriques classiques
validation_accuracy = accuracy_score(balanced_validation_df['label'], validation_predictions)
validation_precision = precision_score(balanced_validation_df['label'], validation_predictions)
validation_recall = recall_score(balanced_validation_df['label'], validation_predictions)
validation_f1 = f1_score(balanced_validation_df['label'], validation_predictions)

print(f"Accuracy: {validation_accuracy}")
print(f"Precision: {validation_precision}")
print(f"Recall: {validation_recall}")
print(f"F1 Score: {validation_f1}")

# Évaluer le modèle sur les données de test
test_predictions = pipeline.predict(balanced_test_df[['question', 'answer']])
test_probabilities = pipeline.predict_proba(balanced_test_df[['question', 'answer']])
map_score, mrr_score, success_at_1_score = compute_metrics(balanced_test_df, test_predictions, test_probabilities)
print(f"\nTest Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Calculer les métriques classiques
test_accuracy = accuracy_score(balanced_test_df['label'], test_predictions)
test_precision = precision_score(balanced_test_df['label'], test_predictions)
test_recall = recall_score(balanced_test_df['label'], test_predictions)
test_f1 = f1_score(balanced_test_df['label'], test_predictions)

print(f"Accuracy: {test_accuracy}")
print(f"Precision: {test_precision}")
print(f"Recall: {test_recall}")
print(f"F1 Score: {test_f1}")

NameError: name 'balanced_validation_df' is not defined

In [8]:
# Evaluate the model on validation data
validation_predictions = pipeline.predict(validation_df[['question', 'answer']])
validation_probabilities = pipeline.predict_proba(validation_df[['question', 'answer']])
map_score, mrr_score, success_at_1_score = compute_metrics(validation_df, validation_predictions, validation_probabilities)
print(f"\nValidation Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Calculate classic metrics
validation_accuracy = accuracy_score(validation_df['label'], validation_predictions)
validation_precision = precision_score(validation_df['label'], validation_predictions)
validation_recall = recall_score(validation_df['label'], validation_predictions)
validation_f1 = f1_score(validation_df['label'], validation_predictions)

print(f"Accuracy: {validation_accuracy}")
print(f"Precision: {validation_precision}")
print(f"Recall: {validation_recall}")
print(f"F1 Score: {validation_f1}")

# Evaluate the model on test data
test_predictions = pipeline.predict(test_df[['question', 'answer']])
test_probabilities = pipeline.predict_proba(test_df[['question', 'answer']])
map_score, mrr_score, success_at_1_score = compute_metrics(test_df, test_predictions, test_probabilities)
print(f"\nTest Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Calculate classic metrics
test_accuracy = accuracy_score(test_df['label'], test_predictions)
test_precision = precision_score(test_df['label'], test_predictions)
test_recall = recall_score(test_df['label'], test_predictions)
test_f1 = f1_score(test_df['label'], test_predictions)

print(f"Accuracy: {test_accuracy}")
print(f"Precision: {test_precision}")
print(f"Recall: {test_recall}")
print(f"F1 Score: {test_f1}")



Validation Results:
MAP: 0.2511612226442736
MRR: 0.25228437914878604
S@1: 0.18305084745762712

Accuracy: 0.9498719356019026
Precision: 1.0
Recall: 0.02142857142857143
F1 Score: 0.04195804195804196

Test Results:
MAP: 0.22066635850583924
MRR: 0.22571738786966483
S@1: 0.15955766192733017

Accuracy: 0.9524736415247365
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
