In [1]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import unicodedata
import string
import re
import spacy
from datasets import load_from_disk
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import resample
from utils import preprocess
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Kadem\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Kadem\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import gensim.downloader as api

# Fetch the pre-trained Word2Vec embeddings through the gensim downloader API
W2V_MODEL_NAME = 'word2vec-google-news-300'
word2vec_model = api.load(W2V_MODEL_NAME)

### Chargement et prétraitement des données

In [3]:
# Read the WikiQA dataset saved on disk and pull out its three splits
wikiqa_data = load_from_disk("wikiqa")
train_data_set, validation_data_set, test_data_set = (
    wikiqa_data[split_name] for split_name in ("train", "validation", "test")
)


def preprocess_examples(examples):
    """Run `preprocess` over the question and answer texts of a batch.

    `examples` maps column names to lists of values (this notebook passes it
    to `.map(..., batched=True)`). The 'question' and 'answer' lists are
    replaced in place by their preprocessed versions and the same mapping is
    returned.
    """
    for column in ('question', 'answer'):
        examples[column] = [preprocess(text) for text in examples[column]]
    return examples

# Apply the text preprocessing to every split, batch by batch
test_data_set = test_data_set.map(preprocess_examples, batched=True)
validation_data_set = validation_data_set.map(preprocess_examples, batched=True)
train_data_set = train_data_set.map(preprocess_examples, batched=True)

# Convert each split to a DataFrame holding only the columns used downstream
KEPT_COLUMNS = ('question', 'answer', 'label')
train_df = pd.DataFrame({column: train_data_set[column] for column in KEPT_COLUMNS})
validation_df = pd.DataFrame({column: validation_data_set[column] for column in KEPT_COLUMNS})
test_df = pd.DataFrame({column: test_data_set[column] for column in KEPT_COLUMNS})

In [4]:
# Oversample the minority class
def balance_classes(df):
    """Return a new DataFrame in which label 1 has as many rows as label 0.

    Rows with label 1 are drawn with replacement (fixed seed 123) until their
    count equals the number of label-0 rows; the label-0 rows are kept as they
    are. The result lists the label-0 rows first, then the resampled label-1
    rows.
    """
    negatives = df[df.label == 0]
    positives = df[df.label == 1]
    target_size = len(negatives)  # match the majority class

    positives_resampled = resample(
        positives,
        replace=True,            # sample with replacement
        n_samples=target_size,
        random_state=123,        # reproducibility
    )

    balanced_parts = [negatives, positives_resampled]
    return pd.concat(balanced_parts)

balanced_train_df, balanced_validation_df, balanced_test_df = (
    balance_classes(split_df) for split_df in (train_df, validation_df, test_df)
)

# Show the class counts of each split after oversampling
for heading, balanced_df in (
    ("Ensemble de données d'entraînement :", balanced_train_df),
    ("\nEnsemble de données de validation :", balanced_validation_df),
    ("\nEnsemble de données de test :", balanced_test_df),
):
    print(heading)
    print(balanced_df['label'].value_counts())

Ensemble de données d'entraînement :
label
0    19320
1    19320
Name: count, dtype: int64

Ensemble de données de validation :
label
0    2593
1    2593
Name: count, dtype: int64

Ensemble de données de test :
label
0    5872
1    5872
Name: count, dtype: int64


In [5]:
# Shrink the majority class, then oversample the minority class to the same size
def reduce_and_balance_classes(df, majority_fraction=0.25, random_state=123):
    """Downsample label 0 and oversample label 1 to the same row count.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose a `label` column. Rows with label 0 are treated as the
        majority class and rows with label 1 as the minority class.
    majority_fraction : float, default 0.25
        Share of the label-0 rows to keep, sampled without replacement.
        The default keeps 25 %, i.e. drops 75 % of them.
    random_state : int, default 123
        Seed passed to both resampling steps, for reproducibility.

    Returns
    -------
    pandas.DataFrame
        The kept label-0 rows followed by the resampled label-1 rows.

    Raises
    ------
    ValueError
        If `majority_fraction` is not in the interval (0, 1].
    """
    if not 0 < majority_fraction <= 1:
        raise ValueError(
            f"majority_fraction must be in (0, 1], got {majority_fraction!r}"
        )

    df_majority = df[df.label == 0]
    df_minority = df[df.label == 1]

    # Keep only a fraction of the majority class (sampling without replacement)
    df_majority_reduced = resample(df_majority,
                                   replace=False,
                                   n_samples=int(len(df_majority) * majority_fraction),
                                   random_state=random_state)

    # Oversample the minority class (with replacement) up to the size of the
    # reduced majority class
    df_minority_upsampled = resample(df_minority,
                                     replace=True,
                                     n_samples=len(df_majority_reduced),
                                     random_state=random_state)

    return pd.concat([df_majority_reduced, df_minority_upsampled])

# These assignments replace the frames built with `balance_classes` in the
# previous cell.
# NOTE(review): the validation and test sets are resampled as well, so every
# score reported below is measured on an artificially balanced distribution
# (with duplicated positive rows), not on the original WikiQA splits —
# confirm this is intended.
balanced_train_df, balanced_validation_df, balanced_test_df = (
    reduce_and_balance_classes(split_df) for split_df in (train_df, validation_df, test_df)
)

# Show the class counts of each split after reduction and oversampling
for heading, balanced_df in (
    ("Ensemble de données d'entraînement :", balanced_train_df),
    ("\nEnsemble de données de validation :", balanced_validation_df),
    ("\nEnsemble de données de test :", balanced_test_df),
):
    print(heading)
    print(balanced_df['label'].value_counts())

Ensemble de données d'entraînement :
label
0    4830
1    4830
Name: count, dtype: int64

Ensemble de données de validation :
label
0    648
1    648
Name: count, dtype: int64

Ensemble de données de test :
label
0    1468
1    1468
Name: count, dtype: int64


In [6]:
# Shuffle the rows of each split. A fixed seed keeps the row order, and
# therefore the fitted model and every score below, identical from one run to
# the next. reset_index(drop=True) gives each frame a fresh 0..n-1 index.
SHUFFLE_SEED = 123
balanced_train_df = balanced_train_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
balanced_validation_df = balanced_validation_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
balanced_test_df = balanced_test_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

### Word2Vec

In [7]:
# Mean Word2Vec vector of a text
def get_mean_word2vec(text, model):
    """Average the embeddings of the whitespace-separated words of `text`.

    Words that `model` does not contain are skipped. When no word of the text
    is known to the model (including an empty text), a zero vector of length
    `model.vector_size` is returned instead.
    """
    known_vectors = []
    for token in text.split():
        if token in model:
            known_vectors.append(model[token])

    if not known_vectors:
        return np.zeros(model.vector_size)
    return np.mean(known_vectors, axis=0)

# Turn the questions into mean Word2Vec vectors
def transform_questions(df, model):
    """Stack the mean Word2Vec vector of every question, one row per row of `df`."""
    question_vectors = [get_mean_word2vec(question, model) for question in df['question']]
    return np.vstack(question_vectors)

# Turn the answers into mean Word2Vec vectors
def transform_answers(df, model):
    """Stack the mean Word2Vec vector of every answer, one row per row of `df`."""
    answer_vectors = [get_mean_word2vec(answer, model) for answer in df['answer']]
    return np.vstack(answer_vectors)

### Entraînement du modèle

In [106]:
# Baseline model: the mean Word2Vec vector of the question and the one of the
# answer are concatenated by the FeatureUnion and fed to a linear SVM.
classifier = Pipeline(steps=[
    ('features', FeatureUnion([
        ('question_w2v', FunctionTransformer(transform_questions, kw_args={'model': word2vec_model}, validate=False)),
        ('answer_w2v', FunctionTransformer(transform_answers, kw_args={'model': word2vec_model}, validate=False))
    ])),
    # probability=True enables predict_proba, which the ranking metrics need.
    # random_state seeds the data shuffling SVC performs for its probability
    # estimates, so the probabilities are reproducible across runs.
    ('classifier', SVC(kernel='linear', class_weight='balanced', probability=True, random_state=123))
])

# Train the model on the balanced training set
classifier.fit(balanced_train_df, balanced_train_df['label'])

### Evaluation sur les données de validation

In [108]:
# Score the validation set: class probabilities and hard labels
validation_probabilities = classifier.predict_proba(balanced_validation_df)
validation_predictions = classifier.predict(balanced_validation_df)

# Print the classification report for the validation set
validation_report = classification_report(balanced_validation_df['label'], validation_predictions)
print("\nValidation Classification Report:")
print(validation_report)


Validation Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.65      0.65       648
           1       0.65      0.64      0.64       648

    accuracy                           0.65      1296
   macro avg       0.65      0.65      0.65      1296
weighted avg       0.65      0.65      0.65      1296



In [109]:
# Compute MAP, MRR and S@1
def compute_metrics(df, probabilities):
    """Rank each question's candidate answers and compute MAP, MRR and S@1.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (question, answer) pair, with a 'question' column used as
        the grouping key and a 'label' column in which 1 marks a correct
        answer.
    probabilities : array-like of shape (len(df), 2)
        Row i holds the class probabilities of row i of `df`, matched by
        position whatever the index labels of `df` are. Column 1 is used as
        the ranking score.

    Returns
    -------
    tuple of float
        (MAP, MRR, S@1), each averaged over all distinct questions of `df`.
        A question without any correct candidate contributes 0 to all three.
        (0.0, 0.0, 0.0) is returned when there is no question to score.

    Raises
    ------
    ValueError
        If `probabilities` does not have exactly one row per row of `df`.
    """
    probabilities = np.asarray(probabilities)
    if probabilities.shape[0] != len(df):
        raise ValueError(
            f"probabilities has {probabilities.shape[0]} rows but df has {len(df)}"
        )

    # Work on a fresh 0..n-1 index so that the index of each group is also
    # the position of its rows in `probabilities`.
    frame = df.reset_index(drop=True)

    ap_sum = 0.0
    rr_sum = 0.0
    success_at_1 = 0
    n = 0
    for _, group in frame.groupby('question'):
        n += 1
        positions = group.index.to_numpy()
        scores = probabilities[positions, 1]
        labels = group['label'].to_numpy()

        # Highest score first; the stable sort keeps tied candidates in their
        # original row order, so the ranking is deterministic.
        order = np.argsort(-scores, kind='stable')
        relevant_ranks = np.flatnonzero(labels[order] == 1) + 1  # 1-based ranks
        if relevant_ranks.size == 0:
            continue  # no correct answer: AP, RR and S@1 are all 0

        # Precision at each relevant rank is (#relevant so far) / rank
        hits_so_far = np.arange(1, relevant_ranks.size + 1)
        ap_sum += float(np.mean(hits_so_far / relevant_ranks))
        rr_sum += 1.0 / float(relevant_ranks[0])
        if relevant_ranks[0] == 1:
            success_at_1 += 1

    if n == 0:
        return 0.0, 0.0, 0.0

    map_score = ap_sum / n
    mrr_score = rr_sum / n
    success_at_1_score = success_at_1 / n

    return map_score, mrr_score, success_at_1_score

# Compute and display the ranking metrics on the validation set
validation_ranking = compute_metrics(balanced_validation_df, validation_probabilities)
map_score, mrr_score, success_at_1_score = validation_ranking
print(f"\nValidation Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")


Validation Results:
MAP: 0.3890111772227131
MRR: 0.3601234567901235
S@1: 0.29259259259259257



### Evaluation sur les données de test

In [112]:
# Score the test set: class probabilities and hard labels
test_probabilities = classifier.predict_proba(balanced_test_df)
test_predictions = classifier.predict(balanced_test_df)

# Print the classification report for the test set
test_report = classification_report(balanced_test_df['label'], test_predictions)
print("\nTest Classification Report:")
print(test_report)


Test Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.66      0.63      1468
           1       0.62      0.55      0.58      1468

    accuracy                           0.61      2936
   macro avg       0.61      0.61      0.60      2936
weighted avg       0.61      0.61      0.60      2936



In [111]:
# Compute and display the ranking metrics on the test set
test_ranking = compute_metrics(balanced_test_df, test_probabilities)
map_score, mrr_score, success_at_1_score = test_ranking
print(f"\nTest Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Standard classification metrics on the test set
test_labels = balanced_test_df['label']
test_accuracy, test_precision, test_recall, test_f1 = (
    metric(test_labels, test_predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy: {test_accuracy}")
print(f"Precision: {test_precision}")
print(f"Recall: {test_recall}")
print(f"F1 Score: {test_f1}")


Test Results:
MAP: 0.3688340306228933
MRR: 0.34124343257443085
S@1: 0.287215411558669

Accuracy: 0.6055858310626703
Precision: 0.6188650306748467
Recall: 0.5497275204359673
F1 Score: 0.5822510822510822


# Modèle avec les fonctions de lisibilité

In [8]:
from features import cpw,spw,wps,cwps,cwr,lwps,lwr,dale_chall

### On ajoute les fonctions de lisibilité pour les questions et les réponses à notre pipeline

In [9]:
# Readability features for the questions and the answers.
# Every transformer below has the same shape: apply one scoring function from
# `features` to one text column and return the scores as a column vector.

def _text_feature_column(df, column, feature_fn):
    """Apply `feature_fn` to every text of `df[column]`.

    Returns the results as an array of shape (n_rows, 1), so that each feature
    contributes exactly one column when the FeatureUnion concatenates them.
    """
    return np.array(df[column].apply(feature_fn).values).reshape(-1, 1)


def transform_cpw_question(df):
    """Column vector of `cpw` scores of the questions."""
    return _text_feature_column(df, 'question', cpw)

def transform_spw_question(df):
    """Column vector of `spw` scores of the questions."""
    return _text_feature_column(df, 'question', spw)

def transform_wps_question(df):
    """Column vector of `wps` scores of the questions."""
    return _text_feature_column(df, 'question', wps)

def transform_cwps_question(df):
    """Column vector of `cwps` scores of the questions."""
    return _text_feature_column(df, 'question', cwps)

def transform_cwr_question(df):
    """Column vector of `cwr` scores of the questions."""
    return _text_feature_column(df, 'question', cwr)

def transform_lwps_question(df):
    """Column vector of `lwps` scores of the questions."""
    return _text_feature_column(df, 'question', lwps)

def transform_lwr_question(df):
    """Column vector of `lwr` scores of the questions."""
    return _text_feature_column(df, 'question', lwr)

def transform_dale_chall_question(df):
    """Column vector of `dale_chall` scores of the questions."""
    return _text_feature_column(df, 'question', dale_chall)

def transform_cpw_answer(df):
    """Column vector of `cpw` scores of the answers."""
    return _text_feature_column(df, 'answer', cpw)

def transform_spw_answer(df):
    """Column vector of `spw` scores of the answers."""
    return _text_feature_column(df, 'answer', spw)

def transform_wps_answer(df):
    """Column vector of `wps` scores of the answers."""
    return _text_feature_column(df, 'answer', wps)

def transform_cwps_answer(df):
    """Column vector of `cwps` scores of the answers."""
    return _text_feature_column(df, 'answer', cwps)

def transform_cwr_answer(df):
    """Column vector of `cwr` scores of the answers."""
    return _text_feature_column(df, 'answer', cwr)

def transform_lwps_answer(df):
    """Column vector of `lwps` scores of the answers."""
    return _text_feature_column(df, 'answer', lwps)

def transform_lwr_answer(df):
    """Column vector of `lwr` scores of the answers."""
    return _text_feature_column(df, 'answer', lwr)

def transform_dale_chall_answer(df):
    """Column vector of `dale_chall` scores of the answers."""
    return _text_feature_column(df, 'answer', dale_chall)

# Second model: the two mean Word2Vec vectors plus the eight readability
# scores of the question and of the answer, fed to a linear SVM.
classifier_2 = Pipeline(steps=[
    ('features', FeatureUnion([
        ('question_w2v', FunctionTransformer(transform_questions, kw_args={'model': word2vec_model}, validate=False)),
        ('answer_w2v', FunctionTransformer(transform_answers, kw_args={'model': word2vec_model}, validate=False)),
        ('cpw_question', FunctionTransformer(transform_cpw_question, validate=False)),
        ('spw_question', FunctionTransformer(transform_spw_question, validate=False)),
        ('wps_question', FunctionTransformer(transform_wps_question, validate=False)),
        ('cwps_question', FunctionTransformer(transform_cwps_question, validate=False)),
        ('cwr_question', FunctionTransformer(transform_cwr_question, validate=False)),
        ('lwps_question', FunctionTransformer(transform_lwps_question, validate=False)),
        ('lwr_question', FunctionTransformer(transform_lwr_question, validate=False)),
        ('dale_chall_question', FunctionTransformer(transform_dale_chall_question, validate=False)),
        ('cpw_answer', FunctionTransformer(transform_cpw_answer, validate=False)),
        ('spw_answer', FunctionTransformer(transform_spw_answer, validate=False)),
        ('wps_answer', FunctionTransformer(transform_wps_answer, validate=False)),
        ('cwps_answer', FunctionTransformer(transform_cwps_answer, validate=False)),
        ('cwr_answer', FunctionTransformer(transform_cwr_answer, validate=False)),
        ('lwps_answer', FunctionTransformer(transform_lwps_answer, validate=False)),
        ('lwr_answer', FunctionTransformer(transform_lwr_answer, validate=False)),
        ('dale_chall_answer', FunctionTransformer(transform_dale_chall_answer, validate=False)),
    ])),
    # probability=True enables predict_proba, which the ranking metrics need.
    # random_state seeds the data shuffling SVC performs for its probability
    # estimates, so the probabilities are reproducible across runs.
    ('classifier', SVC(kernel='linear', class_weight='balanced', probability=True, random_state=123))
])

### Entraînement du modèle

In [10]:
# Train the model on the balanced training set
train_labels = balanced_train_df['label']
classifier_2.fit(balanced_train_df, train_labels)

### Evaluation sur les données de validation

In [11]:
# Score the validation set: class probabilities and hard labels
validation_probabilities = classifier_2.predict_proba(balanced_validation_df)
validation_predictions = classifier_2.predict(balanced_validation_df)

# Print the classification report for the validation set
validation_report = classification_report(balanced_validation_df['label'], validation_predictions)
print("\nValidation Classification Report:")
print(validation_report)


Validation Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.67      0.65       648
           1       0.65      0.62      0.64       648

    accuracy                           0.65      1296
   macro avg       0.65      0.65      0.64      1296
weighted avg       0.65      0.65      0.64      1296



In [12]:
# Compute MAP, MRR and S@1
def compute_metrics(df, probabilities):
    """Rank each question's candidate answers and compute MAP, MRR and S@1.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (question, answer) pair, with a 'question' column used as
        the grouping key and a 'label' column in which 1 marks a correct
        answer.
    probabilities : array-like of shape (len(df), 2)
        Row i holds the class probabilities of row i of `df`, matched by
        position whatever the index labels of `df` are. Column 1 is used as
        the ranking score.

    Returns
    -------
    tuple of float
        (MAP, MRR, S@1), each averaged over all distinct questions of `df`.
        A question without any correct candidate contributes 0 to all three.
        (0.0, 0.0, 0.0) is returned when there is no question to score.

    Raises
    ------
    ValueError
        If `probabilities` does not have exactly one row per row of `df`.
    """
    probabilities = np.asarray(probabilities)
    if probabilities.shape[0] != len(df):
        raise ValueError(
            f"probabilities has {probabilities.shape[0]} rows but df has {len(df)}"
        )

    # Work on a fresh 0..n-1 index so that the index of each group is also
    # the position of its rows in `probabilities`.
    frame = df.reset_index(drop=True)

    ap_sum = 0.0
    rr_sum = 0.0
    success_at_1 = 0
    n = 0
    for _, group in frame.groupby('question'):
        n += 1
        positions = group.index.to_numpy()
        scores = probabilities[positions, 1]
        labels = group['label'].to_numpy()

        # Highest score first; the stable sort keeps tied candidates in their
        # original row order, so the ranking is deterministic.
        order = np.argsort(-scores, kind='stable')
        relevant_ranks = np.flatnonzero(labels[order] == 1) + 1  # 1-based ranks
        if relevant_ranks.size == 0:
            continue  # no correct answer: AP, RR and S@1 are all 0

        # Precision at each relevant rank is (#relevant so far) / rank
        hits_so_far = np.arange(1, relevant_ranks.size + 1)
        ap_sum += float(np.mean(hits_so_far / relevant_ranks))
        rr_sum += 1.0 / float(relevant_ranks[0])
        if relevant_ranks[0] == 1:
            success_at_1 += 1

    if n == 0:
        return 0.0, 0.0, 0.0

    map_score = ap_sum / n
    mrr_score = rr_sum / n
    success_at_1_score = success_at_1 / n

    return map_score, mrr_score, success_at_1_score

# Compute and display the ranking metrics on the validation set
validation_ranking = compute_metrics(balanced_validation_df, validation_probabilities)
map_score, mrr_score, success_at_1_score = validation_ranking
print(f"\nValidation Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")


Validation Results:
MAP: 0.39758788179962284
MRR: 0.36709876543209874
S@1: 0.3



### Evaluation sur les données de test

In [13]:
# Score the test set: class probabilities and hard labels
test_probabilities = classifier_2.predict_proba(balanced_test_df)
test_predictions = classifier_2.predict(balanced_test_df)

# Print the classification report for the test set
test_report = classification_report(balanced_test_df['label'], test_predictions)
print("\nTest Classification Report:")
print(test_report)


Test Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.69      0.65      1468
           1       0.65      0.58      0.62      1468

    accuracy                           0.64      2936
   macro avg       0.64      0.64      0.64      2936
weighted avg       0.64      0.64      0.64      2936



In [14]:
# Compute and display the ranking metrics on the test set
test_ranking = compute_metrics(balanced_test_df, test_probabilities)
map_score, mrr_score, success_at_1_score = test_ranking
print(f"\nTest Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Standard classification metrics on the test set
test_labels = balanced_test_df['label']
test_accuracy, test_precision, test_recall, test_f1 = (
    metric(test_labels, test_predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy: {test_accuracy}")
print(f"Precision: {test_precision}")
print(f"Recall: {test_recall}")
print(f"F1 Score: {test_f1}")


Test Results:
MAP: 0.3709202452623223
MRR: 0.344497956800934
S@1: 0.28896672504378285

Accuracy: 0.6365803814713896
Precision: 0.6527037319116527
Recall: 0.5837874659400545
F1 Score: 0.6163250629270047


# Modèle avec les fonctions de matching lexical et sémantique

In [15]:
from features import length,check_exact_match,overlap,overlap_syn_fraction,tagme_overlap,bm25_score,word2vec_similarity

### On ajoute les fonctions de matching lexical et sémantique entre les questions et les réponses à notre pipeline

In [17]:
# The Word2Vec embeddings are already loaded at the top of the notebook.
# Only load them here when this cell runs in a kernel where they are not in
# memory, instead of reading the same model a second time.
if 'word2vec_model' not in globals():
    word2vec_model = api.load("word2vec-google-news-300")

# Turn the questions into mean Word2Vec vectors
def transform_questions(df, model):
    """Stack the mean Word2Vec vector of every question, one row per row of `df`."""
    question_vectors = [get_mean_word2vec(question, model) for question in df['question']]
    return np.vstack(question_vectors)

# Turn the answers into mean Word2Vec vectors
def transform_answers(df, model):
    """Stack the mean Word2Vec vector of every answer, one row per row of `df`."""
    answer_vectors = [get_mean_word2vec(answer, model) for answer in df['answer']]
    return np.vstack(answer_vectors)

# Lexical and semantic matching features between each question and its answer.
# Apart from the answer length, every transformer below scores the
# (question, answer) pair of each row with one function from `features`.

def _question_answer_feature(df, feature_fn, *extra_args):
    """Apply `feature_fn(question, answer, *extra_args)` to every row of `df`.

    Returns the results as an array of shape (n_rows, 1), so that each feature
    contributes exactly one column when the FeatureUnion concatenates them.
    """
    per_row = df.apply(
        lambda row: feature_fn(row['question'], row['answer'], *extra_args),
        axis=1,
    )
    return np.array(per_row).reshape(-1, 1)


def transform_length_answer(df):
    """Column vector of `length` values of the answers."""
    return np.array(df['answer'].apply(length).values).reshape(-1, 1)

def transform_exact_match(df):
    """Column vector of `check_exact_match(question, answer)` results."""
    return _question_answer_feature(df, check_exact_match)

def transform_overlap(df):
    """Column vector of `overlap(question, answer)` results."""
    return _question_answer_feature(df, overlap)

def transform_overlap_syn(df):
    """Column vector of `overlap_syn_fraction(question, answer)` results."""
    return _question_answer_feature(df, overlap_syn_fraction)

def transform_tagme(df):
    """Column vector of `tagme_overlap(question, answer)` results."""
    return _question_answer_feature(df, tagme_overlap)

def transform_w2v_sim(df, model):
    """Column vector of `word2vec_similarity(question, answer, model)` results."""
    return _question_answer_feature(df, word2vec_similarity, model)

### Entraînement du modèle

In [19]:
# Third model: the two mean Word2Vec vectors plus the answer length and the
# question/answer matching features, fed to a linear SVM.
classifier_3 = Pipeline(steps=[
    ('features', FeatureUnion([
        ('question_w2v', FunctionTransformer(transform_questions, kw_args={'model': word2vec_model}, validate=False)),
        ('answer_w2v', FunctionTransformer(transform_answers, kw_args={'model': word2vec_model}, validate=False)),
        ('length_answer', FunctionTransformer(transform_length_answer, validate=False)),
        ('exact_match', FunctionTransformer(transform_exact_match, validate=False)),
        ('overlap', FunctionTransformer(transform_overlap, validate=False)),
        ('overlap_syn', FunctionTransformer(transform_overlap_syn, validate=False)),
        ('tagme', FunctionTransformer(transform_tagme, validate=False)),
        ('w2v_sim', FunctionTransformer(transform_w2v_sim, kw_args={'model': word2vec_model}, validate=False)),
    ])),
    # probability=True enables predict_proba, which the ranking metrics need.
    # random_state seeds the data shuffling SVC performs for its probability
    # estimates, so the probabilities are reproducible across runs.
    ('classifier', SVC(kernel='linear', class_weight='balanced', probability=True, random_state=123))
])

In [20]:
# Train the model on the balanced training set
train_labels = balanced_train_df['label']
classifier_3.fit(balanced_train_df, train_labels)

### Evaluation sur les données de validation

In [21]:
# Score the validation set: class probabilities and hard labels
validation_probabilities = classifier_3.predict_proba(balanced_validation_df)
validation_predictions = classifier_3.predict(balanced_validation_df)

In [22]:
# Compute MAP, MRR and S@1
def compute_metrics(df, probabilities):
    """Rank each question's candidate answers and compute MAP, MRR and S@1.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (question, answer) pair, with a 'question' column used as
        the grouping key and a 'label' column in which 1 marks a correct
        answer.
    probabilities : array-like of shape (len(df), 2)
        Row i holds the class probabilities of row i of `df`, matched by
        position whatever the index labels of `df` are. Column 1 is used as
        the ranking score.

    Returns
    -------
    tuple of float
        (MAP, MRR, S@1), each averaged over all distinct questions of `df`.
        A question without any correct candidate contributes 0 to all three.
        (0.0, 0.0, 0.0) is returned when there is no question to score.

    Raises
    ------
    ValueError
        If `probabilities` does not have exactly one row per row of `df`.
    """
    probabilities = np.asarray(probabilities)
    if probabilities.shape[0] != len(df):
        raise ValueError(
            f"probabilities has {probabilities.shape[0]} rows but df has {len(df)}"
        )

    # Work on a fresh 0..n-1 index so that the index of each group is also
    # the position of its rows in `probabilities`.
    frame = df.reset_index(drop=True)

    ap_sum = 0.0
    rr_sum = 0.0
    success_at_1 = 0
    n = 0
    for _, group in frame.groupby('question'):
        n += 1
        positions = group.index.to_numpy()
        scores = probabilities[positions, 1]
        labels = group['label'].to_numpy()

        # Highest score first; the stable sort keeps tied candidates in their
        # original row order, so the ranking is deterministic.
        order = np.argsort(-scores, kind='stable')
        relevant_ranks = np.flatnonzero(labels[order] == 1) + 1  # 1-based ranks
        if relevant_ranks.size == 0:
            continue  # no correct answer: AP, RR and S@1 are all 0

        # Precision at each relevant rank is (#relevant so far) / rank
        hits_so_far = np.arange(1, relevant_ranks.size + 1)
        ap_sum += float(np.mean(hits_so_far / relevant_ranks))
        rr_sum += 1.0 / float(relevant_ranks[0])
        if relevant_ranks[0] == 1:
            success_at_1 += 1

    if n == 0:
        return 0.0, 0.0, 0.0

    map_score = ap_sum / n
    mrr_score = rr_sum / n
    success_at_1_score = success_at_1 / n

    return map_score, mrr_score, success_at_1_score

# Compute and display the ranking metrics on the validation set
validation_ranking = compute_metrics(balanced_validation_df, validation_probabilities)
map_score, mrr_score, success_at_1_score = validation_ranking
print(f"\nValidation Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")


Validation Results:
MAP: 0.40607688697647715
MRR: 0.37623456790123455
S@1: 0.3111111111111111



### Evaluation sur les données de test

In [23]:
# Score the test set: class probabilities and hard labels
test_probabilities = classifier_3.predict_proba(balanced_test_df)
test_predictions = classifier_3.predict(balanced_test_df)

# Print the classification report for the test set
test_report = classification_report(balanced_test_df['label'], test_predictions)
print("\nTest Classification Report:")
print(test_report)


Test Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.71      0.69      1468
           1       0.69      0.65      0.67      1468

    accuracy                           0.68      2936
   macro avg       0.68      0.68      0.68      2936
weighted avg       0.68      0.68      0.68      2936



In [24]:
# Compute and display the ranking metrics on the test set
test_ranking = compute_metrics(balanced_test_df, test_probabilities)
map_score, mrr_score, success_at_1_score = test_ranking
print(f"\nTest Results:\nMAP: {map_score}\nMRR: {mrr_score}\nS@1: {success_at_1_score}\n")

# Standard classification metrics on the test set
test_labels = balanced_test_df['label']
test_accuracy, test_precision, test_recall, test_f1 = (
    metric(test_labels, test_predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy: {test_accuracy}")
print(f"Precision: {test_precision}")
print(f"Recall: {test_recall}")
print(f"F1 Score: {test_f1}")


Test Results:
MAP: 0.38119338706269146
MRR: 0.3608018513885414
S@1: 0.3117338003502627

Accuracy: 0.6777929155313351
Precision: 0.6902332361516035
Recall: 0.6450953678474114
F1 Score: 0.6669014084507042


### Conclusion : 

##### Avant l'ajout des fonctions de lisibilité (Word2Vec seul) :

Accuracy: 0.6055858310626703

Precision: 0.6188650306748467

Recall: 0.5497275204359673

F1 Score: 0.5822510822510822

MAP: 0.3688340306228933

MRR: 0.34124343257443085

S@1: 0.287215411558669


##### Après l'ajout des fonctions de lisibilité :

Accuracy: 0.6365803814713896

Precision: 0.6527037319116527

Recall: 0.5837874659400545

F1 Score: 0.6163250629270047

MAP: 0.3709202452623223

MRR: 0.344497956800934

S@1: 0.28896672504378285


##### Après l'ajout des fonctions de matching lexical et sémantique (sans les fonctions de lisibilité) :

Accuracy: 0.6777929155313351

Precision: 0.6902332361516035

Recall: 0.6450953678474114

F1 Score: 0.6669014084507042

MAP: 0.38119338706269146

MRR: 0.3608018513885414

S@1: 0.3117338003502627


#### On remarque une petite amélioration des résultats à chaque ajout de fonctionnalités (accuracy : 0,61 → 0,64 → 0,68 ; MAP : 0,369 → 0,371 → 0,381)

# Modèle avec les fonctions de lisibilité et les fonctions de matching lexical et sémantique