# Conversational Search Retrieval Augmented Generation

#### The first cell downloads the NLTK stopword list; comment out `nltk.download('stopwords')` once it is installed

In [1]:
import TRECCASTeval as trec
import numpy as np
import pprint
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import OpenSearchSimpleAPI as osearch
import bm25s
import pandas as pd

# Fetch the NLTK stopword corpus (NLTK only reports "already up-to-date" when it is installed).
nltk.download('stopwords')

# Pretty-printer for ad-hoc inspection of nested structures.
pp = pprint.PrettyPrinter(indent=4)


# Evaluation harness: later cells read its train_topics / test_topics and call its eval().
test_bed = trec.ConvSearchEvaluation()

# Initialize stop words and stemmer
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package stopwords to /home/anna/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### `preprocess_text`: normalizing utterances into stemmed keywords
This function takes in raw text (usually conversational utterances) and performs the following:

- Converts the text to lowercase.
- Removes non-alphanumeric characters (punctuation, symbols).
- Removes common stopwords using the NLTK stopwords list.
- Stems each word using the Porter stemmer to reduce words to their base forms.

In [2]:

def preprocess_text(text):
    """Reduce raw text to a space-separated string of stemmed keywords.

    Steps, in order: lowercase the input, delete every character that is not
    a lowercase letter, digit or whitespace, drop tokens found in the
    module-level ``stop_words`` set, and stem the survivors with the
    module-level ``stemmer``.

    Punctuation is stripped *before* the stop-word check, so a contraction
    such as "what's" is tested as "whats".
    """
    lowered = text.lower()
    cleaned = re.sub(r'[^a-z0-9\s]', '', lowered)
    kept_tokens = [token for token in cleaned.split() if token not in stop_words]
    return ' '.join(stemmer.stem(token) for token in kept_tokens)


### Training and Test Data Processing

Here, we iterate over the training and test topics provided by `test_bed`. Only conversations whose ID appears in a fixed whitelist are kept; all others are skipped. Each utterance of a kept conversation is passed through `preprocess_text`, and the preprocessed utterances are accumulated turn by turn to simulate a growing conversational context.

Key Variables:
- **previous_query_tokenized**: Keeps track of the concatenated previous utterances to simulate a conversation history.
- **topics**: Maps `<conversation ID>_<turn number>` to the expanded query for that turn: the preprocessed text of all earlier turns followed by the raw (unprocessed) current utterance.

(Printing of the queries is optional and used mostly for debug purposes)


In [3]:

# Conversation whose turns are collected in `conversation` for the experiments below.
chosen_topic = 77
# Raw turns of the chosen conversation, in turn order.
conversation = []
# '<conv_id>_<turn_id>' -> expanded query (stemmed history + raw current utterance).
topics = {}

# Conversation IDs kept from the training and test topic sets respectively.
TRAIN_CONV_IDS = (1, 2, 4, 7, 15, 17, 18, 22, 23, 24, 25, 27, 30)
TEST_CONV_IDS = (31, 32, 33, 34, 37, 40, 49, 50, 54, 56, 58, 59, 61, 67, 68, 69, 75, 77, 78, 79)


def _accumulate_turns(topic_list, selected_conv_ids):
    """Fill ``topics`` (and ``conversation``) from one list of topics.

    For every topic whose ``'number'`` is in ``selected_conv_ids`` the turns
    are walked in order. Each turn's expanded query is the preprocessed text
    of all earlier turns of that conversation followed by the raw current
    utterance; it is stored in the module-level ``topics`` dict under
    ``'<conv_id>_<turn_id>'``. Turns belonging to ``chosen_topic`` are also
    appended to the module-level ``conversation`` list.
    """
    for topic in topic_list:
        conv_id = topic['number']
        if conv_id not in selected_conv_ids:
            continue
        # History restarts for every conversation.
        history = ''
        for turn in topic['turn']:
            turn_id = turn['number']
            utterance = turn['raw_utterance']
            # The current utterance is appended raw; only earlier turns are stemmed.
            expanded = history + utterance
            history += preprocess_text(utterance) + ' '
            topics['%d_%d' % (conv_id, turn_id)] = expanded
            if conv_id == chosen_topic:
                conversation.append({"conv_id": conv_id, "turn_id": turn_id, "utterance": utterance})


_accumulate_turns(test_bed.train_topics, TRAIN_CONV_IDS)
_accumulate_turns(test_bed.test_topics, TEST_CONV_IDS)



# Client used by the retrieval cells below (search_body, get_doc_body, doc_term_vectors).
# NOTE(review): the index settings/mappings printed after this cell presumably come
# from this constructor — confirm against OpenSearchSimpleAPI.
opensearch = osearch.OSsimpleAPI()

# NOTE(review): not referenced by any code visible in this notebook; the retrieval
# cell passes a literal 100 instead.
numdocs = 100


{'acknowledged': True, 'shards_acknowledged': True}

----------------------------------------------------------------------------------- INDEX SETTINGS
{'kwiz': {'settings': {'index': {'creation_date': '1728153198145',
                                 'knn': 'true',
                                 'number_of_replicas': '0',
                                 'number_of_shards': '1',
                                 'provided_name': 'kwiz',
                                 'refresh_interval': '-1',
                                 'similarity': {'default': {'lambda': '0.7',
                                                            'type': 'LMJelinekMercer'}},
                                 'uuid': 'qkpQ7pcwS7iT1IOTsfwRNg',
                                 'version': {'created': '135238227'}}}}}

----------------------------------------------------------------------------------- INDEX MAPPINGS
{'kwiz': {'mappings': {'properties': {'collection': {'type': 'keyword'},
                   

In [6]:

def BM_retrieval(k):
    """Run first-stage retrieval for every turn of the chosen conversation.

    For each entry of the module-level ``conversation`` list, the expanded
    query stored in ``topics`` under ``'<chosen_topic>_<turn_id>'`` is printed
    and sent to ``opensearch.search_body``; the body of every returned
    document is then fetched with ``opensearch.get_doc_body``.

    NOTE(review): the index settings printed earlier in the notebook show an
    ``LMJelinekMercer`` default similarity, so the ranking behind this
    function may not actually be BM25 despite its name — confirm.

    Args:
        k: Number of documents requested per query (passed as ``numDocs``).

    Returns:
        A DataFrame with one row per turn and the columns ``turn`` (turn id
        as a string), ``query`` (raw utterance), ``expanded_query``,
        ``top passages`` (document bodies in rank order) and ``_id``
        (document ids in the same order).
    """
    rows = []
    for element in conversation:
        turn = str(element['turn_id'])
        query = topics[f'{chosen_topic}_{turn}']
        print(query)
        opensearch_results = opensearch.search_body(query, numDocs=k)
        best_docs = []
        best_passages = []
        for _, row in opensearch_results.iterrows():
            doc_id = row['_id']
            best_passages.append(opensearch.get_doc_body(doc_id))
            best_docs.append(doc_id)
        rows.append({
            'turn': turn,
            'query': element["utterance"],
            'expanded_query': query,
            'top passages': best_passages,
            '_id': best_docs,
        })

    return pd.DataFrame(rows)

# First-stage results for the chosen conversation, 100 documents per turn;
# the re-ranking and evaluation cells below read this frame.
bm25_results = BM_retrieval(100)
print(bm25_results)


What's the difference between soup and stew?
what differ soup stew Is chilli a stew?
what differ soup stew chilli stew How about goulash?
what differ soup stew chilli stew goulash What are popular ones in France?
what differ soup stew chilli stew goulash popular one franc How is cassoulet made?
what differ soup stew chilli stew goulash popular one franc cassoulet made Tell me about feijoada and its significance.
what differ soup stew chilli stew goulash popular one franc cassoulet made tell feijoada signific How is it similar or different from cassoulet?
what differ soup stew chilli stew goulash popular one franc cassoulet made tell feijoada signific similar differ cassoulet Tell about Bigos stew.
what differ soup stew chilli stew goulash popular one franc cassoulet made tell feijoada signific similar differ cassoulet tell bigo stew Why is it important?
what differ soup stew chilli stew goulash popular one franc cassoulet made tell feijoada signific similar differ cassoulet tell bigo s

Search example:

In [7]:
# LMDRetriever
class LMDRetriever:
    """Re-rank a fixed candidate set with a Dirichlet-smoothed language model.

    Statistics are gathered only from the documents in ``corpus_ids``, using
    ``opensearch.doc_term_vectors(doc_id)``.

    Assumes ``doc_term_vectors`` returns a sequence whose element 3 maps
    term -> stats, where ``stats[0]`` is the in-document term frequency and
    ``stats[2]`` is a collection-level frequency — TODO confirm against
    OpenSearchSimpleAPI.

    NOTE(review): ``collection_frequency`` adds ``stats[2]`` once per
    candidate document containing the term, while ``corpus_length`` only
    sums in-document frequencies of the candidates; if ``stats[2]`` is an
    index-wide count the resulting ratio is not a proper probability —
    verify.

    Attributes:
        opensearch: Client exposing ``doc_term_vectors``.
        corpus_ids: Candidate document ids, in the order supplied.
        corpus_length: Sum of ``stats[0]`` over all candidates.
        doc_count: ``len(corpus_ids)``.
        index: term -> {doc_id: in-document frequency}.
        collection_frequency: term -> summed ``stats[2]``.
        mu: Smoothing parameter, 0.1 * average candidate length.
    """

    def __init__(self, opensearch, corpus_ids):
        self.opensearch = opensearch
        self.corpus_ids = corpus_ids
        self.corpus_length = 0
        self.doc_count = len(corpus_ids)
        # doc_id -> term stats (or None when unavailable). Filled lazily so each
        # document's term vectors are requested from the backend only once.
        self._doc_terms = {}
        self.index = self.build_index()
        self.collection_frequency = self.build_collection_frequency()
        self.mu = self.calculate_mu()

    def _terms_for(self, doc_id):
        """Return the term -> stats mapping of ``doc_id``, or None if it has none."""
        if doc_id not in self._doc_terms:
            term_vectors = self.opensearch.doc_term_vectors(doc_id)
            self._doc_terms[doc_id] = term_vectors[3] if term_vectors else None
        return self._doc_terms[doc_id]

    def calculate_mu(self):
        """Return 0.1 * average candidate length (0.0 for an empty candidate set)."""
        if not self.doc_count:
            return 0.0
        avg_doc_length = self.corpus_length / self.doc_count
        return 0.1 * avg_doc_length

    def build_index(self):
        """Build term -> {doc_id: tf} and recompute ``corpus_length``."""
        index = {}
        # Reset so that rebuilding the index does not double-count lengths.
        self.corpus_length = 0
        for doc_id in self.corpus_ids:
            terms = self._terms_for(doc_id)
            if not terms:
                continue
            for term, stats in terms.items():
                index.setdefault(term, {})[doc_id] = stats[0]
                self.corpus_length += stats[0]
        return index

    def build_collection_frequency(self):
        """Build term -> sum of ``stats[2]`` over the candidates containing it."""
        collection_frequency = {}
        for doc_id in self.corpus_ids:
            terms = self._terms_for(doc_id)
            if not terms:
                continue
            for term, stats in terms.items():
                collection_frequency[term] = collection_frequency.get(term, 0) + stats[2]
        return collection_frequency

    def score(self, query, doc_id):
        """Return the smoothed query likelihood of ``doc_id``.

        The score is the product, over whitespace-separated query terms, of
        ``(tf + mu * p_ml) / (doc_length + mu)``. Terms whose smoothed
        probability is zero (absent from both the document and the
        collection statistics) are skipped instead of zeroing the product.

        A document without term vectors scores 0.0 so it ranks below every
        scored document; previously it kept the neutral value 1.0 and
        therefore outranked all real matches.

        NOTE(review): this is a raw product of probabilities, so very long
        queries could underflow towards 0.0 — summing logs would avoid that
        if callers only need the ordering.
        """
        terms = self._terms_for(doc_id)
        if terms is None:
            return 0.0
        doc_length = sum(stats[0] for stats in terms.values())
        denominator = doc_length + self.mu
        if denominator == 0:
            # Empty document and no smoothing mass: nothing to estimate from.
            return 0.0
        score = 1.0
        for term in query.split():
            tf = terms.get(term, [0])[0]
            cf = self.collection_frequency.get(term, 0)
            p_ml = cf / self.corpus_length if self.corpus_length else 0.0
            p_lmd = (tf + self.mu * p_ml) / denominator
            if p_lmd > 0:
                score *= p_lmd
        return score

    def retrieve(self, query, k):
        """Return the ``k`` best ``(doc_id, score)`` pairs, highest score first.

        Ties keep the order of ``corpus_ids`` (the sort is stable).
        """
        scored = [(doc_id, self.score(query, doc_id)) for doc_id in self.corpus_ids]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:k]




In [8]:

#print(bm25_results)

#print('-------------------------------------------------------------------------------------')


# Re-rank every turn's first-stage candidates with the LMD model.
# Rows are collected in a list and turned into a frame once, instead of
# concatenating onto an initially empty DataFrame on every iteration.
reranked_rows = []

for index, row in bm25_results.iterrows():
    bm25_doc_ids = row["_id"]
    turn = row["turn"]
    query = row["expanded_query"]

    # Statistics are rebuilt per turn because the candidate set differs per turn.
    lmd_retriever = LMDRetriever(opensearch=opensearch, corpus_ids=bm25_doc_ids)
    reranked_results = lmd_retriever.retrieve(query, k=100)

    reranked_rows.append({
        "turn": turn,
        "query": row["query"],
        "_id": [doc_id for doc_id, _score in reranked_results],
    })

# Passing the columns explicitly keeps the frame's layout even when there are no rows.
reranked_df = pd.DataFrame(reranked_rows, columns=["turn", "query", "_id"])


print(reranked_df)


  turn                                           query  \
0    1    What's the difference between soup and stew?   
1    2                               Is chilli a stew?   
2    3                              How about goulash?   
3    4                What are popular ones in France?   
4    5                          How is cassoulet made?   
5    6    Tell me about feijoada and its significance.   
6    7  How is it similar or different from cassoulet?   
7    8                          Tell about Bigos stew.   
8    9                            Why is it important?   
9   10              What is the history of Irish stew?   

                                                 _id  
0  [MARCO_16539, MARCO_7312785, MARCO_4009632, MA...  
1  [CAR_e0559ce4c6079c402ca8c187105fc3099decb965,...  
2  [CAR_e0559ce4c6079c402ca8c187105fc3099decb965,...  
3  [CAR_2f71ee8fd2627fe73672114087a52c055ccf1e43,...  
4  [CAR_fdb1694659cd24f206e5f5eceec1d2d5444fe72f,...  
5  [CAR_3bcb55430dbf72eb2817f6b

In [9]:
# Per-turn evaluation of the first-stage ranking; the lists stay index-aligned.
turns_BM = []
BM_ap_values = []
BM_ndcg_values = []
BM_precision_values = []
BM_recall_values = []

print("BM25")

for index, row in bm25_results.iterrows():
    # Bound before the try so the error message below can always name the turn.
    turn = f"{chosen_topic}_{row['turn']}"
    try:
        result_df = pd.DataFrame({"_id": row['_id']})
        p10, recall, ap, ndcg5 = test_bed.eval(result_df, turn)
    except Exception as e:
        # test_bed.eval's failure modes are not visible here, so the broad catch
        # is kept; evaluation stops at the first failing turn.
        print(f"Erreur sur le tour {turn}: {e}")
        break

    turns_BM.append(turn)
    BM_ap_values.append(ap)
    BM_ndcg_values.append(ndcg5)
    BM_precision_values.append(p10)
    BM_recall_values.append(recall)

    print(f"Turn: {turn}")
    print(f"P@10: {p10}, Recall: {recall}, AP: {ap}, NDCG@5: {ndcg5}\n")

print(turns_BM,BM_ap_values,BM_ndcg_values,BM_precision_values, BM_recall_values)


BM25
Turn: 77_1
P@10: 0.8, Recall: 0.6153846153846154, AP: 0.35366162517942523, NDCG@5: 0.6215431670303851

Turn: 77_2
P@10: 0.0, Recall: 0.2, AP: 0.002150537634408602, NDCG@5: 0.0

Turn: 77_3
P@10: 1.0, Recall: 0.7948717948717948, AP: 0.6093518121885131, NDCG@5: 0.91172809177877

Turn: 77_4
P@10: 0.0, Recall: 0.25, AP: 0.01251605655533558, NDCG@5: 0.0

Turn: 77_5
P@10: 0.0, Recall: 0.1388888888888889, AP: 0.008396951250587453, NDCG@5: 0.0

Turn: 77_6
P@10: 0.1, Recall: 0.45, AP: 0.10590742305386128, NDCG@5: 0.09637771110074017

Turn: 77_7
P@10: 0.1, Recall: 0.5714285714285714, AP: 0.12287022093071924, NDCG@5: 0.15979138559512832

Turn: 77_8
P@10: 0.0, Recall: 0.25, AP: 0.019106507605008642, NDCG@5: 0.0

Turn: 77_9
P@10: 0, Recall: 0, AP: 0, NDCG@5: 0

Turn: 77_10
P@10: 0, Recall: 0, AP: 0, NDCG@5: 0

['77_1', '77_2', '77_3', '77_4', '77_5', '77_6', '77_7', '77_8', '77_9', '77_10'] [0.35366162517942523, 0.002150537634408602, 0.6093518121885131, 0.01251605655533558, 0.008396951250587453

In [10]:

print("LMD")

# Per-turn evaluation of the LMD re-ranking; the lists stay index-aligned.
turns_LMD = []
LMD_ap_values = []
LMD_ndcg_values = []
LMD_precision_values = []
LMD_recall_values = []

for index, row in reranked_df.iterrows():
    # Bound before the try so the error message below can always name the turn.
    turn = f"{chosen_topic}_{row['turn']}"
    try:
        result_df = pd.DataFrame({"_id": row['_id']})
        p10, recall, ap, ndcg5 = test_bed.eval(result_df, turn)
    except Exception as e:
        # test_bed.eval's failure modes are not visible here, so the broad catch
        # is kept; evaluation stops at the first failing turn.
        print(f"Erreur sur le tour {turn}: {e}")
        break

    turns_LMD.append(turn)
    LMD_ap_values.append(ap)
    LMD_ndcg_values.append(ndcg5)
    LMD_precision_values.append(p10)
    LMD_recall_values.append(recall)

    print(f"Turn: {turn}")
    print(f"P@10: {p10}, Recall: {recall}, AP: {ap}, NDCG@5: {ndcg5}\n")

print(turns_LMD,LMD_ap_values,LMD_ndcg_values,LMD_precision_values, LMD_recall_values)


LMD
Turn: 77_1
P@10: 0.6, Recall: 0.6153846153846154, AP: 0.21890349531082884, NDCG@5: 0.1599902354470432

Turn: 77_2
P@10: 0.0, Recall: 0.2, AP: 0.003125, NDCG@5: 0.0

Turn: 77_3
P@10: 0.3, Recall: 0.7948717948717948, AP: 0.25882643244194437, NDCG@5: 0.1674605507391572

Turn: 77_4
P@10: 0.1, Recall: 0.25, AP: 0.024812485364711395, NDCG@5: 0.14041677439671904

Turn: 77_5
P@10: 0.0, Recall: 0.1388888888888889, AP: 0.009951371052788918, NDCG@5: 0.0

Turn: 77_6
P@10: 0.3, Recall: 0.45, AP: 0.12444064502467019, NDCG@5: 0.2209440956511154

Turn: 77_7
P@10: 0.0, Recall: 0.5714285714285714, AP: 0.10050228563638879, NDCG@5: 0.0

Turn: 77_8
P@10: 0.0, Recall: 0.25, AP: 0.013531396189702111, NDCG@5: 0.0

Turn: 77_9
P@10: 0, Recall: 0, AP: 0, NDCG@5: 0

Turn: 77_10
P@10: 0, Recall: 0, AP: 0, NDCG@5: 0



NameError: name 'turns' is not defined

In [None]:
# Disabled plotting cell: the whole body is wrapped in a string literal, so none
# of it executes. It would plot the LMD AP / NDCG@5 values per turn and a
# precision-recall curve.
# NOTE(review): the loop below iterates over a name `turns`, while the
# evaluation cells create `turns_BM` and `turns_LMD`; re-enabling the code
# as-is would raise NameError on `turns`.
"""import matplotlib.pyplot as plt


filtered_turns = []
filtered_ap_values = []
filtered_ndcg_values = []
filtered_precision_values = []
filtered_recall_values = []

for i in range(len(turns)):

    if LMD_ap_values[i] != 0 or LMD_ndcg_values[i] != 0:
        filtered_turns.append(turns[i])
        filtered_ap_values.append(LMD_ap_values[i])
        filtered_ndcg_values.append(LMD_ndcg_values[i])

 
    if LMD_precision_values[i] != 0 or LMD_recall_values[i] != 0:
        filtered_precision_values.append(LMD_precision_values[i])
        filtered_recall_values.append(LMD_recall_values[i])


plt.ion()


plt.figure(figsize=(10, 6))
plt.plot(filtered_turns, filtered_ap_values, marker='o', label='AP', linestyle='-')
plt.plot(filtered_turns, filtered_ndcg_values, marker='x', label='NDCG@5', linestyle='-')
plt.title("Evolution of AP and NCDG5 for each turn")
plt.xlabel("Turns")
plt.ylabel("Scores")
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()


plt.figure(figsize=(10, 6))
plt.plot(filtered_recall_values, filtered_precision_values, marker='o', label='Precision-Recall')
plt.title("Precision-Recall")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.grid(True)
plt.show()"""

