# About:

Prepare the data to populate the 'TexteFiscExactCode' column from the 'Textes' table.

In [1]:
# !pip install llama-index
# !pip install llama-index-readers-database
# !pip install llama-index-embeddings-huggingface
# !pip install llama-index-llms-ollama
# !pip install llama-index-postprocessor-cohere-rerank
# !pip install llama-index-postprocessor-flag-embedding-reranker
# !pip install FlagEmbedding
# !pip install openpyxl
# !pip install psycopg2
# !pip install pandas
# !pip install sqlalchemy

In [None]:
# !pip install llama-index-embeddings-ollama

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
COUNTRY_NAME = 'MLI Mali'
SPOKEN_LANGUAGE = "French"

In [3]:
from general_config import COUNTRY_NAMES_LIST

# validate COUNTRY_NAME
if COUNTRY_NAME in COUNTRY_NAMES_LIST:
    print('country name OK')

country name OK


## Get data from Postgres

In [4]:
from postgres_connection import get_postgress_data
from sql_files import sql_files
import pandas as pd

### This is not necessary if we extract the data using the llama_index db reader

In [5]:
df = get_postgress_data(sql_files['get_docs_per_country'].replace("%country_name%", COUNTRY_NAME))

  df = pd.read_sql(query, conn)


In [6]:
df.shape

(218, 3)

In [7]:
df_droit = get_postgress_data(sql_files['get_droit_type_for_docs_per_country'].replace("%country_name%", COUNTRY_NAME))

In [8]:
df_droit.shape

(218, 2)

In [9]:
df = pd.merge(df,df_droit,how='left', on='title')

### This is mandatory

In [10]:
df_fisc_exacts = get_postgress_data(sql_files['get_textes_fiscaux_exacts'], db='Ferdi')

  df = pd.read_sql(query, conn)


In [11]:
df_fisc_exacts.head()

Unnamed: 0,TexteFiscExactCode,TexteFiscStandardCode,TexteFiscExactCodeCourt,TexteFiscExactComplet
0,_Gen_CGI_CGI,_Gen_CGI,_CGI,Code général des impôts
1,_Gen_CGI_CGImodif,_Gen_CGI,_CGImodif,Modification du Code général des impôts
2,_Gen_CGI_CGImodifModif,_Gen_CGI,_CGImodifModif,Modification de la Modification du Code généra...
3,_Gen_CGI_CGIamendé,_Gen_CGI,_CGIamendé,Code général des impôts amendé
4,_Gen_CGI_CGIappli,_Gen_CGI,_CGIappli,Application du Code général des impôts


In [12]:
df_fisc_standards = get_postgress_data(sql_files['get_textes_fiscaux_standards'], db='Ferdi')

  df = pd.read_sql(query, conn)


In [13]:
df_fisc_standards.head()

Unnamed: 0,TexteFiscStandardCode,RégimeCode,TexteFiscStandardCodeCourt,TexteFiscStandardComplet
0,_Gen_CGI,_Gen,_CGI,Code général des impôts
1,_Gen_IT,_Gen,_IT,Impôt sur le revenu
2,_Gen_VAT,_Gen,_VAT,Taxe sur la valeur ajoutée
3,_Gen_NHI,_Gen,_NHI,Assurance maladie nationale
4,_Gen_LF,_Gen,_LF,Loi de finances


# RAG approach

## Get data from Postgres using llama-index db reader

In [14]:
from postgres_connection import psql_conn_config
from llama_index.readers.database import DatabaseReader
from sql_files import sql_files

In [15]:
db = DatabaseReader(
    scheme="postgresql",  # Database Scheme
    host=psql_conn_config.get("HOSTNAME"),  # Database Host
    port="5432",  # Database Port
    user=psql_conn_config.get("USERNAME"),  # Database User
    password=psql_conn_config.get("PASSWORD"),  # Database Password
    dbname=psql_conn_config.get("DATABASE"),  # Database Name
)

### Load the data as llama_index documents

In [16]:
from llama_index.core import Document, VectorStoreIndex

# documents = db.load_data(query=sql_files['get_docs_MALI'])

texts = df[~df['title'].str.contains("JO")][df['content'].str.len()<2000000].to_dict(orient='index')
# texts = dict(list(texts.items())[-30:])
documents = [Document(text=txt['content'], metadata={'title': txt['title'], 'law type': txt['tag']}) for txt in texts.values()]

  texts = df[~df['title'].str.contains("JO")][df['content'].str.len()<2000000].to_dict(orient='index')


## RAG

### Embeddings model

#### Hugging Face embedding

In [17]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer

embed_model = HuggingFaceEmbedding(model_name="Alibaba-NLP/gte-Qwen2-1.5B-instruct", trust_remote_code=True)
# embed_model = SentenceTransformer("dunzhang/stella_en_1.5B_v5", trust_remote_code=True).cuda()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Vector DataBase

In [18]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

import os
from llama_index.core import VectorStoreIndex, load_index_from_storage
from llama_index.core.storage import StorageContext
from llama_index.core import Settings

Settings.embed_model = embed_model # we specify the embedding model to be used

from llama_index.core.node_parser import TokenTextSplitter


In [19]:
len(documents)

73

In [20]:
from llama_index.core.text_splitter import SentenceSplitter

In [21]:
chunk_size = int(1024)

transformations_example = [
    TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=int(chunk_size/10),
        separator=" ",
    ),
    embed_model,
]

transformations_sentence = [SentenceSplitter(chunk_size=chunk_size, chunk_overlap=int(chunk_size/10))]

if not os.path.exists("storage_MLI Mali_w_metadata_1024"):
    index = VectorStoreIndex.from_documents(documents, show_progress=True, transformations=transformations_sentence)
    # save index to disk
    index.set_index_id("vector_index")
    index.storage_context.persist("./storage_MLI Mali")
else:
    print('loading from local')
    # rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir="storage_MLI Mali_w_metadata_1024")
    # load index
    index = load_index_from_storage(storage_context, index_id="vector_index")

Parsing nodes:   0%|          | 0/73 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/16 [00:00<?, ?it/s]

In [20]:
chunk_size = int(4096)

transformations_example = [
    TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=int(chunk_size/10),
        separator=" ",
    ),
    embed_model,
]

if not os.path.exists("storage_MLI Mali_w_metadata_4096_full"):
    index = VectorStoreIndex.from_documents(documents, transformations=transformations_example)
    # save index to disk
    index.set_index_id("vector_index")
    index.storage_context.persist("./storage_MLI Mali_w_metadata_4096_full")
else:
    print('loading from local')
    # rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir="storage_MLI Mali_w_metadata_4096_full")
    # load index
    index = load_index_from_storage(storage_context, index_id="vector_index")

In [22]:
set([v.get('title') for k,v in index.vector_store.to_dict()['metadata_dict'].items()])

{'Contrat de partage de production type (Ministère des Mines)',
 'Décret n°1994-062 (01.02.1994) Droits à l_importation 1994 (SGG)',
 'Décret n°1996-179 (19.06.1996) Application Office malien de l_habitat 1996 (SGG)',
 'Décret n°1997-182 (02.06.1997) Application Jeux de hasard 1997 (SGG)',
 'Décret n°1999-255 (15.09.1999) Application Code minier 1999 (SGG)',
 'Décret n°1999-256 (15.09.1999) Convention minière type 1999 (SGG)',
 'Décret n°2000-050 (10.02.2000) Modification Convention minière type 2000 (SGG)',
 'Décret n°2004-357 (08.09.2004) Application Code pétrolier 2004 (Droit-Afrique)',
 'Décret n°2004-357 (08.09.2004) Application Code pétrolier 2004 (Ministère des Mines)',
 'Décret n°2005-036 (27.01.2005) Impôt spécial sur certains produits 2005 (Droit-Afrique)',
 'Décret n°2008-473 (07.08.2008) Supplément Application Code pétrolier 2008 (Ministère des Mines)',
 'Décret n°2012-475 (20.08.2012) Application Code des investissements 2012 (Droit-Afrique)',
 'Décret n°2016-272 (29.04.20

#### Query pipeline

In [23]:
from llama_index.core import PromptTemplate
from llama_index.core.query_pipeline import QueryPipeline

from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core import get_response_synthesizer

from llama_index.core.response_synthesizers import TreeSummarize

In [24]:
# setting up the llm
llm = Ollama(
    model="llama3.1", 
    temperature=0, 
    request_timeout=120.0,
    context_window=chunk_size) 

Settings.llm = llm

In [25]:
# retriever = index.as_retriever(similarity_top_k=5)
summarizer = TreeSummarize(llm=llm)

In [26]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.query_pipeline import InputComponent


In [27]:
vector_index = index

In [28]:
# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)

from typing import List

In [29]:
definitions = df_fisc_standards.set_index(['TexteFiscStandardCode'])['TexteFiscStandardComplet'].to_dict()
definitions

{'_Gen_CGI': 'Code général des impôts',
 '_Gen_IT': 'Impôt sur le revenu',
 '_Gen_VAT': 'Taxe sur la valeur ajoutée',
 '_Gen_NHI': 'Assurance maladie nationale',
 '_Gen_LF': 'Loi de finances',
 '_Invest_CI': 'Code des investissement',
 '_Min_CM': 'Code minier',
 '_Pétrol_CP': 'Code pétrolier',
 '_Gaz_CG': 'Code gazier',
 '_Forest_CF': 'Code forestier'}

In [35]:
doc_name = 'Ordonnance n°2020-013 (21.12.2020) Loi de finances 2021 (Ministère des Finances)'
# doc_name = "Loi n°2001-075 (18.07.2001) Code des douanes 2001 (eRegulations Mali)"
# doc_name ="img_Loi n°2014-056 (26.12.2014) Annexe fiscale Loi de finances 2015 (Droit-Afrique)" # this document contains  MODIFICATION DU CODE GENERAL DES IMPOTS and Les articles 73 (nouveau) et 74 A du Code Général des Impôts sont modifiés ainsi qu’il suit : so natually it's _Gen_CGI_CGImodifModif

#### Trial 1

In [39]:
from llama_index.core.postprocessor import SimilarityPostprocessor

processor = SimilarityPostprocessor(similarity_cutoff=0.5)
# filtered_nodes = processor.postprocess_nodes(nodes)

In [42]:
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core.query_engine import RetrieverQueryEngine

vector_index = index

vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=10) #, alpha=0.5) #, embed_model=embed_model)
response_synthesizer = get_response_synthesizer(llm=llm, response_mode=ResponseMode.COMPACT)

vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[processor]
)

In [43]:
response = vector_query_engine.query(
    f"""
    Consider the contents of the document with this title '{doc_name}'. Then, these definitions: {str(definitions)} 
    In which of these categories : {', '.join(list(definitions.keys()))}, is it part of ? Answer with a valid category, keep the category only.
    """
    # f"""
    # Considérez le contenu du document portant ce nom '{doc_name}'. Ensuite, ces définitions : {str(definitions)}
    # Dans laquelle de ces catégories : {', '.join(list(definitions.keys()))}, cela en fait-il partie ? Répondez avec une catégorie valide, gardez uniquement la catégorie.
    # """
)

# response = vector_query_engine.query(
#     f"""
#     Keep the final category only from this resonse {response}. Do not ignore the '_' at the begining of the category name.
#     """
# )

print(response)

# tree_summarize k=5: _Gen_CGI_CGImodif
# tree_summarize k=10: _Gen_CGI_CGImodif
# tree_summarize k=20: _LF_
# simple_summarize k=5: _Gen_CGI_CGImodif
# simple_summarize k=10: _Gen_CGI_CGImodif
# compact k=5: _Gen_LF_LFmodif
# compact k=10: _Gen_LF_LFmodif
# compact k=3: _Gen_LF_LFmodif

_Gen_LF


In [44]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='07b6ea5d-01a7-42be-a9f3-85ee6fdb0064', embedding=None, metadata={'title': 'img_Loi n°2006-067 (29.12.2006) Code général des impôts 2016 (DGI)', 'law type': 'droit fiscal'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0815a0f6-f745-4d36-908e-1ed1bdd25659', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': 'img_Loi n°2006-067 (29.12.2006) Code général des impôts 2016 (DGI)', 'law type': 'droit fiscal'}, hash='40927177d2ba8935b448ef542dafe20364d25d250715e2515d93a7443249a66a'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='c75b44f8-48de-42ac-90a1-478ca8403313', node_type=<ObjectType.TEXT: '1'>, metadata={'title': 'img_Loi n°2006-067 (29.12.2006) Code général des impôts 2016 (DGI)', 'law type': 'droit fiscal'}, hash='71eda55fba8015edb5992dbb648c16671046fa8cd84aaf97679aad17dc851431'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='cbb08eff-

In [45]:
definitions2 = df_fisc_exacts[df_fisc_exacts['TexteFiscStandardCode']==str(response)].set_index(['TexteFiscExactCode'])['TexteFiscExactComplet'].to_dict()
# definitions2.pop("_Gen_LF_LOLF")
# x = definitions2.get("_Gen_LF_LFR")
# definitions2["_Gen_LF_LFR"] = definitions2.get("_Gen_LF_LOLF")
# definitions2["_Gen_LF_LOLF"] = x

definitions2

{'_Gen_LF_LOLF': 'Loi organique relative aux lois de finances',
 '_Gen_LF_LOLFmodif': 'Modification de la Loi organique relative aux lois de finances',
 '_Gen_LF_LF': 'Loi de finances',
 '_Gen_LF_LFmodif': 'Modification de la Loi de finances',
 '_Gen_LF_LFratif': 'Ratification de la Loi de finances',
 '_Gen_LF_LFR': 'Loi de finances rectificative',
 '_Gen_LF_LFRmodif': 'Modification de la Loi de finances rectificative',
 '_Gen_LF_LFRratif': 'Ratification de la Loi de finances rectificative',
 '_Gen_LF_LFappli': 'Application de la Loi de finances',
 '_Gen_LF_LFappliModif': "Modification de l'Application de la Loi de finances"}

In [46]:
response2 = vector_query_engine.query(
    f"""
    The following represents a list of definitions, where we define categories, followed by '=' and then the definitions: {", ".join([f"{k}={v}" for k,v in definitions2.items()])}
    Consider the contents of the document with this name '{doc_name}'.
    In which of these categories : {', '.join(list(definitions2.keys()))}, is it part of ? Answer with a valid category, keep the category only.
    """
    # f"""
    # Ce qui suit représente une liste de définitions, où nous définissons les catégories, suivies de '=' puis des définitions : {", ".join([f"{k}={v}" for k,v in definitions2.items()])}
    # Considérez le contenu du document portant ce nom '{doc_name}'.
    # Dans laquelle de ces catégories : {', '.join(list(definitions2.keys()))}, cela en fait-il partie ? Répondez avec une catégorie valide, gardez uniquement la catégorie.
    # """
)

print(response2)

#### Trial 2

In [26]:
from llama_index.core import PromptTemplate

# custome prompt template
template = (
    "Imagine you are an advanced AI expert in African countries legislature, with access to all current and relevant legal documents, "
    "case studies, and expert analyses. Your goal is to provide insightful, accurate, and concise answers to questions in this domain.\n\n"
    "Here is some context related to the query:\n"
    "-----------------------------------------\n"
    "{context_str}\n"
    "-----------------------------------------\n"
    "Considering the above information, please respond to the following inquiry with detailed references to applicable laws, "
    "precedents, or principles where appropriate:\n\n"
    "Question: {query_str}\n\n"
    "Answer succinctly, starting with the phrase 'According to cyber security law,' and ensure your response is understandable to someone without a legal background."
)
qa_template = PromptTemplate(template)

# build query engine with custom template
# text_qa_template specifies custom template
# similarity_top_k configure the retriever to return the top 3 most similar documents,
# the default value of similarity_top_k is 2
query_engine = index.as_query_engine(text_qa_template=qa_template, similarity_top_k=3)


In [33]:
response = query_engine.query(f"""
    Consider the contents of the document with this title '{doc_name}'. Then, these definitions: {str(definitions)} 
    In which of these categories : {', '.join(list(definitions.keys()))}, is it part of ? Answer with a valid category, keep the category only.
    """)

In [34]:
print(response)

## _Invest_CI

The document appears to be an ordinance related to the creation of a council for agriculture and fishing, which involves investments and regulations. The mention of "Code des investissement" (Investment Code) suggests that it is part of this category.


#### Trial 3 - using service context

In [37]:
from llama_index.node_parser import SentenceWindowNodeParser,SimpleNodeParser
sentence_node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text")
#base_node_parser = SentenceSplitter(llm=llm)
base_node_parser = SimpleNodeParser()
#
nodes = sentence_node_parser.get_nodes_from_documents(documents)
base_nodes = base_node_parser.get_nodes_from_documents(documents)
#
print(f"SENTENCE NODES :\n {nodes[10]}")
print(f"BASE NODES :\n {base_nodes[10]}")

ModuleNotFoundError: No module named 'llama_index.node_parser'

In [34]:
doc_content = df[df['title']==doc_name]['content'].iloc[0]
print(doc_content)

LOI DE FINANCES 2021




MINISTERE DE L'ECONOMIE ET DES FINANCES Janvier 2021
SOMMAIRE
I. NOTE DE PRESENTATION DU BUDGET D’ETAT 2021 ................................................ 1-37
II. ORDONNANCE PORTANT LOI DE FINANCES 2021 ................................................. 38-122
III. BUDGET SPECIAL D’INVESTISSEMENT 2021 ....................................................... 123-285
IV. DOCUMENT DE PROGRAMMATION BUDGETAIRE ET ECONOMIQUE
PLURIANNUELLE 2021-2023 .................................................................................... 286-355

V. AUTRES ANNEXES BUDGETAIRES ..................................................................... 356-1100
- Etat A : Stratégie d’Endettement Public du Mali à Moyen Terme 2021-2023 ......................... 357-379
- Etat B : Plafond des Effectifs par Programme ....................................................................... 380-387
- Etat C : Répartition des Crédits du Budget Général par Dotation et Programme ..........

In [35]:
definitions

{'_Gen_CGI': 'Code général des impôts',
 '_Gen_IT': 'Impôt sur le revenu',
 '_Gen_VAT': 'Taxe sur la valeur ajoutée',
 '_Gen_NHI': 'Assurance maladie nationale',
 '_Gen_LF': 'Loi de finances',
 '_Invest_CI': 'Code des investissement',
 '_Min_CM': 'Code minier',
 '_Pétrol_CP': 'Code pétrolier',
 '_Gaz_CG': 'Code gazier',
 '_Forest_CF': 'Code forestier'}

In [73]:
p = QueryPipeline() #verbose=True)
p.add_modules(
    {
        "input": InputComponent(),
        "retriever": vector_retriever,
        "summarizer": summarizer,
    }
)

p.add_link("input", "retriever")
p.add_link("input", "summarizer", dest_key="query_str")
p.add_link("retriever", "summarizer", dest_key="nodes")

In [74]:
def get_text_fisc(doc_name, definitions=definitions, p=p):
    if "JO" in doc_name:
        return None
    else:
        return p.run(input=f"""
                    Considérez le contenu du document qui porte ce titre '{doc_name}'. Ensuite, ces définitions: {str(definitions)} 
                    Dans laquelle de ces catégories : {', '.join(list(definitions.keys()))}, est-ce que ça fait partie ? Conservez uniquement la catégorie.
                    """)

In [81]:
# doc_name = "Ordonnance n°2020-013 (21.12.2020) Loi de finances 2021 (Ministère des Finances)"

output = get_text_fisc(doc_name)
print(str(output))

_Gen_CGI_CGImodif


In [None]:
df_fisc_exacts = df.title.apply(lambda x: get_text_fisc(x))

In [None]:
df['TexteFiscExactCode'] = df_fisc_exacts

In [None]:
df[['title', 'country', 'TexteFiscExactCode']].to_excel("output/TexteFiscExactCode4.xlsx")

In [38]:
definitions

{'_Gen_CGI_CGI': 'Code général des impôts',
 '_Gen_CGI_CGImodif': 'Modification du Code général des impôts',
 '_Gen_CGI_CGImodifModif': 'Modification de la Modification du Code général des impôts',
 '_Gen_CGI_CGIamendé': 'Code général des impôts amendé',
 '_Gen_CGI_CGIappli': 'Application du Code général des impôts',
 '_Gen_CGI_CGIappliModif': "Modification de l'Application du Code général des impôts",
 '_Gen_CGI_CGIsuppl': 'Supplément au Code général des impôts',
 '_Gen_CGI_CGIsupplAppli': 'Application du Supplément au Code général des impôts',
 '_Gen_CGI_CGIone': 'Code des impôts directs et indirects',
 '_Gen_CGI_CGIoneModif': 'Modification du Code des impôts directs et indirects',
 '_Gen_CGI_CGIoneAmendé': 'Code des impôts directs et indirects amendé',
 '_Gen_CGI_CGIbis': "Code de l'enregistrement, du timbre et de l'impôt sur le revenu des capitaux mobiliers",
 '_Gen_CGI_CGIbisModif': "Modification du Code de l'enregistrement, du timbre et de l'impôt sur le revenu des capitaux mobil

#### Custom retriever trial

#### Other Query pipeline trials

In [None]:
# try chaining basic prompts
# prompt_str = "What's the date of the document that has this title '{doc_title}' ? please respond in english."
prompt_str = "Based on the title and the content of this text '{doc_title}', in which of these categories: legislation, reglementation or convention, is it part of? please reply in French."
prompt_tmpl = PromptTemplate(prompt_str)

p = QueryPipeline(chain=[prompt_tmpl, llm], verbose=True)

In [None]:
# generate question regarding topic
prompt_str1 = "Consider the title and the content of this text '{doc_title}', which is a legal document from Mali."
prompt_tmpl1 = PromptTemplate(prompt_str1)
# use HyDE to hallucinate answer.
prompt_str2 = (
    # "Please consider these definitions:\n"
    # "If it's a decree, then it's legislation."
    # "If it's an ordinance then it's a regulation."
    # "If it's a law then it's a legislation."
    # "If it's an international law it usually is a convention"
    "\n"
    "{response1}\n"
    "in which of these categories: legislation, regulation or convention, is it part of? keep the category only."
)
prompt_tmpl2 = PromptTemplate(prompt_str2)

# llm = OpenAI(model="gpt-3.5-turbo")
retriever = index.as_retriever(retriever_mode=llm, similarity_top_k=5)
p = QueryPipeline(
    chain=[prompt_tmpl1, llm, prompt_tmpl2, llm, retriever], verbose=True
)


In [144]:
output = p.run(input=f"""
               Consider the content of the document that has this title '{doc_name}', and these definitions: {french_definitions} 
               In which of these categories: {', '.join(textes_legaux_exacts)}, is it part of?
               """)

print(str(output))

[1;3;38;2;155;135;227m> Running module 65c2142d-3655-4e00-90d5-438551962fbc with input: 
doc_title: 
               Consider the content of the document that has this title 'Tarif douanier CEDEAO 2017 amendé 2020 (DGD)', and these definitions: 
Loi: A law passed by the legislature (parliament) in a ...

[0m[1;3;38;2;155;135;227m> Running module dff56ffc-71f2-4f80-ad2b-6c941cfaa8d7 with input: 
messages: Consider the title and the content of this text '
               Consider the content of the document that has this title 'Tarif douanier CEDEAO 2017 amendé 2020 (DGD)', and these definitions: 
Loi: A...

[0m[1;3;38;2;155;135;227m> Running module 206d3771-9dae-457a-a72e-1052551c99b9 with input: 
response1: assistant: Based on the content and definitions provided, I would categorize "Tarif douanier CEDEAO 2017 amendé 2020 (DGD)" as follows:

The title suggests that this document is related to tariffs or ...

[0m[1;3;38;2;155;135;227m> Running module 3722a567-c7d1-4b5e-be7b-8677ee0

In [None]:
df_texteLegaleExactCode.to_excel("texteLegaleExactCode.xlsx")