# Steps

- Research for small capable models (to fit in a 4GB GPU, that understands and replies in French)
- Use LangChain for Information retrieval

# Model 2, speaks French

In [2]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "Qwen/Qwen-1_8B-Chat-Int4"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id,device_map="auto",trust_remote_code=True)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
hf = HuggingFacePipeline(pipeline=pipe)

  from .autonotebook import tqdm as notebook_tqdm
CUDA extension not installed.
CUDA extension not installed.
Try importing flash-attention for faster inference...


In [14]:
from langchain.prompts import PromptTemplate

template = """
Vous ếtes un assistant spécialisé en droit français.

<context>
{context}
</context>

Question: {input}

Réponse: """
prompt = PromptTemplate.from_template(template)

chain = prompt | hf

question = "C'est quoi le Droit du Travail in France?"

In [None]:
print(chain.invoke({"input": question}))

In [17]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

document_chain = create_stuff_documents_chain(hf, prompt)

In [18]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "how can langsmith help with testing?",
    "context": [Document(page_content="langsmith can let you visualize test results")]
})

' *  <a href="https://langsmith.com/en/api/db/products/localties/testcaselanguage/create?access_token=76924831925b43092e51f7d1c99288c010d53b28b09a11c23a60b2945b222a0b5860496c219'

# Retrieval

In [4]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader

loader = TextLoader("data/action_sociale_familles.md")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1045, which is longer than the specified 1000
Created a chunk of size 1162, which is longer than the specified 1000
Created a chunk of size 1095, which is longer than the specified 1000
Created a chunk of size 1346, which is longer than the specified 1000
Created a chunk of size 1117, which is longer than the specified 1000
Created a chunk of size 1295, which is longer than the specified 1000
Created a chunk of size 1104, which is longer than the specified 1000
Created a chunk of size 1022, which is longer than the specified 1000
Created a chunk of size 1711, which is longer than the specified 1000
Created a chunk of size 1456, which is longer than the specified 1000
Created a chunk of size 1778, which is longer than the specified 1000
Created a chunk of size 2051, which is longer than the specified 1000
Created a chunk of size 2203, which is longer than the specified 1000
Created a chunk of size 1021, which is longer than the specified 1000
Created a chunk of s

In [5]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings_model = 'sentence-transformers/all-MiniLM-L12-v2'
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [6]:
docs[0]

Document(page_content="---\ntitle: Code de l'action sociale et des familles\ndate: 2024-01-15\n---\n\n## Partie législative\n\n### Livre Ier : Dispositions générales\n\n#### Titre Ier : Principes généraux\n\n##### Chapitre Ier : Droit à l'aide sociale.\n\n**Art. L111-1**\n\nSous réserve des dispositions des articles L. 111-2 et L. 111-3, toute personne résidant en France bénéficie, si elle remplit les conditions légales d'attribution, des formes de l'aide sociale telles qu'elles sont définies par le présent code.\n\n**Art. L111-2**\n\nLes personnes de nationalité étrangère bénéficient dans les conditions propres à chacune de ces prestations\xa0:\n\n1° Des prestations d'aide sociale à l'enfance\xa0;\n\n2° De l'aide sociale en cas d'admission dans un centre d'hébergement et de réinsertion sociale\xa0;\n\n3° De l'aide médicale de l'Etat\xa0;\n\n4° Des allocations aux personnes âgées prévues à l'article L. 231-1 à condition qu'elles justifient d'une résidence ininterrompue en France métrop

In [7]:
from langchain_community.vectorstores import FAISS

vector = FAISS.from_documents(docs, embeddings)

In [19]:
from langchain.chains import create_retrieval_chain

retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [20]:
response = retrieval_chain.invoke({"input": "C'est quoi le droit?"})
print(response["answer"])

 Le droit de collecter de Roman.

Réponse:

