<a href="https://colab.research.google.com/github/YasmineSAYAD/chatbot/blob/main/yasmine_llm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install (or upgrade, via -U) the LangChain community integrations imported below.
# NOTE(review): the version is not pinned, so different runs may resolve different releases.
%pip install -U langchain-community

In [None]:
# Install the CPU build of FAISS, needed by the FAISS vector store used later in this notebook.
# NOTE(review): the version is not pinned.
%pip install faiss-cpu

In [None]:
from huggingface_hub import snapshot_download
from pathlib import Path
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from transformers import pipeline
import pandas as pd
from datasets import Dataset
from langchain.docstore.document import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from peft import get_peft_model, LoraConfig, TaskType
from google.colab import drive

In [None]:
# Log in to the Hugging Face Hub (needed before downloading the model below).
login()

In [None]:
#drive.mount("/content/drive")

In [None]:
# Disabled code: the snapshot_download call is wrapped in a triple-quoted string so
# it is not executed; the model is loaded with from_pretrained in a later cell.
'''
#download mistral
snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
    local_dir="mistral_models/7B-Instruct-v0.3"
)
'''

In [None]:
# Hub repository id of the checkpoint used for generation.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"

# Tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Model weights in half precision (float16); device placement is delegated
# to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.float16)

In [None]:
# LoRA adapter configuration (PEFT) applied to the loaded causal language model.
# NOTE(review): no training step is visible in this notebook; presumably the freshly
# initialised adapters leave generation unchanged — confirm this wrap is needed.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],# adapters are attached only to the attention query (q_proj) and value (v_proj) projections
    lora_dropout=0.01,
    bias="none",
    task_type=TaskType.CAUSAL_LM # CAUSAL_LM: autoregressive text generation
)

# Rebinds `model` to the PEFT-wrapped model.
# NOTE(review): because the name is reused, re-running this cell passes the already
# wrapped model to get_peft_model again — re-run the loading cell first.
model = get_peft_model(model, lora_config)

In [None]:
# Disabled code: a quick smoke test of the model, wrapped in a triple-quoted string
# so it is not executed.
'''
# test prompt
chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)
prompt = "Quelle est la capitale de la France"
response = chatbot(prompt, max_new_tokens=30, do_sample=False)
print(response[0]["generated_text"])
'''

In [None]:
# Load the CSV into df_train_base directly from its Google Drive download URL.
file_id = "1vZP_cQWo_sUlcdXpTPu6IixWtd9qJsp1"
url = "https://drive.google.com/uc?id=" + file_id
df_train_base = pd.read_csv(url)

In [None]:
# Show the CSV's column names so they can be checked against the keys read when building documents.
print(df_train_base.columns.tolist())

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used to cut each answer into overlapping chunks before indexing.
# NOTE(review): confirm that chunks of up to 2095 units fit the embedding model's
# input limit; longer inputs may be truncated at embedding time.
splitter = RecursiveCharacterTextSplitter(chunk_size=2095, chunk_overlap=200)

In [None]:
# Build one LangChain Document per answer chunk from the rows of the CSV.
documents = []

for _, row in df_train_base.iterrows():
    answer = row["answer"]

    # read_csv yields NaN (a float) for empty cells and split_text expects a string,
    # so rows without a usable answer are skipped instead of aborting the whole build.
    if not isinstance(answer, str) or not answer.strip():
        continue

    # The metadata is identical for every chunk of a row, so gather it once per row.
    row_metadata = {
        "index": row["index"],
        "question": row["question"],
        "authors": row["authors"],
        "title": row["title"],
        "doi": row["doi"],
        "journal": row["journal"],
        "volume": row["volume"],
        "pages": row["pages"],
        "license": row["license"]
    }

    # Only the answer text is indexed; each chunk becomes its own Document.
    for chunk in splitter.split_text(answer):
        documents.append(
            # Copy the dict so Documents never share one mutable metadata object.
            Document(page_content=chunk, metadata=dict(row_metadata))
        )

In [None]:
# Embedding model that turns each document chunk into a vector.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# Embed every chunk and build the FAISS vector store from the results.
db = FAISS.from_documents(documents=documents, embedding=embedding_model)

In [None]:
# Hugging Face text-generation pipeline around the loaded model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,  # upper bound on newly generated tokens per call
    do_sample=False,     # no sampling
    # Return only the newly generated text. By default the pipeline echoes the
    # prompt, so the retrieved context and the question would otherwise be
    # carried into the QA chain's intermediate and final outputs.
    return_full_text=False
)

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can be
# passed to a chain as its LLM.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Retriever over the FAISS store that filters results with a relevance-score
# threshold of 0.3.
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "score_threshold": 0.3
    }
)
# Retrieval QA chain that combines the retriever with the LLM, using the
# "map_reduce" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever= retriever,
    chain_type="map_reduce"
)

In [None]:
# response generation function
def generate_response(query):
    """Answer ``query`` from the indexed documents, or return a fallback message.

    Args:
        query: The user's question as text.

    Returns:
        The fallback sentence (a ``str``) when the query is blank or the
        retriever finds no document; otherwise whatever
        ``qa_chain.invoke(query)`` returns.
    """
    fallback = "Je ne suis pas certain de pouvoir répondre à cette question pour le moment."

    # A blank prompt cannot match anything useful: skip retrieval and the LLM call.
    if not query or not query.strip():
        return fallback

    # `invoke` is the Runnable entry point that supersedes the deprecated
    # `get_relevant_documents`.
    docs = retriever.invoke(query)
    if not docs:
        return fallback
    return qa_chain.invoke(query)

In [None]:
from IPython.display import display
import ipywidgets as widgets

# Text box in which the user types the prompt.
prompt_input = widgets.Text(value="", placeholder="Écris ton prompt ici...", layout=widgets.Layout(width="80%"))

# Button that submits the prompt.
send_button = widgets.Button(description="Envoyer", button_style="success", layout=widgets.Layout(margin="5px 0 0 0"))

# Output widget that receives the printed answer.
output_area = widgets.Output()

# Click handler: generate and display the answer for the current prompt.
def on_send_clicked(b):
    """Button callback: answer the current prompt and print it in the output area.

    Args:
        b: The argument passed by the button's click event (unused).
    """
    output_area.clear_output()
    with output_area:
        response = generate_response(prompt_input.value)
        # The QA chain returns a mapping; show only its answer text rather than the
        # raw dict repr. Plain strings (the fallback message) are printed as-is.
        if isinstance(response, dict):
            response = response.get("result", response)
        print(response)

# Register the click handler on the send button.
send_button.on_click(on_send_clicked)

# Render the input box, the button and the output area in the notebook.
display(prompt_input, send_button, output_area)