In [None]:
# Notebook from https://medium.com/@thakermadhav/build-your-own-rag-with-mistral-7b-and-langchain-97d0c92fa146
# Install the Python dependencies used by the cells below (quiet mode).
!pip install -q torch datasets
# Version-pinned quantization / fine-tuning stack.
!pip install -q accelerate==0.21.0 \
                peft==0.4.0 \
                bitsandbytes==0.40.2 \
                transformers==4.31.0 \
                trl==0.4.7
# Scraping, text conversion, embedding and vector-store packages (unpinned).
# NOTE(review): `transformers` is pinned to 4.31.0 above, listed again here
# without a pin, and then upgraded from the git main branch on the next line,
# so the pin does not determine the version that ends up installed.
!pip install -q scipy langchain transformers playwright html2text sentence_transformers faiss-gpu
!pip install -q --upgrade git+https://github.com/huggingface/transformers

In [None]:
# Run Playwright's install steps (browsers, then system dependencies);
# standard output is discarded to keep the cell output short.
!playwright install > /dev/null
!playwright install-deps > /dev/null

In [None]:
import os
import torch
from transformers import (
  AutoTokenizer, 
  AutoModelForCausalLM, 
  BitsAndBytesConfig,
  pipeline
)

from transformers import BitsAndBytesConfig

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

import nest_asyncio
#################################################################
# Tokenizer
#################################################################

# Checkpoint shared by the tokenizer and the model loaded further down.
# The Mistral-7B name used to be assigned first and was immediately overwritten
# by the Mixtral one, so only Mixtral was ever loaded. The dead assignment is
# removed; to switch to the smaller model, set
# model_name = "mistralai/Mistral-7B-Instruct-v0.1" instead.
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository - confirm it is actually needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Use the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

use_4bit = True                     # load the base model in 4-bit precision
bnb_4bit_compute_dtype = "float16"  # name of the torch dtype used for compute
bnb_4bit_quant_type = "nf4"         # quantization type: "fp4" or "nf4"
use_nested_quant = False            # nested (double) quantization on/off

#################################################################
# Set up quantization config
#################################################################

# Turn the dtype name into the corresponding attribute of the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Bundle the settings above into the config object handed to from_pretrained.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Check GPU compatibility with bfloat16.
# This is an informational hint only, so it must not crash the cell: the device
# capability is queried only when a CUDA device is actually available.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    # The hint is printed for compute capability major version 8 or higher.
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load the pre-trained model
#################################################################

# The quantization settings come from bnb_config; downloaded files are cached
# in the current working directory (cache_dir=".").
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=".", quantization_config=bnb_config)

def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function prints nothing: it returns the summary as
    a string and leaves printing to the caller.

    Args:
        model: Any object whose ``named_parameters()`` yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.

    Returns:
        A three-line string giving the trainable parameter count, the total
        parameter count, and the trainable share as a percentage with two
        decimals. A model without any parameters reports ``0.00%`` rather
        than raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the division: an empty model has a total of zero.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {percentage:.2f}%"

# Show the parameter summary of the loaded model.
print(print_number_of_trainable_model_parameters(model))

# Text-generation pipeline built on the loaded model and tokenizer.
# NOTE(review): temperature is set but no do_sample flag is passed; the
# temperature may have no effect if generation is greedy - confirm.
# NOTE(review): return_full_text=True presumably means the prompt is part of
# the pipeline output; verify how the LangChain wrapper below handles that.
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1000,
    return_full_text=True,
    temperature=0.2,
    repetition_penalty=1.1,
)

# Expose the pipeline as a LangChain LLM so it can be used inside chains.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# URL of the page to index ("article to index"); the indexing cell below
# places it in the list of pages to load.
article_a_indexer="https://www.droits-salaries.com/420531139-gie-auxia-gestion/42053113900079/T07521028203-accord-relatif-au-teletravail-au-sein-du-gie-auxia-gestion-teletravail.shtml"

In [None]:
import nest_asyncio

# Allow nested asyncio event loops; presumably needed so the async Chromium
# loader can run inside the notebook's already-running loop.
nest_asyncio.apply()

# Pages to index
articles = [article_a_indexer]

# Load the pages listed above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

# Convert the loaded HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the text into chunks (chunk_size=1000, chunk_overlap=100)
text_splitter = CharacterTextSplitter(chunk_overlap=100, chunk_size=1000)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Embed the chunks and load them into a FAISS index
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)

# Retriever view over the index, used as the "context" source of the RAG chain
retriever = db.as_retriever()

In [None]:
# First version of the prompt: asks for the value together with a confidence
# score as a tuple. The placeholders {context} and {question} are filled in
# when the template is formatted.
# NOTE: the following cell assigns prompt_template again, so when the notebook
# is run top to bottom this version is overwritten before it is used.
prompt_template = """
### [INST] Instruction: Answer the question written in French based on your french business agreements knowledge. #MANDATORY : DON'T USE WORDS BUT RETURN ONLY THE ASKED DATA AND DON'T COMPUTE EXTRAPOLATION BETWEEN PERIODS !!!! The answer should only give the number or NA with it confidence score as a tuple (x,y) as it will be used in a datascience project, hence the answer should be directly processable and don't use words nor code snippet. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
"""

In [None]:
# Second version of the prompt: adds a role section and restricts the answer
# to a bare number or NA. The placeholders {context} and {question} are filled
# in when the template is formatted.
# NOTE(review): the role section sits outside the [INST] ... [/INST] markers;
# confirm this matches the instruction format the model expects.
prompt_template = """

### [ROLE] You are an expert of French business agreements and you are given the task to produce quality data from a business agreement without given explanation since you are an expert. You can't use division or multiplication operation in your logic and data should be in the text or infered. The answer is limited at 10 characters. The answered data should only be a float or NA and nothing more. You are not allowed to use english word, nor code snippet.

### [INST] Instruction: Return the asked data from the question. #MANDATORY : DON'T USE WORDS BUT RETURN ONLY THE ASKED DATA AND DON'T COMPUTE EXTRAPOLATION BETWEEN PERIODS !!!! The answer should only give the number or NA  as it will be used in a datascience project, hence the answer should be directly processable and don't use words nor code snippet.

Here is context to help:

{context}

### QUESTION:
{question} [/INST]
"""

In [None]:
# Wrap the template string; it declares "context" and "question" as its inputs
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Chain that combines the prompt with the pipeline-backed LLM
llm_chain = LLMChain(prompt=prompt, llm=mistral_llm)

In [None]:
# Retrieval-augmented chain: the mapping supplies "context" from the retriever
# and "question" from a passthrough of the chain input, then pipes both into
# llm_chain.
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

In [None]:
import warnings
# Suppress all warnings from this point on to keep the cell outputs readable.
# NOTE(review): this also hides warnings that may signal real problems.
warnings.filterwarnings('ignore')

In [None]:
# Questions put to the RAG chain, one constant per data field to extract.

# Yes/no questions about clauses of the agreement (expected answer: 1, 0 or NA).
Q_DUREE="If a duration is mentioned, is the agreement for a fixed term? Yes=1 or no=0 or NA?"
Q_REVERS="If a reversibility clause and article are mentioned, does the agreement have a reversibility clause or article? Yes=1 or no=0 or NA?"
Q_ADAPT="If an adaptation period is mentioned, does the agreement include an adaptation period? Yes=1 or no=0 or NA?"
Q_TTREG="If regular telework (per week) is mentioned, does the agreement detail regular telework? Yes=1 or no=0 or NA?"
Q_TTOCA="If occasional telework (per month or annual) is mentioned, does the agreement mention occasional telework? Yes=1 or no=0 or NA?"
Q_TTEXC="If exceptional telework (exceptional circumstances) is mentioned, does the agreement mention exceptional telework? Yes=1 or no=0 or NA?"
# Maximum number of telework days per week / month / quarter / year.
Q_TTSEM="employee=any employee full-time or part-time combined; If the agreement mentions a number of days per week, how many days maximum can an employee telework per week on a regular basis WITHOUT using any exceptional days of telework? Note that a number of on-site days per week may be mentioned, in which case the maximum number of teleworking days allowed must be inferred, given that there are 5 working days per week."
Q_TTMOIS="employee=any employee full-time or part-time combined; If it mentions a quotient per month, how many days maximum can an employee telework per month WITHOUT using any exceptional days of telework?"
Q_TTTRIM="employee=any employee full-time or part-time combined; If it mentions a quotient per quarter, how many days maximum can an employee telework per quarter WITHOUT using any exceptional days of telework?"
Q_TTANNEE="employee=any employee full-time or part-time combined; If a quota per year is mentioned, how many days maximum can an employee telework per year WITHOUT using any exceptional days of telework?"
Q_TTEXCEP="employee=any employee full-time or part-time combined; If an exceptional quota (modulable days for any employee full-time or part-time combined) is mentioned, how many days maximum can an employee telework per year on an exceptional basis?"
# This question explicitly allows multiplication and inference.
Q_TTTOTAL="For this question, you can multiply and infer logic; In total, how many days maximum can an employee telework per year?"
# Equipment provided (expected answer: 1, 0 or NA).
Q_EQUIP="If there is mention of equipment provided, does the agreement mention equipment provided? Yes=1 or no=0 or NA?"
# Allowance amounts; each question lists French keywords to look for.
Q_COMPJ="If a daily allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) is mentioned, how much is the fixed allowance per day of telework?"
Q_COMPM="If a monthly allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) is mentioned, how much is the monthly allowance per month of telework?"
Q_COMPA="If an annual allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) is mentioned, how much is the annual allowance per year of teleworking?"
Q_COMPO="If an allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) other than daily, monthly or annual is mentioned, how much is the telework-related allowance?"
Q_COMPC="If an exceptional allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) is mentioned, what is the maximum exceptional allowance for any remote work for covid?"
# NOTE(review): this question ends with the same "for any remote work for
# covid" wording as Q_COMPC - possibly a copy-paste leftover; confirm intent.
Q_COMPE="If an equipment allowance (french key words = ['indemnité','remboursement de frais','subvention', 'participation aux frais']) is mentioned, what is the maximum equipment allowance for any remote work for covid?"

In [None]:
# Run every question through the RAG chain and print the model's answer.
# One (label, question) table and a loop replace nineteen copy-pasted
# invoke/print pairs, so adding or removing a question is a one-line change.
# The printed output keeps the original order and format.
QUESTIONS = [
    ("Q_DUREE", Q_DUREE),
    ("Q_REVERS", Q_REVERS),
    ("Q_ADAPT", Q_ADAPT),
    ("Q_TTREG", Q_TTREG),
    ("Q_TTOCA", Q_TTOCA),
    ("Q_TTEXC", Q_TTEXC),
    ("Q_TTSEM", Q_TTSEM),
    ("Q_TTMOIS", Q_TTMOIS),
    ("Q_TTTRIM", Q_TTTRIM),
    ("Q_TTANNEE", Q_TTANNEE),
    ("Q_TTEXCEP", Q_TTEXCEP),
    ("Q_TTTOTAL", Q_TTTOTAL),
    ("Q_EQUIP", Q_EQUIP),
    ("Q_COMPJ", Q_COMPJ),
    ("Q_COMPM", Q_COMPM),
    ("Q_COMPA", Q_COMPA),
    ("Q_COMPO", Q_COMPO),
    ("Q_COMPC", Q_COMPC),
    ("Q_COMPE", Q_COMPE),
]

for label, question in QUESTIONS:
    # The chain result is indexed by key; the answer is read from "text".
    reponse = rag_chain.invoke(question)
    print(f"{label}:", reponse["text"])