In [1]:
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline,BitsAndBytesConfig
import transformers
import torch
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter
from langchain.document_loaders import PDFPlumberLoader
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
import os
import streamlit as st
from llama_index import Document, LangchainEmbedding, SimpleDirectoryReader, ListIndex,VectorStoreIndex, StorageContext, GPTVectorStoreIndex, LLMPredictor, ServiceContext, load_index_from_storage
#from llama_index.vector_stores import DeepLakeVectorStore
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import base64
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
import torch.cuda as cuda

from langchain.agents.agent_toolkits import (VectorStoreInfo, create_vectorstore_agent, VectorStoreToolkit)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = 'tiiuae/falcon-40b-instruct'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
# set quantization configuration to load large Falcon model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto'
)
# Set model to inference mode
model.eval()
print(f"Model loaded on {device}")


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/LLM-as-a-Service/falcon_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ubuntu/LLM-as-a-Service/falcon_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Loading checkpoint shards: 100%|██████████| 9/9 [00:36<00:00,  4.03s/it]

Model loaded on cuda:0





In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

In [4]:
from transformers import StoppingCriteria, StoppingCriteriaList
import torch


tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
# we create a list of stopping criteria
stop_token_ids = [
    tokenizer.convert_tokens_to_ids(x) for x in [
        ['Human', ':'], ['AI', ':']
    ]
]
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]

In [5]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [6]:
F40_pipeline = pipeline(
                task="text-generation",
                model = model,
                tokenizer = tokenizer,
                return_full_text = True,
                stopping_criteria=stopping_criteria,
                max_length= 512, 
                temperature= 0.0,
                repetition_penalty=1.1
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForC

In [7]:
#Convert to HF pipeline.
hf_pipeline = HuggingFacePipeline(pipeline=F40_pipeline)

In [8]:
#OpenAI key config
from dotenv import dotenv_values
env_variables = dotenv_values('env.env')
os.environ['OPENAI_API_KEY'] =

In [9]:
#creating embeddings
#OpenAI ADA-002
openai_embed = OpenAIEmbeddings()

#Langchain embedding SBERT
sbert_embed = LangchainEmbedding(HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

In [11]:
test = "test the query"
embed = openai_embed.embed_query(test)
print(embed, len(embed))

[-0.011750645004212856, 0.008277921937406063, -0.00771179236471653, -0.01337999477982521, -0.01135711558163166, 0.039270076900720596, -0.019096525385975838, -0.014581293798983097, -0.030488161370158195, -0.030377697199583054, 0.00818817038089037, 0.011515908874571323, -0.0005139181157574058, -0.017149589955806732, 0.0038110201712697744, -0.00012330152094364166, 0.01824042573571205, -0.011805877089500427, 0.01920698955655098, -0.03300122544169426, 0.0073113590478897095, -0.012019902467727661, 0.0070455544628202915, -0.0015188847901299596, -0.005374780856072903, -0.013801139779388905, 0.012289158999919891, -0.023970764130353928, -0.01648680493235588, 0.009782998822629452, 0.0071663749404251575, -0.00733207119628787, -0.03719887137413025, -0.016017330810427666, -0.010721946135163307, 0.0032448903657495975, -0.004884595517069101, -0.008692163974046707, 0.027823207899928093, 0.013745907694101334, 0.020518753677606583, -0.006420740392059088, -0.003219000296667218, -0.014498446136713028, 0.00

## getting the data

In [10]:
loader = PyPDFLoader('./GRI.pdf')

In [14]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [11]:
pages = loader.load_and_split()#[1:5]
pages

[Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0}),
 Document(page_content='2\nA Short Introduction to the GRI Standards\nIntroduction\nThe GRI Standards are a modular system \nof interconnected standards. They allow \norganizations to publicly report the \nimpacts of their activities in a structured \nway that is transparent to stakeholders \nand other interested parties.\nThis Short Introduction will:\n•  give new users of the GRI Standards an overview \nof how the Standards are set up, and equip them to \nstart working with the various elements involved in the \nreporting process;\n•  be of assistance to experienced users in gaining an \nunderstanding of changes in the system and the role of \nthe GRI Sector Standards; and\n•  aid stakeholders and other information users (such \nas analysts and policymakers) to understand how the \nreporting process works and what to loo

In [16]:
sections = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=100, length_function=len).split_documents(pages)

In [17]:
sections

[Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0}),
 Document(page_content='2\nA Short Introduction to the GRI Standards\nIntroduction\nThe GRI Standards are a modular system \nof interconnected standards. They allow \norganizations to publicly report the \nimpacts of their activities in a structured \nway that is transparent to stakeholders \nand other interested parties.\nThis Short Introduction will:\n•  give new users of the GRI Standards an overview \nof how the Standards are set up, and equip them to \nstart working with the various elements involved in the \nreporting process;\n•  be of assistance to experienced users in gaining an \nunderstanding of changes in the system and the role of \nthe GRI Sector Standards; and\n•  aid stakeholders and other information users (such \nas analysts and policymakers) to understand how the \nreporting process works and what to loo

## Setup DB with ChromaDB

In [23]:
db5 = Chroma.from_documents(sections, openai_embed, persist_directory="./chroma_db")
db5.persist()

In [24]:
query = "what are the GRI standards?"
response = db5.similarity_search(query)

In [27]:
response

[Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0}),
 Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0}),
 Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0}),
 Document(page_content='A Short Introduction to  \nthe GRI Standards\nwww.globalreporting.org www.globalreporting.org', metadata={'source': './GRI.pdf', 'page': 0})]

In [28]:
query = "what are the GRI standards?"
db3 = Chroma(persist_directory="./chroma_db", embedding_function=openai_embed)
docs = db3.similarity_search(query)
print(docs[0].page_content)

A Short Introduction to  
the GRI Standards
www.globalreporting.org www.globalreporting.org


## Retrival QA

In [29]:
gri_std = RetrievalQA.from_chain_type(llm=hf_pipeline, chain_type="stuff",retriever=db3.as_retriever())

## Math Module

In [30]:
from langchain import LLMMathChain
math_llm = LLMMathChain.from_llm(llm=hf_pipeline, verbose=True)

## Search

In [None]:
from langchain import SerpAPIWrapper
search = SerpAPIWrapper()

## Youtube search

In [31]:
from langchain.tools import YouTubeSearchTool
youtubeSearch = YouTubeSearchTool()

In [32]:
yt = youtubeSearch.run("GRI Standards")

"['/watch?v=vNl20Yg5yE4&pp=ygUNR1JJIFN0YW5kYXJkcw%3D%3D', '/watch?v=jJ6RST7DO28&pp=ygUNR1JJIFN0YW5kYXJkcw%3D%3D']"

## Wikipedia Serach

In [107]:
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
wikipedia = WikipediaAPIWrapper(api_wrapper=WikipediaAPIWrapper)

## Agent initialization


In [33]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import asyncio
import time

### Tools

In [52]:
tools = [
Tool(
    name='pdf query',
    func=gri_std.run,
    description='useful when you need to answer questions about a pdf. Input should be a search query. And the output should be a simple answer based on the pdf and search query, then use the tool youtube search to look for videos to support the answer.'
),
Tool(name="Calculator",
     func=math_llm.run,
     description="useful when you need to answer a question about maths"
),
Tool(name="youtube search",
     func=youtubeSearch.run,
     description="use this tool to search for a youtube video based on the search query. The input should be any search query. The output should be a link to the youtube video.")]

In [134]:
pdf_template = """You are an AI that focuses on answering questions related to a pdf. \
Here is the question: {input}"""

youtube_template = """You are an AI that looks for additional video resources related to the question. You just provide links to one or two relevant youtube video, \
Here is the question:
{input}"""

search_template = """You are an AI that is designed to find additional resources on the internet to support the user. You just provide one or two links relevant to the question. \
Here is the question:
{input}"""

In [125]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import asyncio
import time

### Agent 1

In [47]:
agent = initialize_agent(
    tools, hf_pipeline, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

In [48]:
agent.agent.llm_chain.prompt.template

'Answer the following questions as best you can. You have access to the following tools:\n\npdf query: useful when you need to answer questions about a pdf. Input should be a search query. And the output should be a simple answer based on the pdf and search query.\nCalculator: useful when you need to answer a question about maths\nyoutube search: use this tool to search for a youtube video based on the search query. The input should be any search query. The output should be a link to the youtube video.\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [pdf query, Calculator, youtube search]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {inpu

In [50]:
query1 = "What is the purpose of the GRI standards?"

In [42]:
questions = ""
num_question = query1.count("?")
if num_question > 1:    
    input_questions = query1.split("? ")
    questions = [question.strip() + '?' for question in input_questions if question]
    questions
else: 
    quesitons = query1
    quesitons
questions

''

In [51]:
agent.run(query1)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.




[1m> Entering new  chain...[0m
[32;1m[1;3m The Global Reporting Initiative (GRI) Standards are a set of guidelines that organizations can follow to report their sustainability performance. They aim to promote transparency and accountability in reporting practices.
Action: pdf query
Action Input: GRI Standards[0m

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



Observation: [36;1m[1;3m The Global Reporting Initiative (GRI) is a non-profit organization that has developed a comprehensive sustainability reporting framework that is widely used around the world. The GRI Standards are designed to help organizations understand and communicate their impacts on critical sustainability issues such as climate change, human rights, governance, and social equity. By using the GRI Standards, companies can report on their performance in a consistent and transparent manner, allowing stakeholders to compare their performance across different industries and regions.[0m
Thought:[32;1m[1;3m Now I know the purpose of the GRI standards
Final Answer: The purpose of the GRI standards is to provide a comprehensive framework for organizations to report their sustainability performance in a consistent and transparent manner.[0m

[1m> Finished chain.[0m


'The purpose of the GRI standards is to provide a comprehensive framework for organizations to report their sustainability performance in a consistent and transparent manner.'

In [53]:
youtubeSearch.run(query1)

"['/watch?v=vNl20Yg5yE4&pp=ygUpV2hhdCBpcyB0aGUgcHVycG9zZSBvZiB0aGUgR1JJIHN0YW5kYXJkcz8%3D', '/watch?v=jJ6RST7DO28&pp=ygUpV2hhdCBpcyB0aGUgcHVycG9zZSBvZiB0aGUgR1JJIHN0YW5kYXJkcz8%3D']"

: 

### Agent 2 Plan and execute

In [88]:
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner


In [89]:
planner = load_chat_planner(hf_pipeline)

In [90]:
executor = load_agent_executor(hf_pipeline, tools, verbose=True)

In [91]:
agent2 = PlanAndExecute(planner=planner,executor=executor,verbose=True)

In [96]:
agent2.run("What are the GRI standards?")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.




[1m> Entering new  chain...[0m
steps=[]

In [45]:
from langchain.agents.agent_toolkits import (VectorStoreInfo, create_vectorstore_agent, VectorStoreToolkit)

vectorstore_info = VectorStoreInfo(
    name="paper",
    description="a scientific paper as a pdf",
    vectorstore=db3
)

In [46]:
toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info,llm = hf_pipeline)

In [47]:
agent_executor = create_vectorstore_agent(
    llm=hf_pipeline,
    toolkit=toolkit,
    verbose=True
)

In [48]:
agent_executor.run('What has been the impact of the GRI standards?')

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.




[1m> Entering new  chain...[0m
[32;1m[1;3m I am not sure what GRI stands for, but I can try to find out
Action: paper
Action Input: "What has been the impact of the GRI standards?"[0m

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Input length of input_ids is 576, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



Observation: [36;1m[1;3m The[0m
Thought:[32;1m[1;3m I need more context to answer that question
Action: paper_with_sources
Action Input: "What has been the impact of the GRI standards?"[0m

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Input length of input_ids is 2031, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



Observation: [33;1m[1;3m{"answer": " The", "sources": ""}[0m
Thought:[32;1m[1;3m I still don't know the answer
Final Answer: I don't know[0m

[1m> Finished chain.[0m


"I don't know"

In [12]:
from transformers import StoppingCriteria, StoppingCriteriaList
import torch


tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
# we create a list of stopping criteria
stop_token_ids = [
    tokenizer.convert_tokens_to_ids(x) for x in [
        ['Human', ':'], ['AI', ':']
    ]
]
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]

In [13]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [14]:
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.0,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # mex number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForC

In [15]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=generate_text)

In [16]:
Interprete_template = """
Use exactly one word and Interprete the text below and evaluate the text and provide me with the sentiment and subject. 
sentiment: is the text in a positive, neutral or negative sentiment? 
subject: What subject is the text about? Use exactly one word.

text: {input}
'AI':
"""

In [17]:
from langchain import PromptTemplate, LLMChain
prompt_template = PromptTemplate.from_template(template=Interprete_template)
Interprete_chain = LLMChain(llm=hf_pipeline, prompt=prompt_template,output_key="Interprete")
Interprete_chain.predict(input="I ordered Pizza Salami and it was awesome!")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


'The text has a positive sentiment as it expresses enjoyment of the pizza. The subject of the text is food, specifically pizza with salami toppings.'

In [None]:
torch.cuda.device_count()

In [None]:
EMB_SBERT_MPNET_BASE = "sentence-transformers/all-mpnet-base-v2"

EMB_INSTRUCTOR_XL = "hkunlp/instructor-xl"

In [None]:
llm = HuggingFacePipeline(pipeline = hf_pipeline)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
embedding_model = HuggingFaceEmbeddings(model_name=EMB_SBERT_MPNET_BASE)

In [None]:
path_to_file = '../oecd_env.pdf'
loader = PyPDFLoader(path_to_file)
documents = loader.load_and_split()
#documents = loader.load()
#text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
#texts = text_splitter.split_documents(documents)

In [None]:
text_splitter.split_text(documents)

In [None]:
documents

In [None]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(documents)

#text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=10)  # This the encoding for text-embedding-ada-002
#texts = text_splitter.split_documents(texts)

In [None]:
texts[3].page_content

In [None]:
persist_directory = '.'
vectordb = Chroma.from_documents(documents=documents, embedding=embedding_model, persist_directory=persist_directory)

In [None]:
vectordb.persist()

In [None]:
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

In [None]:
retriever = vectordb.as_retriever(search_kwargs={"k": 3, "search_type" : "similarity"})

qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                       chain_type="stuff", 
                                       retriever=retriever, 
                                       return_source_documents=True,
                                       verbose=True)

In [None]:
def process_llm_response(llm_response):
    print('result:',llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])
    print('llm resp', llm_response)

In [None]:
query = "what is the theory of Planned Behaviour?"

llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
query = "what is the impact of the GRI guidelines?" #"how is the sustainability performance reported?"#"what is the impact of the GRI guidelines?"#"who wrote this article?"#"who uses the GRI standards?"#"what is the impact of the GRI guidelines?"#"who is impacted by the GRI guidelines?"
qa_chain.run(query)

In [None]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

In [None]:
from chromaviz import visualize_collection
visualize_collection(vectordb._collection)

In [None]:
question_template = """
    context: {context}
    question: {question}
    answer: 
    """
QUESTION_PROMPT = PromptTemplate(
        template=question_template, input_variables=["context", "question"]
    )
qa_chain.combine_documents_chain.llm_chain.prompt = QUESTION_PROMPT

In [None]:
chain1 = LLMC

In [None]:
test_template = PromptTemplate

In [None]:
query = "what is the impact of the GRI guidelines?"#"who wrote this article?"#"who uses the GRI standards?"#"what is the impact of the GRI guidelines?"#"who is impacted by the GRI guidelines?"
qa_chain.combine_documents_chain.verbose = True
qa_chain.return_source_documents = True
qa_chain({"query":query})

* Test a different embedding
* check how the results are generated
* test a different way to split the document

In [None]:
import bitsandbytes