In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
import os 
from dotenv import load_dotenv
from time import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
#loader = DirectoryLoader('PDF_Testing', glob="./*.pdf", loader_cls=PyPDFLoader)
loader = PyPDFLoader('How effective are climate protests at swaying policy.pdf')
documents = loader.load()

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [4]:
len(texts)

216

In [5]:
texts[5]

Document(page_content='are trying to work out how best to get their \nvoices heard.\nSeptember, for example, saw protests', metadata={'source': 'How effective are climate protests at swaying policy.pdf', 'page': 0})

In [6]:
_ = load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

llm=HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta", 
    model_kwargs={"temperature":0.2, "max_length":256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
    )

In [7]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="BAAI/bge-base-en-v1.5",
                                                      model_kwargs={"device": "cuda"})

load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
%%time
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

CPU times: total: 1.44 s
Wall time: 3.75 s


In [9]:
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
docs = retriever.get_relevant_documents("What is paranoia?")

In [10]:
len(docs)

2

In [11]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [12]:
def process_llm_response(qa_chain, query):
    print(f"Query: {query}\n")
    time_1 = time()
    llm_response = qa_chain(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print(f"\nResult:", llm_response['result'])
    print(f"\nmetadata:", llm_response['source_documents'][0].metadata)

In [13]:
query = "How many people took part in actions linked with climate protest?"
process_llm_response(qa_chain, query)

Query: How many people took part in actions linked with climate protest?

Inference time: 7.249 sec.

Result:  According to the given context, it is not explicitly mentioned how many people took part in actions linked with climate protest. The context only mentions Taylor's article "Did Last Summer’s Black Lives Matter Protests Spark a Climate Movement?" which explores whether the Black Lives Matter protests of summer 2020 led to a climate movement. The article does not provide a specific number of people who participated in climate protests during that time.

metadata: {'page': 3, 'source': 'How effective are climate protests at swaying policy.pdf'}


In [14]:
# break it down
query = "What drives people to become climate protesters?"
process_llm_response(qa_chain, query)

Query: What drives people to become climate protesters?

Inference time: 6.53 sec.

Result:  Many people are driven to become climate protesters due to their strong beliefs in the urgency and importance of addressing climate change, as well as their frustration with the lack of action from policymakers and corporations. They may also be motivated by a sense of moral obligation, a desire to protect future generations, and a commitment to social and environmental justice. Additionally, some may be inspired by the success of past climate protests and the potential for collective action to create change. However, the factors that drive individuals to become climate protesters can vary widely, and may also include personal experiences with climate impacts, scientific knowledge, and cultural and social influences.

metadata: {'page': 1, 'source': 'How effective are climate protests at swaying policy.pdf'}


In [None]:
query = "what factors determine who takes to the streets over climate change?"
process_llm_response(qa_chain, query)

Query: what factors determine who takes to the streets over climate change?



In [None]:
query = "Please give me an example of climate protest"
process_llm_response(qa_chain, query)

In [None]:
query = "What tactics or action can activists choose to protest?"
process_llm_response(qa_chain, query)

In [None]:
query = "Any successful example of tactics that activists choose for climate protest?"
process_llm_response(qa_chain, query)

In [None]:
query = "What motivates repression of protests?"
process_llm_response(qa_chain, query)

In [None]:
query = "How effective are climate protests at swaying policy?"
process_llm_response(qa_chain, query)

In [None]:
query = "how does a movement best effect change and convert citizens’ concerns into policies that address the problem?"
process_llm_response(qa_chain, query)

In [None]:
query = "What are the pros and cons of the tactics that protesters use??"
process_llm_response(qa_chain, query)

In [None]:
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

In [None]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)