<a href="https://colab.research.google.com/github/UdayG01/Book-Pal-Llama2/blob/main/bookpalllama2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
! pip install llama-cpp-python
! pip install langchain
! pip install pypdf
! pip install unstructured
! pip install sentence_transformers
! pip install pinecone-client
! pip install huggingface_hub



In [3]:
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
import pinecone

In [None]:
# Read the PDF of the novel from the Colab working directory
PDF_PATH = "/content/The_Stranger_Albert_Camus.pdf"
loader = PyPDFLoader(PDF_PATH)
data = loader.load()

In [5]:
# Cut the loaded document into chunks (chunk_size=500, no overlap)
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(data)

# Quick sanity check: how many chunks were produced, and what the first one is
print(len(docs))
print(docs[0])

469
page_content='THE STRANGER' metadata={'source': '/content/The_Stranger_Albert_Camus.pdf', 'page': 2}


In [6]:
import os
from getpass import getpass


def _require_secret(name: str) -> str:
    """Return the secret held in environment variable `name`.

    If the variable is unset or empty, the user is prompted for the value
    with `getpass`, so the secret is neither echoed nor stored in the
    notebook source or its saved output.

    Raises:
        RuntimeError: if no value is available from the environment or prompt.
    """
    value = os.environ.get(name) or getpass(f"Enter {name}: ")
    if not value:
        raise RuntimeError(f"{name} is required but was not provided")
    return value


# SECURITY: credentials must never be committed with the notebook. Earlier
# revisions of this cell embedded a Hugging Face token and a Pinecone API key
# as string literals; treat those values as leaked and revoke/rotate them.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _require_secret("HUGGINGFACEHUB_API_TOKEN")
PINECONE_API_KEY = _require_secret("PINECONE_API_KEY")

# The environment name is not a secret, so a default is acceptable here.
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'asia-southeast1-gcp-free')

In [7]:
# Embedding model handed to the Pinecone vector store below
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [8]:
# Name of the Pinecone index used as the vector store
index_name = "langchain"

# Initialise the Pinecone client with the credentials configured earlier
pinecone.init(
    environment=PINECONE_API_ENV,  # shown next to the API key in the console
    api_key=PINECONE_API_KEY,  # available at app.pinecone.io
)

In [9]:
# Attach to the index that already holds the embedded chunks.
# To (re)populate the index instead, embed every chunk of `docs` and upload it
# by running the following line in place of the call below:
#docsearch = Pinecone.from_texts([text.page_content for text in docs], embeddings, index_name=index_name)
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

In [10]:
# Retrieval check: ask the vector store for the chunks most similar to a query.
# `k` is the number of top-ranked chunks returned by the similarity search.
# Note that `docs` is rebound here from the split chunks to the search hits.
query = 'who is the protagonist of the book?'
docs = docsearch.similarity_search(query=query, k=3)

print(docs)

[Document(page_content='THE \nSTRANGER \nALBERT CAMU S \nTranslated from the French \nby Matthew Ward \nVIN TAGE IN TER NATI ON AL \nVIN TAGE BOOKS \nA DIVISION OF RAND OM HOUS E, INC. \nNEW YORK', metadata={}), Document(page_content='THE STRANGER', metadata={}), Document(page_content='Knopf, Judith Jones, for years of patience and faith. \nNancy Festinger and Melissa Weissberg also deserve my \ngratitude. \nvii', metadata={})]


In [11]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose

Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.1.77.tar.gz (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Running command pip subprocess to install build dependencies
  Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
  Collecting setuptools>=42
    Using cached setuptools-68.0.0-py3-none-any.whl (804 kB)
  Collecting scikit-build>=0.13
    Using cached scikit_build-0.17.6-py3-none-any.whl (84 kB)
  Collecting cmake>=3.18
    Using cached cmake-3.27.1-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (26.0 MB)
  Collecting ninja
    Using cached ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145 kB)
  Collecting distro (from scikit-build>=0.13)
    Using cached distro-1.8.0-py3-none-any.whl (20 kB)
  Collecting packaging (from scikit-build

In [12]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from huggingface_hub import hf_hub_download
from langchain.chains.question_answering import load_qa_chain

verbose?..

In [13]:
# Callback manager with a single stdout-streaming handler, passed to the LLM
stdout_handler = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([stdout_handler])

In [14]:
# Hugging Face Hub repository id and weights filename of the model to use;
# both are passed to `hf_hub_download` in the next cell.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"

# Alternative repository/file pair (7B chat model, q2_K file); uncomment to use
# it instead of the 13B pair above.
# model_name_or_path = "TheBloke/Llama-2-7B-Chat-GGML"
# model_basename = "llama-2-7b-chat.ggmlv3.q2_K.bin"

In [15]:
# Download the selected weights file from the Hub; the result is the path
# later given to LlamaCpp as `model_path`
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

In [26]:
# Tunables for llama.cpp: layers offloaded to the GPU and the batch size
n_gpu_layers = 40
n_batch = 256

# Collect the constructor arguments first, then build the LLM wrapper
llama_settings = dict(
    model_path=model_path,
    max_tokens=256,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    n_ctx=1024,
    verbose=False,
)
llm = LlamaCpp(**llama_settings)

In [27]:
# Question-answering chain over the retrieved chunks, using the "stuff" chain type
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [28]:
# Retrieve the 4 chunks most similar to the question; they become the context
# for the QA chain. The question text is what gets embedded for the search,
# so it must be spelled correctly ("protagonist").
query = "who is the protagonist of the book"
docs = docsearch.similarity_search(query, k=4)

print(docs)

[Document(page_content="The Myth of Sisyphus (Le Mythe de Sisyphe) \nand Other Essays I955 \nThe Rebel (L'Homme Revolte) I954 \nThe Plague (La Peste) I948 \nThe Stranger (L'Etranger) I 946", metadata={}), Document(page_content='THE \nSTRANGER \nALBERT CAMU S \nTranslated from the French \nby Matthew Ward \nVIN TAGE IN TER NATI ON AL \nVIN TAGE BOOKS \nA DIVISION OF RAND OM HOUS E, INC. \nNEW YORK', metadata={}), Document(page_content="in Camus's writing that one approaches a degree of its \nstill startling originality. \nIn the second half of the novel Camus gives freer \nrein to a lyricism which is his alone as he takes Meursault, \nnow stripped of his liberty, beyond sensation to enforced \nvi", metadata={}), Document(page_content='The Stranger, The Plagu e, The Fall, and Exile and \nthe Kingdom; his philosophical essays, The Myth of \nSisyphus and The Rebel; and his plays have assured \nhis preeminent position in modern French letters. In \n1957 Camus was awarded the Nobel Prize for

In [29]:
# Answer the current question from the retrieved chunks
chain.run(
    question=query,
    input_documents=docs,
)

 The protagonist of the novel "The Stranger" is Meursault.

' The protagonist of the novel "The Stranger" is Meursault.'

In [39]:
# Ask a second question: retrieve the 4 most similar chunks, then let the QA
# chain answer from them. Names and terms are spelled correctly because the
# query text is both embedded for retrieval and shown to the LLM.
query = 'who did Meursault murder in the book "The Stranger", and what psychological consequences it had for the protagonist'
docs = docsearch.similarity_search(query, k=4)

chain.run(input_documents=docs, question=query)


 Meursault murders an Arab man on a beach, and this event leads to profound psychological repercussions as he grapples with his own sense of morality and existential meaninglessness.

' Meursault murders an Arab man on a beach, and this event leads to profound psychological repercussions as he grapples with his own sense of morality and existential meaninglessness.'

In [41]:
# Interactive question: read it from the user, retrieve the 4 most similar
# chunks and answer from them.
query = input("Enter query for the book 'The Stranger' by Albert Camus: ").strip()
if not query:
    # Fail early instead of sending a blank query to the vector store and LLM
    raise ValueError("The query must not be empty")

docs = docsearch.similarity_search(query, k=4)
chain.run(input_documents=docs, question=query)


Enter query for the book 'The Stranger by Albert Camus: What are some more works of Albert Camus besides "The Stranger"
 Besides "The Stranger", Albert Camus has written other works such as "The Plague", "The Fall", and "Exile and the Kingdom". He also wrote philosophical essays like "The Myth of Sisyphus" and "The Rebel", as well as plays. Additionally, he was awarded the Nobel Prize for Literature in 1957.

' Besides "The Stranger", Albert Camus has written other works such as "The Plague", "The Fall", and "Exile and the Kingdom". He also wrote philosophical essays like "The Myth of Sisyphus" and "The Rebel", as well as plays. Additionally, he was awarded the Nobel Prize for Literature in 1957.'