In [None]:
!pip -q install langchain
!pip -q install bitsandbytes accelerate xformers einops
!pip -q install datasets loralib sentencepiece
!pip -q install pypdf
!pip -q install sentence_transformers

In [None]:
# Chroma is the vector-store backend used through langchain.vectorstores.Chroma.
!pip install chromadb


In [None]:
# OpenAI client and its tokenizer package.
# NOTE(review): OpenAIEmbeddings / ChatOpenAI are imported but not used in the
# visible cells — confirm these installs are still required.
!pip install openai
!pip install tiktoken

In [4]:
import os
import sys

import torch
import transformers
from huggingface_hub import notebook_login
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from transformers import pipeline

from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import TextLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
!mkdir docs

In [None]:
# Mount Google Drive inside the Colab runtime at /content/MyDrive.
# NOTE(review): the visible cells read documents from ./docs rather than from
# this mount — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/MyDrive')

In [7]:
# Upload the source files into the docs folder before running this cell.
# Every supported file found there is parsed and appended to `document`, a flat
# list of LangChain Document objects.
DOCS_DIR = "./docs"

# Lower-cased file extension -> loader class used to parse that file type.
LOADER_BY_EXTENSION = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    # NOTE(review): legacy binary .doc files are still routed to Docx2txtLoader,
    # as before; confirm that docx2txt can actually parse them.
    ".doc": Docx2txtLoader,
    ".txt": TextLoader,
}

document = []
# os.listdir returns entries in arbitrary order; sorting makes the document
# (and therefore chunk) order reproducible across runs.
for file_name in sorted(os.listdir(DOCS_DIR)):
    # Compare extensions case-insensitively so "Report.PDF" is not skipped.
    extension = os.path.splitext(file_name)[1].lower()
    loader_cls = LOADER_BY_EXTENSION.get(extension)
    if loader_cls is None:
        continue  # unsupported file type: ignore it
    loader = loader_cls(os.path.join(DOCS_DIR, file_name))
    document.extend(loader.load())

In [None]:
# Inspect the loaded documents (this displays the entire list).
document

In [None]:
# Number of Document objects produced by the loaders.
len(document)

In [37]:
# Splitter configuration: break text on newlines, target chunks of 1000 with an
# overlap of 100 between neighbouring chunks.
# NOTE(review): sizes are presumably measured in characters — confirm.
document_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=100,
)

In [None]:
# Split every loaded document into chunks with the splitter configured earlier.
document_chunks=document_splitter.split_documents(document)

In [39]:
# Number of chunks produced by the split.
len(document_chunks)

323

In [40]:
# Peek at the first chunk: its text plus the source-file metadata.
document_chunks[0]

Document(page_content='The North Atlantic Treaty Organization (NATO, /ˈneɪtoʊ/; French: Organisation du traité de l\'Atlantique nord, OTAN), also called the North Atlantic Alliance, is an intergovernmental military alliance between 31 member states – 29 European and two North American. Established in the aftermath of World War II, the organization implemented the North Atlantic Treaty, signed in Washington, D.C., on 4 April 1949.[3][4] NATO is a collective security system: its independent member states agree to defend each other against attacks by third parties. During the Cold War, NATO operated as a check on the threat posed by the Soviet Union. The alliance remained in place after the dissolution of the Soviet Union and the Warsaw Pact, and has been involved in military operations in the Balkans, the Middle East, South Asia, and Africa. The organization\'s motto is animus in consulendo liber[5] (Latin for "a mind unfettered in deliberation").', metadata={'source': './docs/nato.txt'})

In [41]:
# Peek at the second chunk to check where the first split boundary fell.
document_chunks[1]

Document(page_content="NATO's main headquarters are located in Brussels, Belgium, while NATO's military headquarters are near Mons, Belgium. The alliance has increased its NATO Response Force deployments in Eastern Europe[6] and the combined militaries of all NATO members include around 3.5 million soldiers and personnel.[7] Their combined military spending as of 2022 constituted around 55 percent of the global nominal total.[8] Moreover, members have agreed to reach or maintain the target defence spending of at least two percent of their GDP by 2024.[9][10]", metadata={'source': './docs/nato.txt'})

In [42]:
# Sentence-transformers model used to embed each chunk for similarity search.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
# Display the embeddings object to confirm its configuration.
embeddings

In [44]:
# Embed all chunks and index them in a Chroma store backed by ./data.
# NOTE(review): re-running this cell against an existing ./data directory
# presumably adds the same chunks again — confirm and clear the folder if so.
vectordb = Chroma.from_documents(
    document_chunks,
    embedding=embeddings,
    persist_directory='./data',
)


In [45]:
# Ask the vector store to write its contents to the configured persist directory.
vectordb.persist()

In [None]:
# Hugging Face login: opens the interactive token prompt in the notebook.
# The model download in the next cell authenticates with this stored login.
notebook_login()

In [None]:
# Load the Llama 2 chat tokenizer and a 4-bit quantized copy of the model.
# The checkpoint id is defined once so tokenizer and model cannot drift apart.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# token=True reuses the credentials stored by notebook_login(); it replaces the
# deprecated use_auth_token= argument.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)

# Quantization is requested through an explicit BitsAndBytesConfig instead of
# the deprecated bare load_in_4bit= keyword; only 4-bit loading is enabled, so
# every other quantization setting keeps its default.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map='auto',            # let accelerate place the layers on available devices
    torch_dtype=torch.float16,    # dtype for the non-quantized parts of the model
    token=True,
    quantization_config=quantization_config,
)

In [48]:
# Text-generation pipeline around the already-loaded model and tokenizer.
# dtype and device placement were fixed when `model` was instantiated, so they
# are not repeated here: the earlier torch_dtype=torch.bfloat16 contradicted the
# float16 load, and device_map has no effect on a model object that is already
# dispatched.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,   # upper bound on tokens generated per call
    min_new_tokens=-1,    # NOTE(review): negative value presumably means "no minimum" — confirm
    top_k=30,             # NOTE(review): only relevant when sampling; do_sample is not set — confirm intent
)


In [49]:
# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
# NOTE(review): temperature is supplied via model_kwargs; verify that the wrapper
# forwards it to generation for a pre-built pipeline, and that 0 is an accepted value.
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0})

In [50]:
# Display the wrapper to confirm it holds the pipeline and model_kwargs.
llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7dff241770a0>, model_kwargs={'temperature': 0})

In [51]:
# Conversation memory for the chain: history is kept under the 'chat_history'
# key and returned as message objects.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

In [52]:
# Create the conversational Q/A chain.
# The retriever is asked for k=6 chunks per query; `memory` carries the chat
# history between calls.
retriever = vectordb.as_retriever(search_kwargs={'k': 6})
pdf_qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    verbose=False,
    memory=memory,
)

In [53]:
# Ask the chain a question; the returned dict carries the generated text under 'answer'.
result=pdf_qa({"question":"what is nato?"})

In [54]:
# Show only the generated answer text.
result['answer']

' NATO is an intergovernmental military alliance between 31 member states – 29 European and two North American. Established in the aftermath of World War II, the organization implemented the North Atlantic Treaty, signed in Washington, D.C., on 4 April 1949. NATO is a collective security system: its independent member states agree to defend each other against attacks by third parties. During the Cold War, NATO operated as a check on the threat posed by the Soviet Union. The alliance remained in place after the dissolution of the Soviet Union and the Warsaw Pact, and has been involved in military operations in the Balkans, the Middle East, South Asia, and Africa. The organization\'s motto is animus in consulendo liber (Latin for "a mind unfettered in deliberation").'