# 1. Import dependencies

In [7]:

from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
import pinecone
import os

# 2. Load your data

In [8]:
#loader = PyPDFLoader("Deep_Learning_Ian_Goodfellow.pdf")
loader = PyPDFLoader("perlembagaan_eng.pdf")
data = loader.load()



In [9]:
#Taking a peek at our data
print(len(data))
data[0]

146


Document(page_content='The Constitution of Malaysia \nPictorial Narrative \nThe composition is dominated by the Jalur Gemilang -the national flag \nof Malaysia. The valley of the blue canton signifies the unity of the \nMalaysian people and rising above it, the Crescent and the 14-point Federal \nStar, its golden rays illuminating other objects in the painting. The Crescent \nsymbolises Islam, the country\'s official religion. The royal yellow is also \nthe colour of the Malay rulers. Radiating from the Federal Star, the Stripes \nof Glory: the 14 alternate red and white stripes represent the equal status \nwithin the federation of the 13 member states and the federal government. \nCentral are the Petronas Towers denoting Malaysia\'s economic prog\xad\nress and modernity. Over the Skybridge, the National Monument- Tugu \nNegara -remembers those who lost their lives in Malaysia\'s struggle for \nfreedom, principally duri~g the Japanese occupation in World War II and \nthe Malayan emerge

# 3. Chunking the data for embedding

In [10]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

docs=text_splitter.split_documents(data)

len(docs)

1566

In [11]:
docs[123]

Document(page_content="have a particular Malaysian resonance -the political economy of law; the \nrule of law; constitutional government; constitutional monarchy; parlia\xad\nmentary democracy; federalism and states' rights; fundamental rights; the \njudiciary; even the separation of powers. This Malaysian resonance -\nconstitutional ideas in Malaysian garb -will be in evidence in all the \nchapters of this book. In the spirit of this series the task of this book is", metadata={'source': 'perlembagaan_eng.pdf', 'page': 13})

# 4. Initialize embeddings

In [12]:
huggingface_key = '<INSERT YOUR KEY HERE>'
pinecone_key = '<INSERT YOUR KEY HERE>'

os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_key
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', pinecone_key)
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')
      

In [13]:
embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')



# 5. Initialize Vector Database

In [14]:
#initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV  # next to api key in console
)
index_name = "langchainpinecone" # put in the name of your pinecone index here. must go to app.pinecone.io to check 

### Here is where our data/docs is embedded and uploaded to the pinecone vector database 


In [15]:
docsearch=Pinecone.from_texts([t.page_content for t in docs], embeddings, index_name=index_name)

# 6. Similarity Search

In [16]:
query="How many states does Malaysia have"
docs=docsearch.similarity_search(query,k=1)
docs

[Document(page_content="ter five). Of course this arrangement affected only four of the 13 States \nnow forming Malaysia, but all the States have been profoundly affected \nby this 1909 Federal Constitution. The Rulers' influence was increas\xad\ningly limited to customary and religious matters, and by convention they \ndid not participate in Federal Council debates. Within the States, powers \nwere vested more and more in the person of the Ruler, but exercised in")]

# Llama 2 Model

In [17]:
#!git clone --recursive -j8 https://github.com/abetlen/llama-cpp-python.git



In [18]:

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from huggingface_hub import hf_hub_download
from langchain.chains.question_answering import load_qa_chain

In [29]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Verbose is required to pass to the callback manager

In [37]:

#model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
#model_name_or_path = "TheBloke/Llama-2-7b-Chat-GGUF"
#model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin" # the model is in bin format


model_name_or_path = "TheBloke/CodeLlama-13B-Python-GGUF"
model_basename = "codellama-13b-python.Q5_K_M.gguf"


#model_name_or_path = "TheBloke/Llama-2-7b-Chat-GGUF"
#model_basename = "llama-2-7b-chat.Q4_K_M.gguf" # the model is in bin format


#model_name_or_path = "TheBloke/Mistral-7B-v0.1-GGUF"
#model_basename = "mistral-7b-v0.1.Q4_K_M.gguf" # the model is in bin format


In [38]:
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)


Downloading (…)b-python.Q5_K_M.gguf: 100%|██████████| 9.23G/9.23G [18:05<00:00, 8.50MB/s]


In [39]:
n_gpu_layers = 3  # Change this value based on your model and your GPU VRAM pool.
n_batch = 256  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Loading model,
llm = LlamaCpp(
    model_path=model_path,
    max_tokens=256,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    n_ctx=1024,
    verbose=True,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [44]:
chain=load_qa_chain(llm, chain_type="stuff")
query="how many states are there in Malaysia"

docs=docsearch.similarity_search(query,k=2)
docs

[Document(page_content="ter five). Of course this arrangement affected only four of the 13 States \nnow forming Malaysia, but all the States have been profoundly affected \nby this 1909 Federal Constitution. The Rulers' influence was increas\xad\ningly limited to customary and religious matters, and by convention they \ndid not participate in Federal Council debates. Within the States, powers \nwere vested more and more in the person of the Ruler, but exercised in"),
 Document(page_content="ter five). Of course this arrangement affected only four of the 13 States \nnow forming Malaysia, but all the States have been profoundly affected \nby this 1909 Federal Constitution. The Rulers' influence was increas\xad\ningly limited to customary and religious matters, and by convention they \ndid not participate in Federal Council debates. Within the States, powers \nwere vested more and more in the person of the Ruler, but exercised in")]

In [45]:
chain.run(input_documents=docs, question=query)

 all 13 of them the the the the the the the the the the the the the the the the the the the the States States States were are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are in Malaysia



Answer: according to Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Articl

' all 13 of them the the the the the the the the the the the the the the the the the the the the States States States were are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are are in Malaysia\n\n\n\nAnswer: according to Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article Article A

In [50]:
#query="YOLOv7 trained on which dataset"
#query="how many states are there in Malaysia"
#query="How do you change the constitution"
query="What are the difference between federal and states"


docs=docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

'Federal powers are largely limited to Islamic law and custom, land, agri\xad culture,'

In [47]:
from langchain.llms import HuggingFaceHub

In [48]:
llm=HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})



In [71]:
chain=load_qa_chain(llm, chain_type="stuff")
query="how many states are there in Malaysia"
query="What are the roles of the Rulers"
query="Who is the head of the country"
query="How do you change the constitution"


docs=docsearch.similarity_search(query)

chain.run(input_documents=docs, question=query)

'a Bill to amend the Constitution must be supported at its second and third readings by two-'