In [2]:
# Download the Llama model 7B from Hugging Face repository and store the .bin file in a directory (around 3GB size requirement)
# https://huggingface.co/TheBloke/LLaMa-7B-GGML/tree/main

In [1]:
# If there is an error for installing llama-cpp-python below:
# pip install llama-cpp-python
# Error: could not build wheels for llama-cpp-python, which is required to install pyproject.toml-based projects
# Download Visual Studio 2022 Build Tools
# Check : https://github.com/imartinez/privateGPT/issues/445

In [3]:
#import the dependencies
from llama_cpp import Llama

In [22]:
# Llama 7B model path.
# Set the LLAMA_7B_MODEL_PATH environment variable to point at your own copy of the
# .bin file; when it is unset (or empty) the original local download location is used.
import os

llama_7B_model = (
    os.environ.get("LLAMA_7B_MODEL_PATH")
    or "C:/Users/Dipjyoti/Downloads/llama-7b.ggmlv3.q2_K.bin"
)

In [4]:
# Load the model with llama-cpp-python from the saved model path.
# Note: the name `llm` is rebound to a LangChain LlamaCpp wrapper later in this notebook.
llm = Llama(model_path= llama_7B_model)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [5]:
# Pass a prompt to your LLM. No stop sequence is given here; the generated text is
# read from response['choices'][0]['text'] in the next cell.
response =llm("Who directed The Dark Knight?")

In [6]:
# Show the generated text of the first completion choice
first_choice = response["choices"][0]
print(first_choice["text"])


Christopher Nolan is the director of both Batman Begins, starring Christian Bale, and its sequel, The Dark Knight. Other actors in the films include Maggie Gyllenhaal as Rachel Dawes, Gary Oldman as Lieutenant Gordon, and Michael Caine as Alfred Pennyworth, Bruce Wayne’s (Bale) butler/chauffeur.
Who is in the movie “Pirates of the Caribbean: 3”?
Who is in the movie “Transformers: Dark of the Moon”?


In [None]:
# Successfully ran our first LLM on the CPU, completely offline. The text is sampled, so the output can vary from run to run.

#### Getting Started with LLM — LangChain Integration

In [None]:
# LangChain framework to develop applications using LLMs
# Prompt engineering: a prompt template is a reproducible way to generate a prompt. It contains a text string (the template)
# that can take in a set of parameters from the end user and generate a prompt.

In [7]:
# Import PromptTemplate
from langchain import PromptTemplate

In [8]:
# An example prompt with no input variables
template="Tell me a joke"

In [9]:
#Create prompt from template
prompt = PromptTemplate.from_template(template)

In [10]:
# Check prompt variable
prompt

PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Tell me a joke', template_format='f-string', validate_template=True)

In [11]:
# Check input variables
prompt.input_variables

[]

In [12]:
# Check prompt template
prompt.template

'Tell me a joke'

##### Example prompt with one input variable

In [13]:
# Define a template with one placeholder, {adjective}
template="Tell me a {adjective} joke"
# Create prompt from template; the placeholder is picked up as an input variable (see the repr below)
prompt = PromptTemplate.from_template(template)
# Check prompt variable
prompt

PromptTemplate(input_variables=['adjective'], output_parser=None, partial_variables={}, template='Tell me a {adjective} joke', template_format='f-string', validate_template=True)

In [14]:
# Check input variables
prompt.input_variables

['adjective']

In [15]:
# Check prompt template
prompt.template

'Tell me a {adjective} joke'

In [16]:
# Format the prompt
formatted_prompt = prompt.format(adjective = "funny")
formatted_prompt

'Tell me a funny joke'

##### Example prompt with multiple input variables

In [17]:
# Define a template with two placeholders, {adjective} and {content}
template="Tell me a {adjective} joke about {content}"
# Create prompt from template; both placeholders are picked up as input variables (see the repr below)
prompt = PromptTemplate.from_template(template)
# Check prompt variable
prompt

PromptTemplate(input_variables=['adjective', 'content'], output_parser=None, partial_variables={}, template='Tell me a {adjective} joke about {content}', template_format='f-string', validate_template=True)

In [18]:
# Check input variables
prompt.input_variables

['adjective', 'content']

In [19]:
# Check prompt template
prompt.template

'Tell me a {adjective} joke about {content}'

In [20]:
# Format the prompt
formatted_prompt = prompt.format(adjective = "funny", content = "chickens")
formatted_prompt

'Tell me a funny joke about chickens'

#### Prompt the LLM

In [21]:
# import the dependencies
from langchain.llms import LlamaCpp
from langchain import PromptTemplate

In [23]:
# Instantiate LangChain's LlamaCpp wrapper on the same model file.
# This rebinds `llm`, replacing the llama_cpp.Llama instance created earlier.
llm = LlamaCpp(model_path= llama_7B_model)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [24]:
# Define a template
template = """Q: Who directed {movie_name}
Answer:"""

In [25]:
#Create prompt from template
prompt = PromptTemplate.from_template(template)

In [26]:
#Check prompt variable
prompt

PromptTemplate(input_variables=['movie_name'], output_parser=None, partial_variables={}, template='Q: Who directed {movie_name}\nAnswer:', template_format='f-string', validate_template=True)

In [27]:
# Check input variables
prompt.input_variables

['movie_name']

In [28]:
# Check prompt template
prompt.template

'Q: Who directed {movie_name}\nAnswer:'

In [29]:
# Format the prompt
formatted_prompt = prompt.format(movie_name = "The Dark Knight")
formatted_prompt

'Q: Who directed The Dark Knight\nAnswer:'

In [30]:
# Prompt the LLM.
# Only the prompt and the stop sequences are passed: generation halts at the next
# "Q:" or newline, so just the one-line answer comes back. The LLM object is the
# callable itself, so it must not also be handed to its own call as an `llm=` keyword.
llm(prompt=formatted_prompt, stop=["Q:", "\n"])

' Christopher Nolan'

So far we have used individual components. We took the prompt template, formatted it, then took the LLM and passed the formatted prompt to it to generate the answer. Using an LLM in isolation is fine for simple applications, but more complex applications require chaining LLMs — either with each other or with other components.

LangChain provides the Chain interface for such chained 🔗applications. We define a Chain very generically as a sequence of calls to components, which can include other chains. Chains allow us to combine multiple components together to create a single, coherent application.

For example, we can create a chain that takes user input, formats it with a Prompt Template, and then passes the formatted response to an LLM. We can build more complex chains by combining multiple chains together, or by combining chains with other components.

#### LangChain

In [None]:
# Create a very simple chain 🔗 that will take user input, format the prompt with it, and then send it to the LLM using the above 
# individual components that we’ve already created.

In [31]:
prompt

PromptTemplate(input_variables=['movie_name'], output_parser=None, partial_variables={}, template='Q: Who directed {movie_name}\nAnswer:', template_format='f-string', validate_template=True)

In [32]:
# Import the dependencies
from langchain.chains import LLMChain
# Define chain: ties the current prompt template (the movie_name one shown above) to the LLM
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [33]:
# Run the chain, specifying only the value of the input variable.
# A bare string is enough here because the prompt has a single input variable (movie_name).
llm_chain.run("The Dark Knight")

Llama.generate: prefix-match hit


' Christopher Nolan.'

In [None]:
# When dealing with multiple variables, we have the option to input them collectively by utilizing a dictionary.

#### Generating Embeddings and Vectorstore for Question Answering

##### Loading & Transforming Documents

In [34]:
# Import the dependencies
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [35]:
# Load Doc - a text file with wikipedia paragraphs of DC Superheroes.
# "raw.txt" is a relative path, so the file is looked up in the current working directory.
loader = TextLoader("raw.txt")
# load() returns a list of Documents (a single Document for this file, see len(docs) below)
docs = loader.load()

In [36]:
docs

[Document(page_content='Batman does not possess any superpowers, instead relying on his intellect, fighting skills, and wealth.\n\nAs a baby, his parents sent him to Earth in a small spaceship shortly before Krypton was destroyed in a natural cataclysm. His ship landed in the American countryside near the fictional town of Smallville. He was found and adopted by farmers Jonathan and Martha Kent, who named him Clark Kent. \n\nIn her homeland, the island nation of Themyscira, her official title is Princess Diana of Themyscira. The character was being written as a very "confident", "impulsive" and "good-hearted" character in her. He referred to her trait of feeling compassion as both her strength and weakness.\n\nThis new Flash was Barry Allen, a police scientist who gained super-speed when bathed by chemicals after a shelf of them was struck by lightning. He adopted the name The Scarlet Speedster after reading a comic book featuring the Golden Age Flash.', metadata={'source': 'raw.txt'})

In [37]:
# Transform into chunks.
# chunk_size=10 is far smaller than any paragraph in the file, which is why the splitter
# logs the "longer than the specified 10" warnings below; the document ends up as 4 chunks.
# NOTE(review): presumably the split happens on blank lines (the splitter's default
# separator), giving one chunk per paragraph — confirm.
text_splitter = CharacterTextSplitter(chunk_size=10, chunk_overlap=0)
texts = text_splitter.split_documents(docs)

Created a chunk of size 103, which is longer than the specified 10
Created a chunk of size 294, which is longer than the specified 10
Created a chunk of size 287, which is longer than the specified 10


In [38]:
# compare the size of docs and texts
len(docs)

1

In [39]:
len(texts)

4

In [40]:
# visualize chunks and doc
print(docs)

[Document(page_content='Batman does not possess any superpowers, instead relying on his intellect, fighting skills, and wealth.\n\nAs a baby, his parents sent him to Earth in a small spaceship shortly before Krypton was destroyed in a natural cataclysm. His ship landed in the American countryside near the fictional town of Smallville. He was found and adopted by farmers Jonathan and Martha Kent, who named him Clark Kent. \n\nIn her homeland, the island nation of Themyscira, her official title is Princess Diana of Themyscira. The character was being written as a very "confident", "impulsive" and "good-hearted" character in her. He referred to her trait of feeling compassion as both her strength and weakness.\n\nThis new Flash was Barry Allen, a police scientist who gained super-speed when bathed by chemicals after a shelf of them was struck by lightning. He adopted the name The Scarlet Speedster after reading a comic book featuring the Golden Age Flash.', metadata={'source': 'raw.txt'})

In [41]:
print(texts[0])

page_content='Batman does not possess any superpowers, instead relying on his intellect, fighting skills, and wealth.' metadata={'source': 'raw.txt'}


### Embeddings

Word embedding is simply a vector representation of a word, with the vector containing real numbers. Since languages typically contain at least tens of thousands of words, simple binary word vectors can become impractical due to a high number of dimensions. Word embeddings solve this problem by providing dense representations of words in a low-dimensional vector space.

When we talk about retrieval, we mean retrieving the set of vectors that are most similar to a query, where the query is itself a vector embedded in the same latent space.

The base Embeddings class in LangChain exposes two methods: one for embedding documents and one for embedding a query. The former takes as input multiple texts, while the latter takes a single text.

In [42]:
# Import dependencies
from langchain.embeddings import LlamaCppEmbeddings
# Build the embedding model from the same Llama 7B model file that is used for generation
embeddings = LlamaCppEmbeddings(model_path=llama_7B_model)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [43]:
# Convert the LangChain Documents to plain strings (one string per chunk).
# A comprehension replaces the index-based append loop and leaves no loop index
# behind in the notebook namespace.
_texts = [chunk.page_content for chunk in texts]

In [44]:
# Visualize
texts[0]

Document(page_content='Batman does not possess any superpowers, instead relying on his intellect, fighting skills, and wealth.', metadata={'source': 'raw.txt'})

In [45]:
_texts[0]

'Batman does not possess any superpowers, instead relying on his intellect, fighting skills, and wealth.'

In [46]:
# Embed list of texts
embedded_texts = embeddings.embed_documents(_texts)

In [47]:
len(embedded_texts), len(embedded_texts[0])

(4, 4096)

In [48]:
# visualize embedding
embedded_texts[0][:4]

[0.5122052431106567,
 0.07216522097587585,
 -1.0619093179702759,
 -2.156808614730835]

In [49]:
# Embed query
query = "What skills did Batman has?"
embedded_query = embeddings.embed_query(query)
# The query vector has the same length as each embedded document vector (4096)
len(embedded_query)

4096

In [50]:
embedded_query[:4]

[1.7351198196411133,
 -0.13924887776374817,
 -0.7899298071861267,
 -2.5887067317962646]

#### Creating Vector Store & Retrieving Docs

A vector store efficiently manages the storage of embedded data and facilitates vector search operations on your behalf. Embedding and storing the resulting embedding vectors is a prevalent method for storing and searching unstructured data. During query time, the unstructured query is also embedded, and the embedding vectors that exhibit the highest similarity to the embedded query are retrieved. This approach enables effective retrieval of relevant information from the vector store.

##### Use Chroma, an embedding database and vector store specifically crafted to simplify the development of AI applications incorporating embeddings.

In [51]:
# Import Dependencies
from langchain.vectorstores import Chroma
# Create a Chroma vectorstore from the list of chunk Documents (`texts`),
# using the Llama embeddings object to embed them
db = Chroma.from_documents(texts, embeddings)

In [52]:
# Perform similarity search with the query over db (k=1 keeps only the top hit).
# The hits get their own name so the `docs` loaded from raw.txt are not overwritten,
# which keeps the earlier split cell safe to re-run.
query = "Who is an orphan here"
similar_docs = db.similarity_search(query, k=1)
similar_docs

[Document(page_content='This new Flash was Barry Allen, a police scientist who gained super-speed when bathed by chemicals after a shelf of them was struck by lightning. He adopted the name The Scarlet Speedster after reading a comic book featuring the Golden Age Flash.', metadata={'source': 'raw.txt'})]

In [53]:
# Search for documents using a query vector: embed the query ourselves, then look up
# the closest stored vector (k=1 keeps only the top hit).
# The hits get their own name so the `docs` loaded from raw.txt are not overwritten.
query = "Who is an orphan here"
query_vector = embeddings.embed_query(query)
similar_docs_by_vector = db.similarity_search_by_vector(query_vector, k=1)
similar_docs_by_vector

[Document(page_content='This new Flash was Barry Allen, a police scientist who gained super-speed when bathed by chemicals after a shelf of them was struck by lightning. He adopted the name The Scarlet Speedster after reading a comic book featuring the Golden Age Flash.', metadata={'source': 'raw.txt'})]

### Question Answering bot with LLM

In [54]:
# Import Dependencies
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [55]:
# Craft a prompt template that works best for your LLM.
# {context} is filled with the retrieved chunk text and {question} with the user query.

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer:"""

In [56]:
# context will be the similar doc and question will be query.
# This rebinds `prompt`, replacing the movie_name prompt used by the earlier chain cells.
prompt = PromptTemplate.from_template(template)
prompt.input_variables

['context', 'question']

In [57]:
# prepare context from query
query = "Who is an orphan here?"

In [58]:
# Retrieve the single most similar chunk (k=1) and use its text as the context
similar_doc = db.similarity_search(query, k=1)
context = similar_doc[0].page_content
print(context)

In her homeland, the island nation of Themyscira, her official title is Princess Diana of Themyscira. The character was being written as a very "confident", "impulsive" and "good-hearted" character in her. He referred to her trait of feeling compassion as both her strength and weakness.


In [59]:
# Generate the answer: fill the QA prompt with the retrieved context and the query,
# then run it through the LLM
query_llm = LLMChain(prompt=prompt, llm=llm)
chain_inputs = {"context": context, "question": query}
response = query_llm.run(chain_inputs)
print(response)

Llama.generate: prefix-match hit


 Wonder Woman (Diana Prince)
Post by: 20ofcourse on December 31, 2018, 05:40:16 PM
I believe that one was a question in a previous thread...
In his homeland, the island nation of Atlantis, he is known as Arthur Curry, a part-time police officer with the ability to "speak" (and think) through telepathy. The character was created by writer Robert Bernstein and artist Ramona Fradguson.
Post by: 20ofcourse on January 18, 2019, 07:56:34 PM
While the first Aquaman is in the golden age, the second one is part of the silver age. The first being in the 1940s and the second in the 1950s
Answer: Arthur Curry (Aquaman)
Post by: 20ofcourse on January 19, 2019, 06:35:28 AM
While the first Superman is in the golden age, the second one is part of the silver age.


#### End