In [33]:
    from IPython.display import HTML, display

    def set_css(*_event_args):
        """Emit a <style> tag that makes <pre> output wrap long lines.

        Registered below as a ``pre_run_cell`` callback. Any positional
        arguments supplied by the event system are accepted and ignored, so
        the callback works whether or not IPython passes an ``info`` object.
        """
        display(HTML(''' <style> pre { white-space: pre-wrap; } </style> '''))

    # Re-running this cell defines a brand-new `set_css` object, which the
    # event manager would register in addition to the earlier one. Remove any
    # callback left over from a previous run so the style is injected once.
    _events = get_ipython().events
    for _registered in list(_events.callbacks['pre_run_cell']):
        # Some IPython versions wrap callbacks; look through the wrapper.
        _target = getattr(_registered, '__wrapped__', _registered)
        if getattr(_target, '__name__', '') == 'set_css':
            _events.unregister('pre_run_cell', _registered)
    _events.register('pre_run_cell', set_css)

# SETUP

In [1]:
!pip install langchain deeplake openai tiktoken dotenv langchain-google-community

Collecting langchain==0.0.208
  Downloading langchain-0.0.208-py3-none-any.whl.metadata (13 kB)
Collecting deeplake==3.9.27
  Downloading deeplake-3.9.27.tar.gz (618 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m618.7/618.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting openai==0.27.8
  Downloading openai-0.27.8-py3-none-any.whl.metadata (13 kB)
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain==0.0.208)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl.metadata (22 kB)
Collecting langchainplus-sdk>=0.0.13 (from langchain==0.0.208)
  Downloading langchainplus_sdk-0.0.20-py3-none-any.whl.metadata (8.7 kB)
Collecting numpy<2,>=1 (from langchain==0.0.208)
  Downloading numpy-1.26.4-cp31

In [1]:
# Mount Google Drive so that files stored there are reachable from this
# Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from dotenv import load_dotenv

# Load environment variables from the .env file on the mounted Drive
# (presumably the OPENAI_API_KEY / ACTIVELOOP_TOKEN values the later cells
# ask for — verify against the file). The call's return value is the cell
# output.
ENV_FILE = '/content/drive/MyDrive/rag/langchain-vector/langchain-vec.env'
load_dotenv(ENV_FILE)

True

In [3]:
from langchain.llms import OpenAI



## API TEST DRIVE

In [None]:
# Prerequisite: the OpenAI key must be stored in the "OPENAI_API_KEY"
# environment variable before this cell runs.
# This section uses temperature=0.9; the memory and Deep Lake sections
# further down re-create `llm` with temperature=0.
llm = OpenAI(
    temperature=0.9,
    model="gpt-3.5-turbo-instruct",
)

In [None]:
# Send a single free-form prompt to the LLM and print its completion.
# NOTE(review): the recorded output stops mid-sentence, presumably because
# the model's default output-token limit was reached — confirm.
text = "Suggest a personalized workout routine for someone looking to improve cardiovascular endurance and prefers outdoor activities."
completion = llm(text)
print(completion)



Warm-up:
- Jog for 5 minutes around the neighborhood or park

Circuit 1:
- Sprint for 30 seconds, followed by a 1-minute jog
- Repeat for 10 rounds

Circuit 2:
- Jump rope for 1 minute
- Mountain climbers for 30 seconds
- High knees for 30 seconds
- Rest for 1 minute
- Repeat for 3 rounds

Circuit 3:
- Bike for 10 minutes at a moderate pace
- Stop and do 10 push-ups
- Bike for another 10 minutes, increasing intensity every 2 minutes
- Stop and do 10 burpees
- Bike for another 10 minutes, increasing intensity every 2 minutes

Finisher:
- Run up and down a nearby hill or a flight of stairs for 5 minutes
- Cool down with a 5 minute walk or jog

Outdoor Activities:
- Go for a hike or trail run for 30-45 minutes
- Play a game of tennis or basketball for 30 minutes
- Swim laps in a pool for 20-30 minutes
- Go for a bike ride for 45 minutes to an hour
- Try a fun outdoor activity like paddleboarding, kayaking, or rock


### LLMChain

In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

llm = OpenAI(temperature=0.9, model="gpt-3.5-turbo-instruct")

# Prompt with a single placeholder, `product`, filled in when the chain runs.
prompt = PromptTemplate(
    template="What is a good name for a company that makes {product}?",
    input_variables=["product"],
)

# Show the template object as the cell output.
prompt

PromptTemplate(input_variables=['product'], output_parser=None, partial_variables={}, template='What is a good name for a company that makes {product}?', template_format='f-string', validate_template=True)

In [None]:
# Combine the prompt template and the LLM into one chain.
chain = LLMChain(prompt=prompt, llm=llm)

# Only the value for the single input variable is passed to `run`.
product = "eco-friendly water bottles"
print(chain.run(product))



1. "EcoHydrate"
2. "GreenWave Bottles"
3. "EnviroBottle Co."
4. "SustainaBottle"
5. "PureEarth Bottles"
6. "EcoWater Co."
7. "Revolutionary Bottle Co."
8. "EcoFlow Bottles"
9. "Nature's Resource Bottles"
10. "EcoSip Co."


In [None]:

# Reuse the `chain` built in the previous cell (same llm and prompt) instead
# of constructing an identical LLMChain a second time.
# The product description previously read "Handhelp"; corrected to "Handheld".
print(chain.run("Holographic Handheld Gaming console"))



"HyperHolo Gaming Co."


### LLM Memory

In [None]:
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
import time

llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0)

# The chain is given a ConversationBufferMemory, so each `predict` call is
# made with the earlier turns available. verbose=True prints chain output
# for every call.
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    memory=ConversationBufferMemory()
)

# Start the conversation
conversation.predict(input="Tell me about yourself.")

# Continue the conversation
conversation.predict(input="What can you do?")
conversation.predict(input="How can you help me with managing my data science projects?")

# NOTE(review): presumably here to let the verbose output settle before the
# final print — confirm whether this pause is needed.
time.sleep(2)

# Display the conversation: print the transcript held in the memory buffer.
# Printing the chain object itself only shows its repr, not the dialogue.
print(conversation.memory.buffer)

### Deep Lake VectorStore

In [19]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Prerequisite: the Activeloop key must be stored in the "ACTIVELOOP_TOKEN"
# environment variable before running this section.

# Completion model (temperature=0) and the embedding model for the store.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Source sentences: one birth-date fact per string.
texts = [
    "Napoleon Bonaparte was born in 15 August 1769",
    "Louis XIV was born in 5 September 1638",
]

# Turn the raw strings into Document objects. With chunk_size=1000 each of
# these short sentences ends up as a single Document (see the next cell).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.create_documents(texts)

In [20]:
# Inspect the Document objects produced by the splitter in the previous cell.
docs

[Document(page_content='Napoleon Bonaparte was born in 15 August 1769', metadata={}),
 Document(page_content='Louis XIV was born in 5 September 1638', metadata={})]

In [21]:
# Deep Lake dataset location, assembled from the org id and dataset name.
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = "vedantti"
my_activeloop_dataset_name = "langchain_course_from_zero_to_hero"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Open the dataset at that path, using `embeddings` to embed stored text.
db = DeepLake(
    embedding_function=embeddings,
    dataset_path=dataset_path,
)

# Store the documents; the value returned by `add_documents` is the cell
# output.
# NOTE(review): re-running this cell presumably appends the same documents
# again — confirm before executing it more than once.
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:17<00:00, 17.96s/it]

Dataset(path='hub://vedantti/langchain_course_from_zero_to_hero', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   





['3867b110-6d9e-11f0-bcad-0242ac1c000c',
 '3867b2e6-6d9e-11f0-bcad-0242ac1c000c']

In [24]:
# Question-answering chain that pulls its context from the Deep Lake store
# through a retriever.
retriever = db.as_retriever()
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

In [25]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

# The only tool offered to the agent: it forwards a question to the
# retrieval QA chain and returns the chain's answer.
qa_tool = Tool(
    func=retrieval_qa.run,
    name="Retrieval QA System",
    description="Useful for answering questions.",
)
tools = [qa_tool]

# Zero-shot ReAct agent; verbose=True prints its intermediate steps.
agent = initialize_agent(
    tools,
    llm,
    verbose=True,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [28]:
# Ask the agent for a fact that is present in the stored documents.
question = "When was Napoleone born?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to answer this question
Action: Retrieval QA System
Action Input: When was Napoleone born?[0m
Observation: [36;1m[1;3m
Napoleon Bonaparte was born in 15 August 1769.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Napoleon Bonaparte was born in 15 August 1769.[0m

[1m> Finished chain.[0m
Napoleon Bonaparte was born in 15 August 1769.


In [29]:
# Hypothetical question. The stored texts contain only birth dates, so any
# age in the answer comes from the LLM itself rather than from retrieval.
question = "If Napolean died in 1799, what was his age when he died?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "What was Napolean's age when he died?"[0m
Observation: [36;1m[1;3m
Napoleon was 51 years old when he died.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: 51 years old.[0m

[1m> Finished chain.[0m
51 years old.


In [30]:
# Same hypothetical as the previous cell, minus the comma. In the recorded
# run the agent passed a different query to the tool, and the answer changed.
question = "If Napolean died in 1799 what was his age when he died?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "What was Napolean's age when he died in 1799?"[0m
Observation: [36;1m[1;3m
Napoleon was 30 years old when he died in 1799.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: Napoleon was 30 years old when he died in 1799.[0m

[1m> Finished chain.[0m
Napoleon was 30 years old when he died in 1799.


#### Adding more data to Vector Store

In [31]:
# Re-open the existing Deep Lake dataset with the same embedding function.
# NOTE(review): this rebinds `db` to a new object; `retrieval_qa` and `agent`
# created earlier still reference the previous one — rebuild them before
# querying for the documents added here.
db = DeepLake(embedding_function=embeddings, dataset_path=dataset_path)

# Two more birth-date facts to append to the dataset.
texts = [
    "Lady Gaga was born in 28 March 1986",
    "Michael Jeffrey Jordan was born in 17 February 1963",
]

# Convert the new strings to Documents, using the same splitter settings as
# for the first batch.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.create_documents(texts)

# Append the documents; the value returned by `add_documents` is the cell
# output.
db.add_documents(docs)

Deep Lake Dataset in hub://vedantti/langchain_course_from_zero_to_hero already exists, loading from the storage


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:17<00:00, 17.53s/it]

Dataset(path='hub://vedantti/langchain_course_from_zero_to_hero', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (4, 1536)  float32   None   
    id        text      (4, 1)      str     None   
 metadata     json      (4, 1)      str     None   
   text       text      (4, 1)      str     None   





['57f755ac-6d9f-11f0-bcad-0242ac1c000c',
 '57f7570a-6d9f-11f0-bcad-0242ac1c000c']

In [32]:
# NOTE(review): at this point `agent` is still the one built before `db` was
# re-created, so it presumably queries a view of the dataset that lacks the
# new documents. The recorded run kept getting "I don't know" until it was
# interrupted. The agent rebuilt in the following cell answers correctly.
question = "When was Michael Jordan born?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m I don't know.[0m
Thought:[32;1m[1;3m I should try rephrasing the question.
Action: Retrieval QA System
Action Input: "What is Michael Jordan's birthdate?"[0m
Observation: [36;1m[1;3m I don't know.[0m
Thought:[32;1m[1;3m I should try using a different source.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m I don't know.[0m
Thought:[32;1m[1;3m I should try using a different source.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m I don't know.[0m
Thought:[32;1m[1;3m I should try using a different source.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m I don't know.[0m
Thought:[32;1m[1;3m I should

KeyboardInterrupt: 

In [34]:
# Re-create the LLM wrapper with temperature=0, the same setting the earlier
# Deep Lake cells used.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

# Build a fresh retrieval QA chain on top of the current `db` object.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Wrap the chain as a tool. The description is required by the zero-shot
# agent type (per the original note on this argument).
qa_tool = Tool(
    func=retrieval_qa.run,
    name="Retrieval QA System",
    description="Useful for answering questions.",
)
tools = [qa_tool]

# New agent that uses the rebuilt tool; verbose=True prints its steps.
agent = initialize_agent(
    tools,
    llm,
    verbose=True,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [35]:
# Repeat the Michael Jordan question using the agent rebuilt in the previous
# cell.
question = "When was Michael Jordan born?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m Michael Jordan was born on 17 February 1963.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: Michael Jordan was born on 17 February 1963.[0m

[1m> Finished chain.[0m
Michael Jordan was born on 17 February 1963.
