In [1]:
from langchain.document_loaders import TextLoader

# Sample article used as the corpus for this notebook.
# taken from https://www.theverge.com/2023/3/14/23639313/google-ai_language-model-palm-api-challenge-openai
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3 Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs, PaLM is a flexible system that can potentially carry out all sorts of text generation and editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for example, or you could use it for tasks like summarizing text or even writing code. (It’s similar to features Google also announced today for its Workspace apps like Google Docs and Gmail.)"""

# Persist the article next to the notebook so a later cell can load it from disk.
# The context manager closes the handle as soon as the write is done.
with open("example_file.text", mode="w") as out_file:
    out_file.write(text)

In [2]:
# Read the file written above back in as LangChain documents.
source_path = './example_file.text'
loader = TextLoader(source_path)
docs_from_file = loader.load()

# Show what was loaded (page content plus the `source` metadata).
print(docs_from_file)

[Document(metadata={'source': './example_file.text'}, page_content='Google opens up its AI language model PaLM to challenge OpenAI and GPT-3 Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”\n\nPaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs, PaLM is a flexible system that can potentially carry out all sorts of text generation and editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for example, or you could use it for tasks like summarizing text or even writing code. (It’s similar to features Google also announced today for its Workspace apps like Google Docs and Gmail.)')]


In [3]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter configured for ~200-character chunks with a 20-character overlap.
# NOTE(review): the recorded output reports a 373-character chunk, so the
# requested chunk_size is not a hard limit for this text — confirm whether a
# different splitter/separator is wanted before relying on the size.
text_splitter = CharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)

# Break the loaded documents into chunks and report how many were produced.
docs = text_splitter.split_documents(docs_from_file)
print(len(docs))

Created a chunk of size 373, which is longer than the specified 200


2


In [8]:
import keyring
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

# Credentials are read from the OS keyring so no secret is hard-coded here.
OPENAI_API_KEY = keyring.get_password('openai', 'key_for_windows')
ACTIVELOOP_API_KEY = keyring.get_password('activeloop', 'key_for_windows')
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002', openai_api_key=OPENAI_API_KEY)

# Deep Lake dataset location, built as hub://<org id>/<dataset name>.
my_activeloop_org_id = 'ahn283'
my_activeloop_dataset_name = 'langchain_cource_indexers_retrievers'
data_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
# Pass the embedding model through `embedding`: the `embedding_function`
# keyword is reported as deprecated (and slated for removal) by the library.
db = DeepLake(dataset_path=data_path, embedding=embeddings, token=ACTIVELOOP_API_KEY)

# add documents to our DeepLake dataset
# (each run of this cell appends the chunks again; the call returns the new ids)
db.add_documents(docs)

Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Your Deep Lake dataset has been successfully created!


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:23<00:00, 23.93s/it]

Dataset(path='hub://ahn283/langchain_cource_indexers_retrievers', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   





['f2aacc7f-892f-11ef-b5f0-e0c26439031d',
 'f2aacc80-892f-11ef-bbce-e0c26439031d']

In [9]:
# chroma db
import keyring
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.vectorstores import Chroma

# Re-read the key so this cell can run without the Deep Lake cell above.
OPENAI_API_KEY = keyring.get_password('openai', 'key_for_windows')

# Embedding model used to index the chunks in Chroma.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002', openai_api_key=OPENAI_API_KEY)

# Build a local Chroma index from the chunks, stored under ./vectorstore.
vectorstore_path = './vectorstore'
chroma_db = Chroma.from_documents(docs, embeddings, persist_directory=vectorstore_path)
# NOTE(review): the recorded output shows a warning pointing at this call;
# presumably manual persist() is deprecated in the installed Chroma version —
# confirm against the pinned version before removing it.
chroma_db.persist()

  chroma_db.persist()


In [10]:
# Wrap the Deep Lake store (`db`) in a retriever for use by the chains below.
retriever = db.as_retriever()

In [14]:
# create a retrieval chain
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI

# Chat model that writes the answer from the retrieved chunks.
qa_llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

# Question-answering chain wired to the Deep Lake retriever.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=qa_llm,
)

  llm=ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY),


In [15]:
# Question to answer from the indexed article.
query = "How Google plans to challenge OpenAI?"

# Use the Runnable-style `invoke` call (the same style the retriever cells use)
# instead of the legacy `run` helper. RetrievalQA takes its question under
# "query" and returns the answer text under "result".
response = qa_chain.invoke({"query": query})["result"]
print(response)

Google plans to challenge OpenAI by offering developers access to its advanced AI language model, PaLM. PaLM is a large language model similar to OpenAI's GPT series and Meta's LLaMA family of models. By providing an API for PaLM and other AI enterprise tools, Google aims to help businesses generate text, images, code, videos, audio, and more from simple natural language prompts. This move allows Google to compete with OpenAI and other companies in the field of AI language models.


In [16]:
# Chroma db retriever
# k=3 asks for the three nearest chunks; the recorded output shows Chroma
# lowering this to 2 because the index only holds two chunks.
retriever_chroma = chroma_db.as_retriever(
    search_kwargs={'k': 3}
)

# Keep the hits under their own name: rebinding `docs` here would overwrite the
# chunk list produced by the splitter, so re-running the indexing cells would
# index retrieval results instead of the original chunks.
retrieved_docs_chroma = retriever_chroma.invoke(query)
retrieved_docs_chroma

Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2


[Document(metadata={'source': './example_file.text'}, page_content='Google opens up its AI language model PaLM to challenge OpenAI and GPT-3 Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”'),
 Document(metadata={'source': './example_file.text'}, page_content='PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs, PaLM is a flexible system that can potentially carry out all sorts of text generation and editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for example, or you could use it for tasks like summarizing text or even writing code. (It’s similar to features Google also announced tod

In [18]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Chat model (gpt-3.5-turbo) that the compressor uses to extract passages.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.0, openai_api_key=OPENAI_API_KEY)

# The compressor post-processes whatever the base (Deep Lake) retriever returns.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever
)

# Retrieve compressed documents with `invoke`, matching the Chroma retriever
# cell. The query spelling is corrected ("Google plans") so the lookup is not
# run on a misspelled question.
retrieved_docs = compression_retriever.invoke(
    "How Google plans to challenge OpenAI?"
)

# The result list can be empty, so guard the index instead of raising
# IndexError on retrieved_docs[0].
if retrieved_docs:
    print(retrieved_docs[0].page_content)
else:
    print("No relevant passages were retrieved for the query.")

Google opens up its AI language model PaLM to challenge OpenAI
