In [1]:
from langchain.document_loaders import TextLoader

# text to write to a local file
# taken from https://www.theverge.com/2023/3/14/23639313/google-ai-language-model-palm-api-challenge-openai
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3
Google is offering developers access to one of its most advanced AI language models: PaLM.
The search giant is launching an API for PaLM alongside a number of AI enterprise tools
it says will help businesses “generate text, images, code, videos, audio, and more from
simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or
Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,
PaLM is a flexible system that can potentially carry out all sorts of text generation and
editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for
example, or you could use it for tasks like summarizing text or even writing code.
(It’s similar to features Google also announced today for its Workspace apps like Google
Docs and Gmail.)
"""

# write text to local file
with open("my_file.txt", "w") as file:
    file.write(text)

# use TextLoader to load text from local file
loader = TextLoader("my_file.txt")
docs_from_file = loader.load()

print(len(docs_from_file))
# 1

1


In [2]:
from langchain.text_splitter import CharacterTextSplitter

# create a text splitter
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)

# split documents into chunks
docs = text_splitter.split_documents(docs_from_file)

print(len(docs))
# 2


Created a chunk of size 373, which is longer than the specified 200


2


In [3]:
from langchain.embeddings import HuggingFaceEmbeddings

# Before executing the following code, make sure to have
# your OpenAI key saved in the “OPENAI_API_KEY” environment variable.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain.vectorstores import DeepLake
from dotenv import load_dotenv
import os

load_dotenv("keys.env") 
ACTIVELOOP_TOKEN= os.getenv("ACTIVELOOP_TOKEN")
# Before executing the following code, make sure to have your
# Activeloop key saved in the “ACTIVELOOP_TOKEN” environment variable.
# create Deep Lake dataset
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = "akashghanathey"
my_activeloop_dataset_name = "langchain_course_indexers_retrievers"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings,read_only=True)

# add documents to our Deep Lake dataset

Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Deep Lake Dataset in hub://akashghanathey/langchain_course_indexers_retrievers already exists, loading from the storage


In [5]:
retriever = db.as_retriever()

In [6]:
from langchain_community.llms import GPT4All
from langchain_core.prompts import PromptTemplate
from langchain_core.callbacks.base import BaseCallbackManager
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
llm = GPT4All(
    model="C:\\Users\\akash\\OneDrive\\Documents\\GPT4ALL\\Meta-Llama-3-8B-Instruct.Q4_0.gguf",
    callbacks=callback_manager,
    verbose=False,
    device="gpu"  # Use lowercase and correct parameter name
)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
	base_compressor=compressor,
	base_retriever=retriever
)

In [7]:
retrieved_docs = compression_retriever.get_relevant_documents(
	"what is google doing how will it effect openai"
)
print(retrieved_docs[0].page_content)

  retrieved_docs = compression_retriever.get_relevant_documents(


 
1. Google opens up its AI language model PaLM to challenge OpenAI and GPT-3.
2. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.” 
NO_OUTPUT
> Answer: Google is opening up its AI language model PaLM to challenge OpenAI and GPT-3. It's launching an API for PaLM alongside other AI enterprise tools that will help businesses generate various types of content from simple text prompts.
>
*AS IS*
Extracted relevant parts:
1. Google opens up its AI language model PaLM to challenge OpenAI and GPT-3.
2. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.” 
NO_OUTPUT
> Answer: Google is opening up its AI language model PaLM to challenge OpenAI and GPT-3. It's launching an API for PaL

In [9]:
import sys
print(sys.executable)


C:\Users\akash\anaconda3\envs\Project_1\python.exe


In [7]:
import streamlit as st
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser

In [8]:
llm_engine=ChatOllama(
    model="deepseek-r1:14b",
    base_url="http://localhost:11434",

    temperature=0.3

)