In [1]:
from langchain_community.document_loaders import TextLoader

In [2]:
# Read the local speech transcript into LangChain Document objects and show them.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to the notebook or driven by a configurable data directory.
speech_path = r"C:\Users\91996\LANGCHAIN\api\rag\speech.txt"
loader = TextLoader(speech_path, encoding="utf-8")
text_doc = loader.load()
text_doc

[Document(metadata={'source': 'C:\\Users\\91996\\LANGCHAIN\\api\\rag\\speech.txt'}, page_content='Well, I’m going to open up with a quote. It’s one of my favorite quotes. “Actions speak louder than words.” That literally is what defines my career. I made my first Olympic team, 15 years old. I got fifth place. I wasn’t happy. I wasn’t satisfied. They gave me a piece of paper that said, congratulations, you participated. That piece of paper motivated me for that whole next four years. I said, there’s not a shot in hell, this is ever going to happen again. My very first [Olympic] race, I did not medal. And then in 2012, I had one race I didn’t medal. I can go back and look at those races because I want to make sure that feeling stays with me until the next time I have a chance to get out there to do the same thing again. From 2002 to 2008, guess how many days I took off. In those six years, guess how many days I took off. None. Zero. Why? I wanted something that nobody else had the opport

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

def find_missing_env_keys(keys):
    """Return the names from ``keys`` that are unset or empty in ``os.environ``.

    Args:
        keys: Iterable of environment-variable names to check.

    Returns:
        A list, in input order, of the names whose value is missing or "".
    """
    return [key for key in keys if not os.environ.get(key)]


# Variables that are set are already readable through os.environ, so nothing
# needs to be copied back. Assigning the None returned by os.getenv() for a
# missing key would raise TypeError, so missing keys are reported instead.
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
missing_env_keys = find_missing_env_keys(("HUGGINGFACEHUB_API_TOKEN", "LANGCHAIN_API_KEY"))
if missing_env_keys:
    print("Warning: missing environment variables: " + ", ".join(missing_env_keys))

In [4]:
# web based loader

from langchain_community.document_loaders import WebBaseLoader
import bs4

# Build a loader for the blog post; the SoupStrainer restricts HTML parsing to
# the elements carrying the title, header and content CSS classes.

post_sections = bs4.SoupStrainer(class_=("post-title", "post-content", "post-header"))
loader = WebBaseLoader(
    web_paths=["https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"],
    bs_kwargs={"parse_only": post_sections},
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Fetch the page and preview the first 500 characters of the extracted text.
text_documents = loader.load()
first_page_text = text_documents[0].page_content
print(first_page_text[:500])



      Prompt Engineering
    
Date: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: Lilian Weng


Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.
This post only focuses on prompt engineering fo


In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded page into overlapping chunks and show the first five.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
documents = text_splitter.split_documents(text_documents)
documents[:5]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/'}, page_content='Prompt Engineering\n    \nDate: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: Lilian Weng'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/'}, page_content='Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its core, the goal of prompt engineering is about alignment and model steerability. Check my previous post on controllable text generation.'),
 Document(metadata={'sour

In [7]:
# Preview the first 500 characters of the second chunk (index 1).
documents[1].page_content[:500]

'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its co'

In [8]:
# Embed the chunks with a sentence-transformers model and index them in Chroma.
# Alternative embedding backend: OllamaEmbeddings from langchain_community.embeddings.

# Import from langchain_community directly; the `langchain.embeddings`
# re-export of this class is deprecated.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
MAX_INDEXED_CHUNKS = 50  # only this many leading chunks are embedded and indexed

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
db = Chroma.from_documents(documents[:MAX_INDEXED_CHUNKS], embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import os

# Show the CHROMA_DB_DIR environment variable (prints None when it is unset).
print(os.environ.get("CHROMA_DB_DIR"))


None


In [10]:
# Ask the vector store for the chunks most similar to a sample question.
query = "What is instruction prompting?"
result = db.similarity_search(query)

In [11]:
# Display the text of the first hit returned by the similarity search.
result[0].page_content

'Instruction Prompting#\nThe purpose of presenting few-shot examples in the prompt is to explain our intent to the model; in other words, describe the task instruction to the model in the form of demonstrations. However, few-shot can be expensive in terms of token usage and restricts the input length due to limited context length. So, why not just give the instruction directly?\nInstructed LM (e.g. InstructGPT, natural instruction) finetunes a pretrained model with high-quality tuples of (task instruction, input, ground truth output) to make LM better understand user intention and follow instruction. RLHF (Reinforcement Learning from Human Feedback) is a common method to do so. The benefit of instruction following style fine-tuning improves the model to be more aligned with human intention and greatly reduces the cost of communication.'

In [12]:
from langchain_community.llms import Ollama

# Instantiate the Ollama LLM wrapper for the "llama2" model and display it.
llm_model_name = "llama2"
llm = Ollama(model=llm_model_name)
llm

  llm = Ollama(model = "llama2")


Ollama()

In [13]:
# Chat prompt for the retrieval chain: {context} is filled with the retrieved
# document text and {input} with the user's question. The context is wrapped in
# a matching <context>...</context> pair so the model can tell where it ends.

from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context.
Think step by step before providing a detailed answer.
If you don't know the answer then return "I don't know".
<context>
{context}
</context>
Question: {input}""")


In [14]:
# Build the "stuff" documents chain, which combines the LLM with the prompt
# that receives the documents.

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)


In [15]:
# Expose the Chroma store through the retriever interface (no extra search
# arguments are passed) and display the resulting object.

retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001E2B7262610>, search_kwargs={})

In [16]:
# Wire the retriever to the documents chain to form the full retrieval chain.

from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [17]:
# Run the retrieval chain on a sample question and display the raw response.
question = "How is few shot prompting different from zero shot?"
response = retrieval_chain.invoke({"input": question})
response

{'input': 'How is few shot prompting different from zero shot?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/'}, page_content='Instruction Prompting#\nThe purpose of presenting few-shot examples in the prompt is to explain our intent to the model; in other words, describe the task instruction to the model in the form of demonstrations. However, few-shot can be expensive in terms of token usage and restricts the input length due to limited context length. So, why not just give the instruction directly?\nInstructed LM (e.g. InstructGPT, natural instruction) finetunes a pretrained model with high-quality tuples of (task instruction, input, ground truth output) to make LM better understand user intention and follow instruction. RLHF (Reinforcement Learning from Human Feedback) is a common method to do so. The benefit of instruction following style fine-tuning improves the model to be more aligned with human intention and great