<a href="https://colab.research.google.com/github/yashsawant22/Fine-Tuning-Llama/blob/main/Retrieval_Augmented_Generation_with_LangChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Dependencies

In [None]:
def md(t):
  """Render the string `t` as Markdown in the cell output."""
  rendered = Markdown(t)
  display(rendered)

In [None]:
from IPython.display import display, Markdown


In [None]:
# Install the Replicate client. %pip (rather than !pip) installs into the
# environment of the running kernel.
%pip install replicate

In [None]:
# NOTE(review): `!` runs this command in a separate shell process, so the
# exported variable does not reach the Python kernel. The token that is
# actually used is the one written to os.environ by the getpass cell below.
!export REPLICATE_API_TOKEN="your_token_here"

In [None]:
import replicate

In [None]:
import os
from getpass import getpass

# Obtain the Replicate API token without echoing it into the notebook output.
# A token already present in the environment is reused so that re-running this
# cell does not prompt again; whitespace picked up when pasting is stripped.
REPLICATE_API_TOKEN = (
    os.environ.get("REPLICATE_API_TOKEN") or getpass("Replicate API token: ")
).strip()

# Fail here with a clear message instead of later with an authentication error.
if not REPLICATE_API_TOKEN:
  raise ValueError("REPLICATE_API_TOKEN is empty; re-run this cell and enter a token.")

os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN

In [None]:
import replicate

def _run_replicate(model, prompt, temperature, max_tokens=2048):
  """Send `prompt` to the Replicate model `model` and return the reply as one string.

  Args:
    model: Replicate model identifier, e.g. "meta/meta-llama-3-8b-instruct".
    prompt: Text passed to the model as its "prompt" input.
    temperature: Value passed to the model as its "temperature" input.
    max_tokens: Value passed to the model as its "max_tokens" input.

  Returns:
    The pieces returned by `replicate.run`, concatenated into a single string.
  """
  output = replicate.run(
    model,
    input={
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature})
  # The result is consumed as an iterable of text pieces.
  return "".join(output)

def llama2(prompt, temperature=0.0, input_print=True):
  """Return the reply of "meta/llama-2-7b-chat" to `prompt` as a string.

  `input_print` is accepted for backward compatibility and is not used.
  """
  return _run_replicate("meta/llama-2-7b-chat", prompt, temperature)

def llama3_8b(prompt, temperature=0.0):
  """Return the reply of "meta/meta-llama-3-8b-instruct" to `prompt` as a string."""
  return _run_replicate("meta/meta-llama-3-8b-instruct", prompt, temperature)

def llama3_70b(prompt, temperature=0.0):
  """Return the reply of "meta/meta-llama-3-70b-instruct" to `prompt` as a string."""
  return _run_replicate("meta/meta-llama-3-70b-instruct", prompt, temperature)

In [None]:
# Baseline: send an open-ended question to Llama 2 and render the reply as Markdown.
prompt = "Whats the best way to build a habit"
output = llama2(prompt)
md(output)

In [None]:
# Same prompt, this time answered by Llama 3 8B.
output = llama3_8b(prompt)
md(output)

## Single turn chat

In [None]:
# Single-turn prompt that also asks for a short answer; sent to Llama 2.
prompt_chat = "Whats the best way to build a habit? Answer the question in few words."
output = llama2(prompt_chat)
md(output)

In [None]:
# Same short-answer prompt, answered by Llama 3 8B.
output = llama3_8b(prompt_chat)
md(output)

In [None]:
# Example without previous context: LLMs are stateless, so the model cannot tell what "it" refers to.
prompt_chat = "How many days does it take?"
output = llama2(prompt_chat)
md(output)

In [None]:
# Same context-free follow-up question, answered by Llama 3 8B.
output = llama3_8b(prompt_chat)
md(output)

In [None]:
# Same context-free follow-up question, answered by Llama 3 70B.
output = llama3_70b(prompt_chat)
md(output)

## Multi-turn Chat

In [None]:
# Multi-turn prompt: the earlier user/assistant turns are written into the
# prompt text itself so the follow-up question has something to refer to.
prompt_chat = """
User: Whats the best way to build a habit? Answer the question in few words.?
Assistant: Start small and consistent..
User: How many days does it usually take?
"""
output = llama2(prompt_chat)
md(output)

In [None]:
# Same multi-turn prompt, answered by Llama 3 8B.
output = llama3_8b(prompt_chat)
md(output)

In [None]:
# Same multi-turn prompt, answered by Llama 3 70B.
output = llama3_70b(prompt_chat)
md(output)

## Retrieval-Augmented Generation (RAG)


In [None]:
# prompt: read a text file

# Read the text file into `text`.
# The encoding is given explicitly (matching load_text_file, which reads the
# same path as UTF-8) so the result does not depend on the platform default.
with open("/content/sample_data/Atomic_Habits.txt", "r", encoding="utf-8") as f:
  text = f.read()


In [None]:
# Naive approach: paste the entire file contents into the prompt as context
# and ask Llama 2 the question directly.
# NOTE(review): the full text may be larger than the model's context window — confirm.
prompt = f"""
I am providing you with the text for a book atomic habits. All questions asked to you should be in context to the book.
context: {text}
Question: How many days does it usually take to build a habit?

"""
output = llama2(prompt)
print(output)

# RAG with LangChain

In [None]:
# Install LangChain, its community integrations, and the embedding, vector-store
# and HTML-parsing dependencies into the running kernel's environment (%pip).
# The released langchain-community package is used instead of the repository
# HEAD so that the install does not depend on the current state of the git tree.
# TODO: pin versions once a known-good combination has been confirmed.
%pip install langchain-community

%pip install langchain
%pip install sentence-transformers
%pip install faiss-cpu
%pip install bs4

In [None]:
class Document:
    """Minimal document container: a text body plus a metadata dict."""

    def __init__(self, text, metadata=None):
        # Fall back to a fresh dict so `metadata` is never None.
        if metadata is None:
            metadata = {}
        self.page_content = text  # main text content
        self.metadata = metadata  # optional additional metadata

# Function to load text from a file and create a document object
def load_text_file(file_path):
    """Read `file_path` as UTF-8 text and return it wrapped in a one-element list of Document.

    The Document is created with the text only, so its metadata is left at the default.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return [Document(contents)]


In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Step 1: Load the document from a local text file
# (alternative kept for reference: load it from a web URL instead)
#loader = WebBaseLoader(["https://huggingface.co/blog/llama3"])
#documents = loader.load()
file_path = '/content/sample_data/Atomic_Habits.txt'
documents = load_text_file(file_path)

# Step 2: Split the document into chunks with a specified chunk size and overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)

# Step 3: Embed the chunks with the all-mpnet-base-v2 sentence-transformers model
# and store them in a FAISS vector store
vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))

In [None]:
# LLM used by the chain: Llama 3 8B Instruct hosted on Replicate,
# configured with temperature 0.0, top_p 1 and max_new_tokens 500.
from langchain_community.llms import Replicate
llm = Replicate(
    model="meta/meta-llama-3-8b-instruct",
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens":500}
)

# Chain built from the LLM and a retriever over the vector store.
# return_source_documents=True requests the source documents alongside the answer.
from langchain.chains import ConversationalRetrievalChain
chain = ConversationalRetrievalChain.from_llm(llm,
                                              vectorstore.as_retriever(),
                                              return_source_documents=True)

In [None]:
# Ask the chain a question with an empty chat history (an independent, single-turn query).
# invoke() is used because calling the chain object directly is deprecated in LangChain.
result = chain.invoke({"question": "Whats the best way to build a habit? Answer the question in few words.", "chat_history": []})
md(result['answer'])

In [None]:
# Ask the chain a question with an empty chat history (an independent, single-turn query).
# invoke() is used because calling the chain object directly is deprecated in LangChain.
result = chain.invoke({"question": "whats the power of compounding?", "chat_history": []})
md(result['answer'])

In [None]:
# Ask the chain a question with an empty chat history (an independent, single-turn query).
# invoke() is used because calling the chain object directly is deprecated in LangChain.
result = chain.invoke({"question": "I want to start a habit of waking up early. Help me build that habit", "chat_history": []})
md(result['answer'])

In [None]:
# Ask the chain a question with an empty chat history (an independent, single-turn query).
# invoke() is used because calling the chain object directly is deprecated in LangChain.
result = chain.invoke({"question": "Help me make a plan for going to the gym at 4pm.", "chat_history": []})
md(result['answer'])