<a href="https://colab.research.google.com/github/amsac/ML_Notebooks/blob/main/M4_AST_03_LangChain_with_Open_Source_LLMs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Advanced Certification Programme in AI and MLOps
## A Program by IISc and TalentSprint
### Assignment 3: Open Source LLMs with LangChain 🦜🔗

## Learning Objectives

At the end of the experiment, you will be able to:

* use open source LLMs: **`zephyr-7b-beta`**, **`Mistral-7B-Instruct-v0.2`**,  and **`Llama2`** through HuggingFaceHub with LangChain
* understand and use prompt templates, memory, and output parsers in LangChain


### Setup Steps:

In [None]:
#@title Please enter your registration id to start: { run: "auto", display-mode: "form" }
Id = "" #@param {type:"string"}

In [None]:
#@title Please enter your password (your registered phone number) to continue: { run: "auto", display-mode: "form" }
password = "" #@param {type:"string"}

In [None]:
#@title Run this cell to complete the setup for this Notebook
from IPython import get_ipython

ipython = get_ipython()

notebook= "M4_AST_03_LangChain_with_Open_Source_LLMs" #name of the notebook

def setup():
    """Display the dashboard tracking tag for this attempt and confirm setup.

    Builds a ``<script>`` tag whose URL carries the trainee id (``getId()``)
    and the module-level ``submission_id``, renders it with
    ``IPython.display``, and prints a confirmation message.
    """
    from IPython.display import HTML, display

    tracker_tag = '<script src="https://dashboard.talentsprint.com/aiml/record_ip.html?traineeId={0}&recordId={1}"></script>'
    display(HTML(tracker_tag.format(getId(), submission_id)))
    print("Setup completed successfully")

def submit_notebook():
    """Register a notebook attempt, or submit the finished notebook for grading.

    The behaviour depends on the module-level ``submission_id``:

    * If it is not set, the registration id and password are posted to the
      dashboard and the ``record_id`` of a successful response is returned.
    * If it is set and every feedback answer is available, the notebook file
      is base64-encoded and posted together with the answers;
      ``submission_id`` is returned on success.
    * If it is set but an answer is missing (the failing ``get*`` helper
      prints which one), the existing ``submission_id`` is returned unchanged
      so the attempt is not lost.

    Returns:
        The record/submission id, or ``None`` when the server reports an
        error or sends an unexpected response.
    """
    # Write <notebook>.ipynb through the %notebook magic; it is read back below.
    ipython.magic("notebook -e "+ notebook + ".ipynb")

    import requests, json, base64

    url = "https://dashboard.talentsprint.com/xp/app/save_notebook_attempts"

    if not submission_id:
        # First call: open a new attempt for this trainee.
        data = {"id" : getId(), "notebook" : notebook, "mobile" : getPassword()}
        r = json.loads(requests.post(url, data = data).text)

        # .get() lets an error payload without a "status" key reach the "err"
        # branch instead of raising KeyError.
        if r.get("status") == "Success":
            return r["record_id"]
        if "err" in r:
            print(r["err"])
        else:
            print ("Something is wrong, the notebook will not be submitted for grading")
        return None

    if getAnswer() and getComplexity() and getAdditional() and getConcepts() and getComments() and getMentorSupport():
        # Context manager closes the notebook file even if reading fails.
        with open(notebook + ".ipynb", "rb") as f:
            file_hash = base64.b64encode(f.read())

        data = {"complexity" : Complexity, "additional" :Additional,
                "concepts" : Concepts, "record_id" : submission_id,
                "answer" : Answer, "id" : Id, "file_hash" : file_hash,
                "notebook" : notebook,
                "feedback_experiments_input" : Comments,
                "feedback_mentor_support": Mentor_support}
        r = json.loads(requests.post(url, data = data).text)
        if "err" in r:
            print(r["err"])
            return None

        print("Your submission is successful.")
        print("Ref Id:", submission_id)
        print("Date of submission: ", r["date"])
        print("Time of submission: ", r["time"])
        print("View your submissions: https://aimlops-iisc.talentsprint.com/notebook_submissions")
        return submission_id

    # A feedback answer is missing: keep the current attempt id rather than
    # falling through and returning None.
    return submission_id


def getAdditional():
  """Return the global ``Additional`` answer, or ``None`` if it is missing.

  When ``Additional`` is undefined or empty, a reminder is printed and
  ``None`` is returned.
  """
  try:
    answer = Additional
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Additional Question")
  return None

def getComplexity():
  """Return the global ``Complexity`` answer, or ``None`` if it is missing.

  When ``Complexity`` is undefined or empty, a reminder is printed and
  ``None`` is returned.
  """
  try:
    answer = Complexity
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Complexity Question")
  return None

def getConcepts():
  """Return the global ``Concepts`` answer, or ``None`` if it is missing.

  When ``Concepts`` is undefined or empty, a reminder is printed and
  ``None`` is returned.
  """
  try:
    answer = Concepts
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Concepts Question")
  return None


# def getWalkthrough():
#   try:
#     if not Walkthrough:
#       raise NameError
#     else:
#       return Walkthrough
#   except NameError:
#     print ("Please answer Walkthrough Question")
#     return None

def getComments():
  """Return the global ``Comments`` feedback, or ``None`` if it is missing.

  When ``Comments`` is undefined or empty, a reminder is printed and
  ``None`` is returned.
  """
  try:
    answer = Comments
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Comments Question")
  return None


def getMentorSupport():
  """Return the global ``Mentor_support`` answer, or ``None`` if it is missing.

  When ``Mentor_support`` is undefined or empty, a reminder is printed and
  ``None`` is returned.
  """
  try:
    answer = Mentor_support
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Mentor support Question")
  return None

def getAnswer():
  """Return the global ``Answer`` value, or ``None`` if it is missing.

  When ``Answer`` is undefined or empty, a reminder is printed and ``None``
  is returned.
  """
  try:
    answer = Answer
  except NameError:
    answer = None
  if answer:
    return answer
  print ("Please answer Question")
  return None


def getId():
  """Return the global registration ``Id``, or ``None`` if undefined or empty."""
  try:
    trainee_id = Id
  except NameError:
    return None
  return trainee_id or None

def getPassword():
  """Return the global ``password``, or ``None`` if undefined or empty."""
  try:
    secret = password
  except NameError:
    return None
  return secret or None

# No attempt exists until submit_notebook() hands back a record id.
submission_id = None

# Register the attempt only when both credential cells have been filled in;
# setup() runs only if the registration produced an id.
if not (getPassword() and getId()):
  print ("Please complete Id and Password cells before running setup")
else:
  submission_id = submit_notebook()
  if submission_id:
    setup()



### Install required dependencies

In [None]:
# Langchain
!pip -q install langchain

# Library to communicate with HF hub
!pip -q install --upgrade huggingface_hub

In [None]:
!pip -q install langchain_community

In [None]:
!pip -q install langchain_huggingface

### Import required packages

In [None]:
# Import the 'os' module to interact with the operating system, such as setting environment variables.
import os

# Import 'getpass' to securely prompt the user for a password or API key without displaying it on the screen.
from getpass import getpass

# Import 'HuggingFaceEndpoint' from LangChain's community module to connect and interact with Hugging Face's hosted models.
from langchain_community.llms import HuggingFaceEndpoint

# Import 'PromptTemplate' from LangChain to create structured templates for prompting language models.
from langchain.prompts import PromptTemplate

### **Provide your HuggingFace api key/access token**

In [None]:
# Ask for the Hugging Face access token without echoing it to the screen.
pass_token = getpass("Enter your HuggingFace access token: ")

# Export the token under both environment variable names used in this notebook.
os.environ.update(
    HF_TOKEN=pass_token,
    HUGGINGFACEHUB_API_TOKEN=pass_token,
)

# Drop the plain-text copy from the notebook namespace.
del pass_token

### **Exploring Open Source LLMs hosted on HuggingFace**

>**I.** `HuggingFaceH4/zephyr-7b-beta`
>
>**II.** `mistralai/Mistral-7B-Instruct-v0.2`
>
>**III.** `Llama2`

[LangChain link](https://python.langchain.com/docs/integrations/chat/huggingface) for using Hugging Face LLM's as chat models.

### **I.** [**HuggingFaceH4/zephyr-7b-beta**](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)

In [None]:
# Import HuggingFace model abstraction class from langchain
from langchain_huggingface import HuggingFaceEndpoint

In [None]:
# Initialize the Hugging Face language model endpoint with specific parameters.
llm = HuggingFaceEndpoint(
    # Specify the repository ID of the model to use. In this case, it's "zephyr-7b-beta" from HuggingFaceH4.
    repo_id="HuggingFaceH4/zephyr-7b-beta",

    # Define the task type for the model, which is "text-generation" (used for generating text responses).
    task="text-generation",

    # Set the maximum number of new tokens the model can generate in response.
    max_new_tokens=512,

    # Use 'top_k' sampling to limit the number of highest-probability tokens considered at each generation step.
    top_k=30,

    # Set the temperature, which controls the randomness of the output. A lower value (e.g., 0.1) makes responses more deterministic.
    temperature=0.1,

    # Apply a repetition penalty to discourage the model from repeating the same words or phrases too frequently.
    repetition_penalty=1.03,
)

In [None]:
# Invoke the Hugging Face language model with a specific prompt asking for five points on learning programming.
response = llm.invoke("How to learn programming? give 5 points")

# Print the model's generated response to the console.
print(response)

#### **Prompt Template**

Prompt templates are predefined recipes for generating prompts for language models.

A template may include instructions, few-shot examples, and specific context and questions appropriate for a given task.

LangChain provides tooling to create and work with prompt templates.

To know more about Prompt template, refer [here](https://python.langchain.com/docs/modules/model_io/prompts/quick_start).

#### **Example-1**

In [None]:
# Import the PromptTemplate class from LangChain to create structured prompts.
from langchain.prompts import PromptTemplate

# Create a prompt template using placeholders for dynamic input values.
prompt_template = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}."
)

# Format the template by replacing the placeholders with actual values.
messages = prompt_template.format(adjective="funny", content="Trump")

# Output the formatted prompt string.
messages

In [None]:
# Import the ChatHuggingFace class from LangChain's Hugging Face integration.
from langchain_huggingface import ChatHuggingFace

# Initialize a chat model using the previously defined Hugging Face LLM.
chat_model = ChatHuggingFace(llm=llm)

In [None]:
# Invoke the chat model with the formatted prompt message.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

**Practice-1 :**
Create a prompt template envisioning a situation where you have to pass three parameters, and the language model responds to it.

In [None]:
# YOUR CODES HERE for above practice exercise

#### **Example-2**

In [None]:
# Import message classes from LangChain's schema module.
from langchain.schema import (
    HumanMessage,  # Represents a message sent by the human (user) in a conversation.
    SystemMessage,  # Represents a message providing system-level instructions or context.
)

In [None]:
# Import the PromptTemplate class from LangChain to create structured prompts.
from langchain.prompts import PromptTemplate

# Create a prompt template with placeholders for dynamic values.
prompt_template = PromptTemplate.from_template(
    "Tell me {count} facts about {event_or_place}."
)

# Format the template by replacing placeholders with actual values.
user_msg = prompt_template.format(count=5, event_or_place="Tajmahal")

# Output the formatted prompt string.
user_msg

In [None]:
# Create a list of messages to structure a conversation with both system and human roles.
messages = [
    # SystemMessage provides a directive to the model about its role or behavior.
    SystemMessage(content="You're a knowledgeable historian"),

    # HumanMessage contains the prompt or query from the user, which is the formatted message.
    HumanMessage(content=user_msg),
]

In [None]:
# Import the ChatHuggingFace class from LangChain's Hugging Face integration to handle chat interactions.
from langchain_huggingface import ChatHuggingFace

In [None]:
# Initialize the ChatHuggingFace model with the previously defined Hugging Face language model (llm).
chat_model = ChatHuggingFace(llm=llm)

In [None]:
# Access the model identifier of the Hugging Face model used by the ChatHuggingFace instance.
chat_model.model_id

In [None]:
# Convert the sequence of messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

In [None]:
# Convert the sequence of messages into a chat prompt and print the resulting formatted prompt.
print(chat_model._to_chat_prompt(messages))

In [None]:
# Invoke the chat model with the sequence of messages, sending it to the Hugging Face model for processing.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

**Practice-2 :**
Create a prompt template envisioning a situation where the language model behaves like a particular persona, and the user's query requires information involving three parameters.

In [None]:
# YOUR CODES HERE for above practice exercise

#### **Example-3**

The prompt to *chat models* is a list of chat messages.

Each chat message is associated with content, and an additional parameter called `role`. For example, in the OpenAI Chat Completions API, a chat message can be associated with an AI assistant, a human or a system role.

In [None]:
# Import the ChatPromptTemplate class from LangChain's core prompts module to create structured chat prompts.
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Build a chat prompt from (role, template) pairs: a system persona, one
# canned human/AI exchange, and a final human turn filled in at format time.
chat_template = ChatPromptTemplate.from_messages(
    [
        # System message: sets the persona. The stray "A" before the
        # placeholder was removed so the sentence reads correctly once
        # {persona} is substituted (e.g. "You are a helpful trustworthy friend.").
        ("system", "You are a helpful {persona}."),

        # The first human message, a simple greeting.
        ("human", "Hello, how are you doing?"),

        # The AI's response to the human's greeting.
        ("ai", "I'm doing well, thanks!"),

        # The second human message, filled with the user's input.
        ("human", "{user_input}"),
    ]
)

In [None]:
# Define the persona for the AI, which will be used in the system message template.
persona = """trustworthy friend"""

# Define the user's query, asking for help with understanding a concept.
query = """
I am not able to understand the concept taught in class. \
Could you please suggest something? \
I need your help. Give 5 points to work on.
"""

# Format the messages in the chat template by filling in the placeholders with the defined persona and query.
messages = chat_template.format_messages(persona=persona, user_input=query)

In [None]:
messages

In [None]:
# Convert the formatted messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

In [None]:
print(chat_model._to_chat_prompt(messages))

In [None]:
# Invoke the chat model with the formatted messages and get the model's response.
response = chat_model.invoke(messages)

# Print the content of the response from the chat model.
print(response.content)

**Practice-3 :**
Create a prompt template that takes context and a question from the user and answers the question based on the given context.

Hint: Keep the context in the system message and the question in the human message.

**Context:** Meet Aryan Kapoor, a rising star in the entertainment industry whose talent knows no bounds. In 2023, Aryan captivated
audiences with his mesmerizing performance in the critically acclaimed film "Echoes of Eternity," earning him the prestigious Best Actor award at the National Film Awards. His versatility shone brightly in 2024 when he showcased his vocal prowess as a playback singer in the chart-topping soundtrack of the blockbuster movie "Infinite Horizon." The same year,  Aryan's captivating screen presence garnered him the coveted Filmfare Critics Award for Best Actor. As his star continued to
ascend, Aryan was honored with the International Icon of the Year award at the Global Entertainment Awards in 2025, recognizing his global impact and widespread admiration. With each role he undertakes, Aryan Kapoor cements his status as an unrivaled  talent in the world of cinema, leaving audiences eagerly anticipating his next masterpiece.

**Question:** What awards did Aryan Kapoor win for his contributions to the entertainment industry, and in which years were they received?

In [None]:
# YOUR CODES HERE for above practice exercise

In [None]:
# Build a two-message chat prompt: the system message carries the context and
# the human message carries the question.
chat_template = ChatPromptTemplate.from_messages(
    [
        # System message: the context is wrapped in a triple-backtick fence.
        # The closing fence now has three backticks to match the opening one,
        # so the context is delimited unambiguously.
        ("system", "You are a helpful assistant and know this context ```{context}```"),

        # Human message: asks for a point-wise answer restricted to the context.
        ("human", " pls reply ```{question}``` in points based on the context provided. Strictly don't add extra facts and information ?"),
    ]
)

In [None]:
# Define the user's question about Aryan Kapoor's awards and the years they were received.
question = """What awards did Aryan Kapoor win for his contributions to the entertainment industry,
and in which years were they received?"""

# Define the context, which provides detailed information about Aryan Kapoor's achievements.
context = """
Meet Aryan Kapoor, a rising star in the entertainment industry whose talent knows no bounds. In 2023, Aryan captivated
audiences with his mesmerizing performance in the critically acclaimed film "Echoes of Eternity," earning him the
 prestigious Best Actor award at the National Film Awards. His versatility shone brightly in 2024 when he showcased his
  vocal prowess as a playback singer in the chart-topping soundtrack of the blockbuster movie "Infinite Horizon." The same year,
  Aryan's captivating screen presence garnered him the coveted Filmfare Critics Award for Best Actor. As his star continued to
  ascend, Aryan was honored with the International Icon of the Year award at the Global Entertainment Awards in 2025, recognizing
   his global impact and widespread admiration. With each role he undertakes, Aryan Kapoor cements his status as an unrivaled
   talent in the world of cinema, leaving audiences eagerly anticipating his next masterpiece.
"""

# Format the chat prompt messages by inserting the context and question into the message template.
messages = chat_template.format_messages(context=context, question=question)

In [None]:
# Convert the formatted messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

In [None]:
# Send the formatted messages to the chat model for processing and obtain the model's response.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

#### **Output Parsers**

Let's start by defining what we would like the LLM output to look like:

In [None]:
# An example output format
{
  "gift": False,
  "delivery_days": 5,
  "price_value": "pretty affordable!"
}

In [None]:
# Sample review used as input for the extraction prompts. Backslashes join the
# source lines into one paragraph, so each joined line must carry its own
# separating space (the "settings:" line previously lacked one).
customer_review = """\
This leaf blower is pretty amazing.  It has four settings: \
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

In [None]:
# Extraction prompt with a single {text} placeholder. Backslashes join a
# wrapped sentence into one line, so the joined line needs its own separating
# space (the "price," line previously lacked one).
review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift or present for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price, \
and output them as a comma separated Python list.

Format the output as JSON with the following keys:
gift
delivery_days
price_value

text: {text}
"""

In [None]:
# Import ChatPromptTemplate from LangChain Core to create structured prompts for chat models.
from langchain_core.prompts import ChatPromptTemplate

# Creating a chat prompt template using a predefined template string (review_template).
prompt_template = ChatPromptTemplate.from_template(review_template)

# Print the created prompt template to check its structure.
print(prompt_template)

In [None]:
# Format the chat prompt messages by filling in the placeholder(s) in the prompt template with the actual customer review text.
messages = prompt_template.format_messages(text=customer_review)

# Send the formatted messages to the chat model for processing and obtain the model's response.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

In [None]:
print(type(response.content))

#### **Parse the LLM output string into a structured data**:

Language models output text. But there are times where you want to get more structured information than just text back. While some model providers support [built-in ways to return structured output](https://python.langchain.com/docs/how_to/structured_output/), not all do.

Output parsers are classes that help structure language model responses.

Below we go over the main type of output parser, the `PydanticOutputParser`.



[Structured output parser](https://python.langchain.com/docs/how_to/output_parser_structured/)

In [None]:
# Import necessary modules
from langchain_core.output_parsers import PydanticOutputParser  # Parses model output into structured data
from pydantic import BaseModel, Field  # Used for defining structured data models

# Define the structured data model for extracting product-related information
class Product_Info(BaseModel):
    """Product service info."""
    # NOTE(review): the docstring above and the Field descriptions below are
    # presumably emitted into the schema that the parser's format instructions
    # send to the model, so treat them as prompt text — confirm before rewording.

    # Field to check if the product was purchased as a gift.
    # Typed as str: the description asks for the strings 'True'/'False', not a bool.
    gift: str = Field(description="Was the item purchased as a gift for someone else? "
                                  "Answer 'True' if yes, 'False' if not or unknown.")

    # Field to capture the number of days taken for delivery (-1 when not stated, per the description)
    delivery_days: int = Field(description="How many days did it take for the product to arrive? "
                                           "If this information is not found, output -1.")

    # Field to extract and store sentences related to price or value as a list (element type is not constrained)
    price_value: list = Field(description="Extract sentences about the value or price, "
                                          "and output them as a comma-separated Python list.")

In [None]:
# Set up a parser + inject instructions into the prompt template
parser = PydanticOutputParser(pydantic_object = Product_Info)

In [None]:
# Import PromptTemplate from LangChain to create structured prompts for the model
from langchain.prompts import PromptTemplate

# Create a prompt template to guide the model's response generation
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{text}\n",  # Template structure
    input_variables=["text"],  # Defines 'text' as a required input variable
    partial_variables={"format_instructions": parser.get_format_instructions()},  # Inserts format instructions dynamically
)

In [None]:
# Create a pipeline where the formatted prompt is passed to the language model (llm)
prompt_and_model = prompt | llm  # The "|" operator chains the prompt and the LLM for execution

# Invoke the pipeline with a user query (customer review) to generate a structured response
output = prompt_and_model.invoke({"text": customer_review})

# Parse the model's output into the predefined structured format using the parser
result = parser.invoke(output)

# Display the final structured result
result

In [None]:
print(result.gift)
print(result.delivery_days)
print(result.price_value)

**Practice-4 :** Continuing from Practice-3, can you get the output in the format below:

{'Year': 'Award'}

In [None]:
# YOUR CODES HERE for above practice exercise

#### [**Customizing Conversational Memory**](https://python.langchain.com/docs/how_to/chatbots_memory/)

LangChain can help you build better chatbots, or have
an LLM hold more effective conversations, by better managing
what it remembers from the conversation you've had so far.

In [None]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

chain = prompt | chat_model


In [None]:
# Import ChatMessageHistory to manage and store conversation history
from langchain_community.chat_message_histories import ChatMessageHistory

# Import RunnableWithMessageHistory to enable conversation-aware execution of chat models
from langchain_core.runnables.history import RunnableWithMessageHistory

In [None]:
# Dictionary to store chat message histories for different sessions
store = {}

# Function to retrieve or create a new chat history for a given session ID
def get_session_history(session_id: str) -> ChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, keyed by session id.
    """
    try:
        return store[session_id]
    except KeyError:
        # First message of this session: start an empty history and keep it.
        store[session_id] = ChatMessageHistory()
        return store[session_id]

# Create a chat pipeline with message history tracking
chain_with_message_history = RunnableWithMessageHistory(
    runnable=chain,  # The main conversation chain (LLM model + prompt)
    get_session_history=get_session_history,  # Function to retrieve chat history per session
    input_messages_key="input",  # Key in the input where the new user message is stored
    history_messages_key="chat_history",  # Key in the input where the previous chat history is stored
)

In [None]:
# Invoke the chat model with message history tracking
chain_with_message_history.invoke(
    {"input": "Hi, my name is James"},  # User's input message
    {"configurable": {"session_id": "user1"}},  # Configuration containing session ID
)

In [None]:
store

In [None]:
# Invoke the chat model with message history tracking for a specific user session
chain_with_message_history.invoke(
    input={"input": "Do you remember my name?"},  # User's current input message
    config={"configurable": {"session_id": "user1"}}  # Session configuration to maintain chat history
)


In [None]:
store

In [None]:
# Invoke the chat model with message history tracking to retrieve the user's name
chain_with_message_history.invoke(
    input={"input": "Can you tell me what is my name?"},  # User's current query asking for their name
    config={"configurable": {"session_id": "user1"}}  # Session configuration to ensure chat history for "user1"
)

In [None]:
store

### **II. mistralai/Mistral-7B-Instruct-v0.2**

Note that you need to ask for access before using this model. Go to https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 and click on `Agree and access repository`.


In [None]:
# Importing HuggingFaceEndpoint from langchain_huggingface to interact with Hugging Face models via an API endpoint
from langchain_huggingface import HuggingFaceEndpoint

In [None]:
question = "How to learn programing? Give 5 examples. "

In [None]:
# Hugging Face repository that hosts the model.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"  # Model identifier for Mistral-7B

# Initialize the endpoint. The output cap and the access token are passed as
# first-class arguments rather than through `model_kwargs`: entries in
# `model_kwargs` are forwarded to the generation request as extra parameters,
# so a "token" entry there does not authenticate the call, and "max_length" is
# not a text-generation parameter.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,  # Model repo ID to specify which model to use
    task="text-generation",  # Task to perform, here it's text generation
    temperature=0.5,  # Controls the randomness of the output (higher value = more random)
    max_new_tokens=128,  # Maximum number of generated tokens; raise it if answers are cut short
    huggingfacehub_api_token=os.environ["HF_TOKEN"],  # Token stored earlier in the environment
)

In [None]:
# Use the Hugging Face model endpoint to generate a response based on the input question
response = llm.invoke(question)  # Pass the user's question to the model for text generation
print(response)  # Output the generated response from the model

#### **Prompt Template**

**Example-1**

In [None]:
# Importing message types from langchain.schema to represent different types of messages in a conversation
from langchain.schema import (
    HumanMessage,  # Represents a message from the user (human)
    SystemMessage,  # Represents a message from the system, providing context or instructions
)

In [None]:
# Importing the ChatPromptTemplate class from langchain_core.prompts to create and format chat-based prompts
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Single-message prompt with three placeholders. The backslash joins the two
# source lines, so the first line ends with a space to keep the sentences
# apart; the unmatched trailing ``` fence and the doubled space were removed.
template_s = """You are a {style1}. \
Tell me {count} facts about {event_or_place}.
"""

In [None]:
# Creating a ChatPromptTemplate using a template string, where 'template_s' is a predefined prompt template
prompt_template = ChatPromptTemplate.from_template(template_s)

In [None]:
# Accessing the prompt of the first message in the ChatPromptTemplate
prompt_template.messages[0].prompt

In [None]:
# Accessing the input variables of the first message's prompt in the ChatPromptTemplate
prompt_template.messages[0].prompt.input_variables

In [None]:
# Formatting the messages using the prompt_template with specific values for placeholders
user_messages = prompt_template.format_messages(
    style1="knowledgeable historian",  # Replacing the 'style1' placeholder with the value "knowledgeable historian"
    count=5,                           # Replacing the 'count' placeholder with the value 5
    event_or_place="Tajmahal"          # Replacing the 'event_or_place' placeholder with the value "Tajmahal"
)

In [None]:
user_messages

In [None]:
# Importing ChatHuggingFace from langchain_huggingface to interface with Hugging Face models
from langchain_huggingface import ChatHuggingFace

# Creating an instance of ChatHuggingFace using a previously defined language model (llm)
chat_model = ChatHuggingFace(llm=llm)

# Accessing the model ID associated with the chat_model instance
chat_model.model_id

In [None]:
# Converting the user messages to a chat-friendly format for the chat_model
chat_model._to_chat_prompt(user_messages)

In [None]:
# Invoking the chat model with user messages to generate a response
response = chat_model.invoke(user_messages)

# Printing the content of the response generated by the chat model
print(response.content)

**Example-2**

In [None]:
# Creating a list of messages with a single human message as input
messages = [HumanMessage(content="How to learn programming? give 5 points")]

In [None]:
# Converting the list of messages to the appropriate chat format for the chat model
chat_model._to_chat_prompt(messages)

In [None]:
# Sending the formatted messages to the chat model and generating a response
response = chat_model.invoke(messages)

# Printing the content of the response generated by the chat model
print(response.content)

### **III.** **[Llama2](https://ai.meta.com/llama/)** ***(Optional)***

**NOTE:**

>To use this model, click the `Download models` link available in [this](https://ai.meta.com/llama/) reference, which redirects to a **request form**. It may take anywhere from 1 hour to 2 days to get **approval** to use this model through HuggingFace. You will receive an email once the request is approved.

>Once the request is approved, connect to **GPU runtime** for below steps. Also, you need to provide your HF api key/access token.

Trying the Llama-2-7b model:


In [None]:
%%capture
# This is a Jupyter notebook magic command that captures the output of the cell,
# preventing it from being printed in the notebook.

!pip install -q transformers accelerate langchain xformers bitsandbytes
# Using pip to install the following Python packages:
# - transformers: A library for working with transformer models like GPT, BERT, etc.
# - accelerate: A library to speed up the training and inference of large models efficiently.
# - langchain: A framework for building applications using large language models (LLMs).
# - xformers: Tools and optimizations for efficient memory usage and performance in transformers.
# - bitsandbytes: A library for running models with low-bit precision to improve memory usage and speed.
# The '-q' flag ensures the installation process runs quietly without verbose output.

In [None]:
# Enter your HuggingFace access token when prompted

import os  # used to export the token through environment variables
from getpass import getpass  # reads input without echoing it to the screen

# Ask for the token. Surrounding whitespace is stripped because a copy-pasted token
# often carries a trailing space or newline, which would make authentication fail.
pass_token = getpass("Enter your HuggingFace access token: ").strip()

# Export the token under both variable names used by this notebook
os.environ["HF_TOKEN"] = pass_token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = pass_token

# Remove the Python name so the token is not left in a notebook variable.
# Note that the token is still held by this process in the two environment variables above.
del pass_token

## Initializing the Hugging Face Pipeline

The first thing we need to do is initialize a `text-generation` pipeline with Hugging Face transformers. The pipeline requires two things that we must initialize first:

* An LLM, in this case `meta-llama/Llama-2-7b-chat-hf`.

* The respective tokenizer for the model.

We'll explain these as we get to them, let's begin with our model.

We initialize the model and move it to our CUDA-enabled GPU. Using Colab this can take 5-10 minutes to download and initialize the model.

In [None]:
from torch import cuda, bfloat16
# Importing 'cuda' from PyTorch to interact with GPU for acceleration (e.g., checking for available CUDA devices, using the GPU for tensor operations).
# Importing 'bfloat16' from PyTorch, a 16-bit floating point format often used to save memory and speed up computation, especially in deep learning.

import transformers
# Importing the 'transformers' library from Hugging Face, which provides tools for working with transformer models like GPT, BERT, and others.

In [None]:
# Hugging Face Hub id of the chat-tuned Llama-2 7B checkpoint used in this section
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Choose the device string: the currently selected CUDA device when CUDA is available,
# otherwise the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

device  # last expression of the cell, so the notebook displays the chosen device

In [None]:
# Load the tokenizer that matches the checkpoint named by `model_id`.
# The tokenizer converts input text into the token IDs the model consumes.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

# Build a text-generation pipeline around the same checkpoint.
# `trust_remote_code=True` is intentionally not passed: Llama-2 is implemented inside the
# transformers library itself, so there is no need to allow code shipped in a Hub
# repository to run on this machine.
pipeline = transformers.pipeline(
    "text-generation",  # task type
    model=model_id,  # checkpoint to load
    tokenizer=tokenizer,  # tokenizer loaded above
    torch_dtype=bfloat16,  # load/compute in bfloat16 to reduce memory usage
    device_map="auto",  # let the library place the model on the available device(s)
    max_length=1000,  # token limit per sequence (NOTE(review): in transformers this counts prompt + generated tokens — confirm)
    do_sample=True,  # sample the next token instead of decoding deterministically
    top_k=10,  # sample only among the 10 most probable next tokens
    num_return_sequences=1,  # one generated sequence per prompt
    eos_token_id=tokenizer.eos_token_id  # stop once the end-of-sequence token is produced
)

In [None]:
# Run the pipeline on a single prompt; `res` holds the returned list of generated sequences
res = pipeline("How to learn programming?")

# Print the text of the first generated sequence, stored under the 'generated_text' key
print(res[0]["generated_text"])

#### **Now implementing with LangChain**

In [None]:
# Install the LangChain library, a framework for building language model-based applications
!pip -q install langchain
# The '-q' flag suppresses unnecessary output, making the installation process less verbose.
# LangChain is useful for building chains of various language model components like prompts, tools, and memory.

# Install the langchain_community library, which contains community-supported modules for LangChain
!pip -q install langchain_community
# This library includes additional modules contributed by the LangChain community to extend its functionality.

In [None]:
# Import HuggingFacePipeline from langchain_community, the package this notebook installs
# for community integrations; the older `langchain.llms` import path is deprecated.
from langchain_community.llms import HuggingFacePipeline

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
# 'pipeline': the pre-configured HuggingFace pipeline that generates the responses.
# 'model_kwargs': extra model arguments recorded on the wrapper.
# NOTE(review): with a ready-made `pipeline`, `model_kwargs` appears to be used only when the
# wrapper builds the model itself (`from_model_id`); the sampling settings in effect are then
# the ones given when `pipeline` was created, so this temperature may have no effect — confirm.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0.7})

In [None]:
# Send a plain-text prompt through the LangChain-wrapped pipeline and print what it returns
print(llm.invoke("How to learn programming?"))

#### **Prompt Template**

In [None]:
# Importing ChatPromptTemplate from langchain.prompts
from langchain.prompts import ChatPromptTemplate
# This class allows for creating templates for chat-based prompts. You can define the structure of messages in the chat,
# and then dynamically replace parts of it based on inputs or parameters.

In [None]:
# Prompt skeleton with two placeholders that are filled in later:
#   {style1} - the persona/tone in which the answer should be given
#   {text1}  - the user's text, wrapped in triple backticks
template_s = (
    "Reply the answer\n"
    "like {style1}.\n"
    "text: ```{text1}```\n"
)

In [None]:
# Build a ChatPromptTemplate from the string `template_s`.
# Its placeholders {style1} and {text1} are left unfilled until the template is formatted.
prompt_template = ChatPromptTemplate.from_template(template_s)

In [None]:
# Inspect the `prompt` attribute of the first message in the chat prompt template
# NOTE(review): this displays a prompt-template object rather than a bare string — confirm
prompt_template.messages[0].prompt

In [None]:
# List the placeholder names used by the first message's prompt
# (for `template_s` these are style1 and text1)
prompt_template.messages[0].prompt.input_variables

In [None]:
# Persona/tone in which the model is asked to frame its answer
style = "trustworthy friend"

In [None]:
# User request sent to the model: asks for help understanding a concept and for 5 points to work on.
# The text starts and ends with a newline.
query = (
    "\n"
    "I am not able to understand the concept taught in class.\n"
    "Could you please suggest something?\n"
    "I need your help. Give 5 points to work on.\n"
)

In [None]:
# Fill {style1} with the persona and {text1} with the user's question;
# the formatted result is kept in `user_messages` as input for the model
user_messages = prompt_template.format_messages(style1=style, text1=query)

In [None]:
# Print the first formatted message, in which {style1} and {text1} have been replaced
# by the values of 'style' and 'query'
# NOTE(review): the element is a message object, so expect its printed representation
# rather than only the raw text — confirm
print(user_messages[0])

In [None]:
# Send the formatted messages to the LLM and keep its reply in `llm_response`
llm_response = llm.invoke(user_messages)

In [None]:
# Print the reply the LLM produced for the templated prompt
print(llm_response)

### Please answer the questions below to complete the experiment:




In [None]:
#@title Which of the following prompt techniques in LangChain allows flexible templated prompts that are suitable for better describing the role and content? { run: "auto", form-width: "500px", display-mode: "form" }
Answer = "" #@param ["", "PromptTemplate", "ChatPromptTemplate", "Both"]
# Colab form field: the option chosen in the dropdown is stored in `Answer` (empty string = not answered).

In [None]:
#@title How was the experiment? { run: "auto", form-width: "500px", display-mode: "form" }
Complexity = "" #@param ["","Too Simple, I am wasting time", "Good, But Not Challenging for me", "Good and Challenging for me", "Was Tough, but I did it", "Too Difficult for me"]
# Colab form field: the chosen difficulty rating is stored in `Complexity` (empty string = not answered).

In [None]:
#@title If it was too easy, what more would you have liked to be added? If it was very difficult, what would you have liked to have been removed? { run: "auto", display-mode: "form" }
Additional = "" #@param {type:"string"}
# Colab form field: the free-text feedback typed into the form is stored in `Additional`.

In [None]:
#@title Can you identify the concepts from the lecture which this experiment covered? { run: "auto", vertical-output: true, display-mode: "form" }
Concepts = "" #@param ["","Yes", "No"]
# Colab form field: the chosen Yes/No option is stored in `Concepts` (empty string = not answered).

In [None]:
#@title  Text and image description/explanation and code comments within the experiment: { run: "auto", vertical-output: true, display-mode: "form" }
Comments = "" #@param ["","Very Useful", "Somewhat Useful", "Not Useful", "Didn't use"]
# Colab form field: the chosen usefulness rating is stored in `Comments` (empty string = not answered).

In [None]:
#@title Mentor Support: { run: "auto", vertical-output: true, display-mode: "form" }
Mentor_support = "" #@param ["","Very Useful", "Somewhat Useful", "Not Useful", "Didn't use"]
# Colab form field: the chosen usefulness rating is stored in `Mentor_support` (empty string = not answered).

In [None]:
#@title Run this cell to submit your notebook for grading { vertical-output: true }
# Submit only once setup has been run: if `submission_id` is undefined (NameError)
# or empty, the user is asked to complete the setup instead.
try:
  if not submission_id:
      print("Please complete the setup first.")
  else:
      return_id = submit_notebook()
      # Keep the id handed back by the submission, when there is one
      if return_id:
          submission_id = return_id
except NameError:
  print ("Please complete the setup first.")