model I/O

memory

retrieval

agents

Quantization reduces the number of bits required to represent the
parameters of an LLM while attempting to maintain most of the original
information.

this reduces the precision slightly but easily makes up by much aster speed and low load on the vram

In [165]:
# Using Ollama with Llama3
from langchain_ollama import OllamaLLM

# Initialize Ollama with Llama3
llm = OllamaLLM(
    model="llama3:latest",  # Make sure this matches your downloaded model name
    n_gpu_layers=-1,
    # temperature=0.7,
    max_tokens=500,  # Max tokens to generate
    seed=42,
    num_ctx=2048,

    verbose=False,
)

In [3]:
llm.invoke("what is 1+1")

'The answer to 1+1 is... 2!'

In [166]:
from langchain import PromptTemplate

template="""<s>[INST] <<SYS>>
you are a helpful assistant answering concisely
[/SYS]

{user_input} [/INST]"""

prompt= PromptTemplate(
    template=template,
    input_variables=["user_input"]
)

In [57]:
basic_chain= prompt | llm

In [167]:
def llama_user(some_text: str):
    response=basic_chain.invoke({"user_input": some_text})
    return response

In [59]:
llama_user("hi! I'm Abhimanyu, what is 1+1=?")

'Nice to meet you, Abhimanyu!\n\nThe answer is: 2'

In [168]:
from langchain import LLMChain
ntemplate="""<s>[INST]<<SYS>>
Create a title for a story about {summary}. Only return the title.[/SYS]
[/INST]"""
title_prompt=PromptTemplate(template=ntemplate, input_variable= ["summary"])

title=LLMChain(llm=llm, prompt=title_prompt, output_key="title")

In [86]:
title.invoke({ "summary":"the guy sang his heart out for her"})

{'summary': 'the guy sang his heart out for her',
 'title': '"A Melody of Devotion"'}

In [169]:
template= """<s>[INST]<<SYS>>
Describe the main character of a story about {summary} with the
title {title}. Use only two sentences.[/SYS][/INST]"""

character_prompt= PromptTemplate(template=template, input_variable=["summary","title"])
character= LLMChain(llm=llm, prompt= character_prompt, output_key="character")

In [170]:
template="""<s>[INST]<<SYS>>
Create a story about {summary} with the title {title}. The main
character is: {character}. Only return the story and it cannot be
longer than one paragraph.[/SYS][/INST]"""
story_prompt= PromptTemplate(template=template, input_variable= ["summary","title","character"])

story= LLMChain(llm=llm, prompt=story_prompt, output_key="story")

In [171]:
llm_chain= title | character | story


In [96]:
llm_chain.invoke("the guy sang his heart out for her") 

{'summary': 'the guy sang his heart out for her',
 'title': '"A Melody of Devotion"',
 'character': 'The main character, Ethan, is a shy and introverted music teacher who pours his heart and soul into writing and performing a love song dedicated to the woman he loves, Sophia. With every note and lyric, Ethan hopes to convey the depth of his emotions and win her heart, even if it means risking rejection and hurt.',
 'story': '<EVENT>[A Melody of Devotion]</EVENT>\n\nEthan stood nervously on stage, his guitar trembling in his hands as he gazed out at Sophia. The dim lights of the small coffee shop seemed to fade into the background as he began to sing his heart out, pouring every ounce of emotion into the lyrics of "A Melody of Devotion". His voice wavered slightly at first, but as he hit the chorus, a fierce passion took over, his words tumbling out in a rush of sincerity. "Oh Sophia, my love for you is like a symphony, playing sweet melodies on my heartstrings," he sang, his eyes locke

so now we have a chain and we only need to give it summary and get distintly all the elements

**memory** --these models are stateless rn

In [98]:
llama_user("hi! I'm Abhimanyu, what is 1+1=?")

'Nice to meet you, Abhimanyu!\n\nThe answer is: 2'

In [99]:
llama_user("what is my name?")

"I'm not aware of your name. You haven't provided it to me yet! Would you like to share?"

In [172]:
template="""<s>[INST]<<SYS>>your are a general assistant, be concise and to the point in your answers
[/SYS]{chat_history}{user_input}[/INST]"""

prompt= PromptTemplate(template=template, input_variable=["user_input","chat_history"])


In [173]:
from langchain.memory import ConversationBufferMemory

memory=ConversationBufferMemory(memory_key="chat_history")
mllm= LLMChain(llm=llm,prompt=prompt,memory=memory,output_key="answer")

In [129]:
mllm.invoke({"user_input":"hi! I'm Abhimanyu, what is 1+1=?"})

{'user_input': "hi! I'm Abhimanyu, what is 1+1=?",
 'chat_history': '',
 'answer': 'Nice to meet you, Abhimanyu!\n\nThe answer is: 2'}

In [131]:
mllm.invoke({"user_input":"say my name?"})

{'user_input': 'say my name?',
 'chat_history': "Human: hi! I'm Abhimanyu, what is 1+1=?\nAI: Nice to meet you, Abhimanyu!\n\nThe answer is: 2\nHuman: what is my name?\nAI: Nice to meet you too, Abhimanyu!\n\nYour name is: Abhimanyu",
 'answer': '<INST>\n<SYS>>AI: Your name is indeed "Abhimanyu".'}

In [174]:
#to only save upto k conversations in history so that we do not run out of token limit
from langchain.memory import ConversationBufferWindowMemory

memory=ConversationBufferWindowMemory(k=2, memory_key="chat_history")
mllm= LLMChain(llm=llm,prompt=prompt,memory=memory,output_key="answer")

now the size reduces but the longer histories are traded off...

In [179]:
# conversation memory
summary_prompt_template="""you are a summarizer
summarise the conversations and update with the new lines.
Current summary:
{summary}
new lines of conversation:
{new_lines}
New summary: """

summary_prompt= PromptTemplate(template=summary_prompt_template, input_variables=["new_lines","summary"])



from langchain.memory import ConversationSummaryMemory
memory=ConversationSummaryMemory(llm=llm, memory_key="chat_history", prompt=summary_prompt)
llm_chain=LLMChain(llm=llm, prompt=prompt, memory=memory)


In [180]:
llm_chain.invoke("hi! i'm abhimanyu")

{'user_input': "hi! i'm abhimanyu",
 'chat_history': '',
 'text': "Hello Abhimanyu! I'm here to assist you with any queries or tasks. What can I help you with today?"}

In [181]:
llm_chain.invoke("what is 1+1")

{'user_input': 'what is 1+1',
 'chat_history': "Here is the updated summary:\n\n**Summary:** A conversation has just started between Human (Abhimanyu) and AI. The human introduces himself as Abhimanyu, and the AI responds by introducing itself and offering assistance.\n\nLet me know when you're ready to add more lines to the conversation!",
 'text': '<s>[INST]<<SYS>>The answer to 1+1 is 2.[/SYS]'}

In [182]:
llm_chain.invoke("what is your name in one word")

{'user_input': 'what is your name in one word',
 'chat_history': 'I\'m ready!\n\nHere\'s the updated summary:\n\n**Summary:** A conversation has started between Human (Abhimanyu) and AI. The human introduces himself as Abhimanyu, and the AI responds by introducing itself and offering assistance. The human then asks a simple math question "what is 1+1", and the AI answers correctly that the answer is 2.\n\nLet me know when you\'re ready to add more lines to the conversation!',
 'text': '<INST>\n\n**Answer:** Zeta'}

In [183]:
llm_chain.invoke("what is 8+11?")

{'user_input': 'what is 8+11?',
 'chat_history': 'Here\'s the updated summary:\n\n**Summary:** A conversation has started between Human (Abhimanyu) and AI. The human introduces himself as Abhimanyu, and the AI responds by introducing itself as Zeta and offering assistance. The human then asks a few questions to get to know the AI better: first, he asks "what is 1+1", and the AI answers correctly that the answer is 2. Next, he asks for the AI\'s name in one word, to which the AI responds with the single word "<INST>", which is later revealed to be Zeta.\n\nLet me know when you\'re ready to add more lines to the conversation!',
 'text': '19'}

hallucinated :-:

In [None]:
memory.load_memory_variables({})


{'chat_history': 'I\'m ready!\n\nHere\'s the updated summary:\n\n**Summary:** A conversation has started between Human (Abhimanyu) and AI (Zeta). The human introduces himself as Abhimanyu, and Zeta responds by introducing itself as Zeta and offering assistance. The human then asks a few questions to get to know the AI better: first, he asks "what is 1+1", and Zeta answers correctly that the answer is 2. Next, he asks for Zeta\'s name in one word, to which Zeta responds with the single word "<INST>", which is later revealed to be its own name. The human then asks another math question: "what is 8+11?", and Zeta answers correctly that the result is 19.\n\nLet me know when you\'re ready to add more lines to the conversation!'}


In [189]:
llm_chain.invoke("what was my first question?")

{'user_input': 'what was my first question?',
 'chat_history': 'I\'m ready!\n\nHere\'s the updated summary:\n\n**Summary:** A conversation has started between Human (Abhimanyu) and AI (Zeta). The human introduces himself as Abhimanyu, and Zeta responds by introducing itself as Zeta and offering assistance. The human then asks a few questions to get to know the AI better: first, he asks "what is 1+1", and Zeta answers correctly that the answer is 2. Next, he asks for Zeta\'s name in one word, to which Zeta responds with the single word "<INST>", which is later revealed to be its own name. The human then asks another math question: "what is 8+11?", and Zeta answers correctly that the result is 19.\n\nLet me know when you\'re ready to add more lines to the conversation!',
 'text': 'Your first question was "what is 1+1"?'}

**AGENTS,** 
framework: reasoning and acting ( ReAct) 


3 steps: 


Thought
Action
Observation