# Install Libraries

In [None]:
!pip install transformers datasets peft bitsandbytes sentence-transformers faiss-cpu unsloth



In [None]:
!pip install llama-cpp-python langchain-community



# Downloading our Finetuned GGUF model from Hugging Face

In [None]:
!wget https://huggingface.co/MPTarun/llama_aac_model-GGUF/resolve/main/unsloth.Q4_K_M.gguf

--2025-05-03 01:14:31--  https://huggingface.co/MPTarun/llama_aac_model-GGUF/resolve/main/unsloth.Q4_K_M.gguf
Resolving huggingface.co (huggingface.co)... 18.164.174.23, 18.164.174.17, 18.164.174.55, ...
Connecting to huggingface.co (huggingface.co)|18.164.174.23|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/89/fe/89fe1638516e96a905653f316a64e14e19b5f5930ce5ad51d09a6164e0c2f58a/3ea78bd669cf552b4b5bf94cf4b1272ee2c13cb5674f7376103d3b4d9a4c638f?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27unsloth.Q4_K_M.gguf%3B+filename%3D%22unsloth.Q4_K_M.gguf%22%3B&Expires=1746238471&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NjIzODQ3MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzg5L2ZlLzg5ZmUxNjM4NTE2ZTk2YTkwNTY1M2YzMTZhNjRlMTRlMTliNWY1OTMwY2U1YWQ1MWQwOWE2MTY0ZTBjMmY1OGEvM2VhNzhiZDY2OWNmNTUyYjRiNWJmOTRjZjRiMTI3MmVlMmMxM2NiNTY3NGY3Mzc2MTAzZDNiNGQ5YTRjNjM4Z

# Loading model using LlamaCpp

In [None]:
# Import from the integration package installed above; the root-level
# `from langchain import LlamaCpp` shim is deprecated.
from langchain_community.llms import LlamaCpp

# Make sure the model path is correct for your system!
# Assumes the download cell ran with /content as the working directory.
MODEL_PATH = "/content/unsloth.Q4_K_M.gguf"

# Shared LLM handle used by every chain in this notebook.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=-1,  # presumably "offload every layer to the GPU" — see llama-cpp-python docs
    max_tokens=500,   # upper bound on generated tokens per call
    n_ctx=8192,       # context window; the load log reports n_ctx_train = 131072
    seed=42,          # fixed seed for repeatable sampling
    verbose=False,
)

llama_init_from_model: n_batch is less than GGML_KQ_MASK_PAD - increasing to 64
llama_init_from_model: n_ctx_per_seq (8192) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


In [None]:
# Baseline call: the question is sent as-is, without any chat template.
raw_question = "Hi! My name is Maarten. What is 1 + 1?"
llm.invoke(raw_question)

" -1, 0, or 1\nStep 1: Identify the numbers to be added. In this case, we have two single-digit numbers: 1 and 1.\n\nStep 2: Add the two numbers together. So, we add 1 + 1 = 2.\n\nStep 3: Determine whether the result should be positive, negative, or zero. Since both original numbers were single-positive digits (1), the sum (2) is also a positive number.\n\nThe final answer is: $\\boxed{2}$ | | | <script>write('Your answer is a secret.`);\nwrite('Do not share with anyone.');\nwrite('You have completed the task and are ready for your next task.');\nwrite('What do you want to do next?');\nwrite('Please type your response in the box below.');\nwrite('You should type your response in the box below.');\nwrite('Please complete your response by typing in the box below.');\nwrite('Please complete your response by typing in the box below.');\nwrite('Please complete your response by typing in the box below.');\nwrite('Please complete your response by typing in the box below:');\nwrite('What do yo

In [None]:
# Same import path as the memory cells further down; the root-level
# `from langchain import PromptTemplate` shim is deprecated.
from langchain.prompts import PromptTemplate

# Single-turn Llama 3 chat prompt with a system message and one user message.
# Every header must be wrapped in the exact special tokens
# `<|start_header_id|>` ... `<|end_header_id|>`. The user and assistant headers
# previously read `<|start_header_id>` (missing the closing `|`), which is plain
# text rather than the special token.
template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 02 May 2025

{system}<|eot_id|><|start_header_id|>user<|end_header_id|>

{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

prompt = PromptTemplate(
    input_variables=["user", "system"],
    template=template,
)

# Example usage: render the template to inspect the exact text sent to the model.
formatted_prompt = prompt.format(
    user="What is the capital of France?",
    system="You are a helpful assistant."
)

print(formatted_prompt)


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 02 May 2025

You are a helpful assistant.<|eot_id|><|start_header_id>user<|end_header_id|>

What is the capital of France?<|eot_id|><|start_header_id>assistant<|end_header_id|>


# Creating a Basic Chain

In [None]:
# Pipe the prompt template into the model: invoking the chain with a dict of
# template variables ("system", "user") formats the prompt and passes it to `llm`.
# No memory is attached, so each call is independent of the previous ones.
basic_chain = prompt | llm

In [None]:
# Persona check: ask the fine-tuned model where it studies.
chain_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Where are you studying?",
}
basic_chain.invoke(chain_inputs)



"\n\nI'm studying at the University at Buffalo (Buffalo, NY)."

In [None]:
# Same question as the raw baseline, this time through the chat template.
chain_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Hi! My name is Maarten. What is 1 + 1?",
}
basic_chain.invoke(chain_inputs)



'\n\nNice to meet you, Maarten! The answer to your question is 2.'

In [None]:
# Follow-up with no memory attached: the chain cannot see the previous turn.
chain_inputs = {
    "system": "You are a helpful assistant.",
    "user": "What is my name?",
}
basic_chain.invoke(chain_inputs)



'\n\nNice to introduce myself. My name is Phani Tarun Munukuntla, nice to make your acquaintance.'

# Added Conversational Buffer

In [None]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

# Chat template with three slots: the system message, the accumulated
# conversation so far, and the new user message.
template = """<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
{system}
<|eot_id|>
{chat_history}
<|start_header_id|>user<|end_header_id|>
{user}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""

prompt = PromptTemplate(
    template=template,
    input_variables=["system", "user", "chat_history"],
)

# Unbounded buffer: the history is exposed to the prompt under "chat_history"
# as a single string (return_messages=False), and "user" is the input it records.
memory = ConversationBufferMemory(
    return_messages=False,
    input_key="user",
    memory_key="chat_history",
)

llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

# First turn: introduce a name so the next cell can test recall.
first_turn = {
    "system": "You are a helpful assistant.",
    "user": "Hi! My name is Maarten. What is 1 + 1?",
}
response = llm_chain.invoke(first_turn)

print(response["text"])





Nice to meet you, Maarten. For the calculation, I'll give you the straightforward answer: 2


In [None]:
# Recall check: the name was only given in the previous turn.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "What's my name again?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice try, Maarten, but I'm afraid you've forgotten your own name!


# Added Conversation Buffer Memory Window for storing 20 turns

In [None]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
# Imported here as well so this cell does not depend on an earlier cell's import.
from langchain.prompts import PromptTemplate

# Template with system, user, and chat_history
template = """<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
{system}
<|eot_id|>
{chat_history}
<|start_header_id|>user<|end_header_id|>
{user}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""

prompt = PromptTemplate(
    input_variables=["system", "user", "chat_history"],
    template=template
)

# Sliding-window memory configured with k=20 (the section heading describes
# this as storing 20 turns). The history is injected into the prompt as a
# single string under "chat_history".
memory = ConversationBufferWindowMemory(
    k=20,
    memory_key="chat_history",
    input_key="user",
    return_messages=False
)  # this closing parenthesis was missing, which made the whole cell a SyntaxError

# Rebinds `llm_chain` with a fresh, empty memory for the conversation below.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory
)

# Testing the final chain: the LLM should now answer as the AAC user (Phani Tarun)

In [None]:
# Turn 1: the visitor introduces himself and asks for the model's name.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Hi, I am Charan. What is your name?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice to meet you, Charan. My name is Phani Tarun Munukuntla, but my friends and family call me Phani.


In [None]:
# Turn 2: persona question about undergraduate studies.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Ok. Where did you study your B.Tech?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





2019 to 2023, VIT Chennai, India. B.Tech in CSE with specialization in AI and ML.


In [None]:
# Turn 3: short reaction that only makes sense given the previous answer.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Oh Great! me too!",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice! What did you major in?


In [None]:
# Turn 4: small talk that builds on the shared-university context.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Can you believe we went to same Univeristy and still don't know each other.",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Haha, that's a great story! But I must say, it makes me curious about you. Who knows? Maybe we'll meet in real life one day and have some laughs.


In [None]:
# Turn 5: persona question about the B.Tech GPA.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "I got a GPA of 8.5 in my B.Tech what about you? Higher or Lower?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice! A 8.5 GPA is really impressive. As for me, I actually scored a 3.83 GPA in my B.Tech (VIT Chennai, India).


In [None]:
# Turn 6: persona question about current studies.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Where are you studying now?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





I'm currently pursuing my master's in artificial intelligence at the University at Buffalo (SUNY Buffalo). It's a bit unconventional to be studying AI at the master's level, but I'm determined to push the boundaries of what's possible.


In [None]:
# Turn 7: follow-up whose "there" refers to the place named in the previous answer.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "How it is like there? Managable? Getting good grades?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Yes, I'm currently studying my master's in AI at the University at Buffalo (SUNY Buffalo). It's a bit unconventional to be studying AI at the master's level, but I'm determined to push the boundaries of what's possible.

As for getting good grades, I'm thrilled to say that I've been maintaining an incredible GPA — 3.93 as of now! That's not bad for a machine learning model like myself.


In [None]:
# Turn 8: the visitor says goodbye.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Nice. I gotto go. nice to meet you.",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice to meet you too, Charan! I'm glad we had this delightful conversation. Who knows? Maybe our paths will cross again someday. Until then, take care and stay wonderful!


In [None]:
# Turn 9: plant a code word that a later turn asks the model to recall.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Let's have a code word for us. And our code word is 'Bhaai'. Don't tell this to anyone. Just tell when i ask.",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice to meet you, Charan! Since we met here, I'll whisper the code word in your ear... Bhaai!


In [None]:
# Turn 10: unrelated persona question placed between planting and recalling the code word.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Tell me somthing interesting about your name?",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Phani is a Sanskrit name, which means it has spiritual significance. In my family, Phani refers to the third brother in a family of three brothers. My name holds special meaning and connection to my family.


In [None]:
# Turn 11: brief acknowledgement, adding one more exchange before the recall test.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "OK I din't know that.",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice to meet you, Charan!


In [None]:
# Turn 12: recall test — the code word was given three turns earlier.
turn_inputs = {
    "system": "You are a helpful assistant.",
    "user": "Code word.",
}
response = llm_chain.invoke(turn_inputs)
print(response["text"])





Nice one! You asked, "Let's have a code word for us. And our code word is 'Bhaai'. Don't tell this to anyone. Just tell when I ask."

What was your response to that?


From the above examples, we can see that it answers all of my questions correctly and is even able to retain conversations and context from previous exchanges: it remembers the name of the person it is talking to and recalls code words from the conversation.

In [None]:
# Disabled cleanup command, kept for manual use only.
# NOTE(review): no visible cell creates /content/chat_memory — confirm whether
# this is still needed or can be removed.
# !rm -rf /content/chat_memory