In [2]:
!pip install langchain-community langchain-core
!pip install llama-cpp-python
!pip install langchain

Collecting langchain-community
  Downloading langchain_community-0.3.16-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core
  Downloading langchain_core-0.3.33-py3-none-any.whl.metadata (6.3 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.16 (from langchain-community)
  Downloading langchain-0.3.17-py3-none-any.whl.metadata (7.1 kB)
Collecting langsmith<0.4,>=0.1.125 (from langchain-community)
  Downloading langsmith-0.3.4-py3-none-any.whl.metadata (14 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing

In [3]:
from langchain import LlamaCpp, LLMChain, PromptTemplate
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory

In [4]:
llm = LlamaCpp(model_path="Phi-3-mini-4k-instruct-fp16.gguf", 
               n_gpu_layers=-1, 
               max_tokens=500, 
               n_ctx=2048, 
               seed=42,
               verbose=False
)

llama_init_from_model: n_batch is less than GGML_KQ_MASK_PAD - increasing to 32
llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_

In [5]:
template = """<s><|user|>
{input_prompt}<|end|>
<|assistant|>
"""
prompt = PromptTemplate(
    template=template,
    input_variables=["input_prompt"]
)

In [6]:
basic_chain = prompt | llm

In [7]:
basic_chain.invoke({
    'input_prompt': 'Hi, My name is Hassan. What is 1 + 1 ?'
})



" 1 + 1 equals 2. It's a basic arithmetic addition problem!"

##### Multiple Prompts

In [8]:
template = """<s><|user|>
Create a title for a story about {summary}. Only return the title.
<|end|>
<|assistant|>
"""
title_prompt = PromptTemplate(template=template, input_variables=['summary'])
title = LLMChain(llm=llm, prompt=title_prompt, output_key='title')

  title = LLMChain(llm=llm, prompt=title_prompt, output_key='title')


In [9]:
title.invoke({"summary": "a girl that lost her mother"})



{'summary': 'a girl that lost her mother',
 'title': ' "Echoes of a Mother\'s Love: A Girl\'s Journey Through Loss"'}

In [10]:
template="""<s><|user|>
Describe the main character of story about {summary} with the title {title}.
User only two sentences. <|end|>
<|assistant|>
"""
character_prompt=PromptTemplate(template=template, input_variables=['summary', 'title'])
character=LLMChain(llm=llm, prompt=character_prompt, output_key='character')

In [11]:
template="""<s><|user|>
Create a stroy about {summary} with the title {title}. The nain character is: {character}. Only return the 
story and it cannot be longer than one paragraph. <|end|> 
<|assistant|>
"""
story_prompt=PromptTemplate(template=template, input_variables=['summary', 'title', 'character'])
story=LLMChain(llm=llm, prompt=story_prompt, output_key='story')

In [12]:
llm_chain = title | character | story
llm_chain.invoke("a girl that lost her mother")



{'summary': 'a girl that lost her mother',
 'title': ' "Whispers of a Mother\'s Love: A Tale of Healing"',
 'character': " The main character, Lily, is a compassionate and resilient young girl who grapples with the loss of her mother while learning to navigate life without her guiding presence. As she embarks on a journey towards healing and self-discovery, Lily draws strength from cherished memories of her mother's love and wisdom that continue to guide her through difficult times.",
 'story': ' "Whispers of a Mother\'s Love: A Tale of Healing" tells the story of Lily, a compassionate and resilient young girl who was forced to face the harsh reality of losing her mother. With each passing day without her guiding presence, Lily struggled with grief and confusion, but found solace in cherished memories that revealed the depths of her mother\'s love and wisdom. As she embarked on a journey towards healing and self-discovery, Lily learned to draw strength from these whispers of her mother

##### Conversation Buffer

In [13]:
template="""<s><|user|> Current Conversation: {chat_history}
{input_prompt}<|end|>
<|assistant|>
"""
prompt=PromptTemplate(
    template=template,
    input_variables=['input_prompt', 'chat_history']
)

In [14]:
memory = ConversationBufferMemory(memory_key="chat_history")
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory
)

  memory = ConversationBufferMemory(memory_key="chat_history")


In [15]:
llm_chain.invoke({"input_prompt": "Hi! My name is Hassan. What is 1 + 1?"})



{'input_prompt': 'Hi! My name is Hassan. What is 1 + 1?',
 'chat_history': '',
 'text': " The answer to 1 + 1 is 2. It's a basic arithmetic operation where you add one unit to another, resulting in two units."}

In [16]:
llm_chain.invoke({"input_prompt": "What is my name?"})



{'input_prompt': 'What is my name?',
 'chat_history': "Human: Hi! My name is Hassan. What is 1 + 1?\nAI:  The answer to 1 + 1 is 2. It's a basic arithmetic operation where you add one unit to another, resulting in two units.",
 'text': ' Your name is Hassan.'}

##### Summary Buffer

In [17]:
summary_prompt_template = """<s><|user|>Summarize the
conversations and update with the new lines.

Current summary
{summary}

new lines of conversation:
{new_lines}

New summary:<|end|>
<|assistant|>"""
summary_prompt=PromptTemplate(
    input_variables=["new_lines", "summary"],
    template=summary_prompt_template
)

In [18]:
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    prompt=summary_prompt
)

  memory = ConversationSummaryMemory(


In [27]:
llm_chain=LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory
)

In [28]:
llm_chain.invoke({
    "input_prompt": "Hi! My name is Hassan, What is 1+1?"
})



{'input_prompt': 'Hi! My name is Hassan, What is 1+1?',
 'chat_history': ' Hassan introduces himself and inquires about the sum of 1+1, to which the AI confirms it is 2. The AI explains basic arithmetic addition and its process. When asked about their names, the AI clarifies that as an Assistant, it doesn\'t have access to personal data unless shared during the conversation. Hassan then asks how he introduced himself in this context, to which the AI responds by mentioning that within this conversation, it has been identified as "Assistant." The AI explains its purpose is to assist and answer questions without needing a specific name beyond its function.',
 'text': ' Hello Hassan! I\'m delighted to assist you. The sum of 1+1 is indeed 2. In basic arithmetic, addition is the process where two or more numbers are combined to find their total amount. Here\'s how it works:\n\nIf we have one item and add another single item to that collection (in this case, both items being \'1\'), then you 

In [29]:
llm_chain.invoke({
    "input_prompt": "How I indroduced my self?" 
})



{'input_prompt': 'How I indroduced my self?',
 'chat_history': ' Hassan introduces himself and asks the AI about the sum of 1+1, to which it confirms that it is 2. The AI explains basic arithmetic addition as combining two or more numbers to find their total amount using relatable examples. It identifies itself as "Assistant" in this conversation, emphasizing its role to assist users with various queries without the necessity of personal names during interactions.',
 'text': ' You introduced yourself by stating your name, "Hassan," and then asked the AI about a simple math problem - the sum of 1+1. It\'s perfectly fine to introduce oneself with their name during interactions; it helps personalize the conversation while maintaining clarity on who is seeking assistance or information. However, if you prefer not to use your real name for privacy reasons, simply introducing yourself by a nickname or initials would still be effective and acceptable in many contexts.'}