In [2]:
from langchain_ollama import ChatOllama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationSummaryMemory
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import Optional
from langchain.output_parsers import OutputFixingParser

In [3]:
# Schema of the assistant's structured reply; passed to PydanticOutputParser as
# the model the LLM's JSON output is parsed into.
class StructuredResponse(BaseModel):
    # Required: the main reply text.
    Answer: str = Field(description="Main answer text, can include explanation or code")
    # Optional tool command; defaults to an empty string when omitted.
    Command: Optional[str] = Field(default="", description="Custom command for tools, if any")

In [4]:

# ---------------------------
# LLM, output parsing and conversation memory
# ---------------------------
# A single chat model instance is shared by the fixing parser and the memory below.
llm = ChatOllama(
    model="gemma3:4b",
    temperature=0,
)

# Parser that targets the StructuredResponse schema.
base_parser = PydanticOutputParser(pydantic_object=StructuredResponse)

# Wraps base_parser and is given the LLM; LangChain documents OutputFixingParser
# as asking that LLM to repair output the wrapped parser failed to parse.
parser = OutputFixingParser.from_llm(
    parser=base_parser,
    llm=llm,
)

# Conversation memory backed by the same LLM; it exposes a "history" summary.
summary_memory = ConversationSummaryMemory(llm=llm)

  summary_memory = ConversationSummaryMemory(llm=llm)


In [5]:
# Prompt for one chat turn. {history} and {input} are filled in per call, while
# {format_instructions} is pre-filled once from the parser's format instructions.
prompt = PromptTemplate(
    template="""
You are a helpful assistant.

Conversation so far:
{history}

User input: {input}

Respond strictly in JSON format.
Always return both fields: "Answer" and "Command".
If no command, set "Command": "".

{format_instructions}
""",
    # Variables the caller must supply on every invocation.
    input_variables=["input", "history"],
    # Bound here so callers never pass the format instructions themselves.
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [6]:
# Chain for a single turn: combines the prompt, the chat model and the output
# parser, so the "text" entry of its result holds the parsed reply.
structured_chain = LLMChain(llm=llm, prompt=prompt, output_parser=parser)

  structured_chain = LLMChain(


In [7]:
def ask(question: str):
    """Run one conversation turn and return the parsed reply.

    The current conversation summary is read from ``summary_memory`` and sent
    to ``structured_chain`` together with ``question``. Afterwards the question
    and the reply's ``Answer`` text are written back into the memory.

    Args:
        question: The user's message for this turn.

    Returns:
        The object stored under the ``"text"`` key of the chain result
        (a ``StructuredResponse`` in the recorded runs).
    """
    # A missing "history" entry is treated as an empty conversation so far.
    past_summary = summary_memory.load_memory_variables({}).get("history", "")

    chain_inputs = {"input": question, "history": past_summary}
    chain_output = structured_chain.invoke(chain_inputs)
    print("result-->",chain_output)

    # The chain returns a dict; the parsed reply sits under "text".
    reply = chain_output["text"]

    # Only the Answer text is remembered, not the whole JSON payload.
    summary_memory.save_context({"input": question}, {"output": reply.Answer})
    return reply


In [8]:
# First turn: send an introduction, then show the parsed reply and what the
# summary memory holds after ask() has saved this exchange.
response = ask("Hello, my name is Anshul.")
print("Structured:", response)
print("Summary memory:", summary_memory.load_memory_variables({}))

result--> {'input': 'Hello, my name is Anshul.', 'history': '', 'text': StructuredResponse(Answer='Hello Anshul, nice to meet you!', Command='')}
Structured: Answer='Hello Anshul, nice to meet you!' Command=''
Summary memory: {'history': 'The human introduces himself as Anshul, and the AI responds with a greeting and expresses pleasure in meeting him.'}


In [10]:
# Follow-up turn: ask() loads the stored summary from summary_memory and passes
# it to the chain as {history} before answering.
response = ask("who are u?")
print(response)

result--> {'input': 'who are u?', 'history': 'The human introduces himself as Anshul, and the AI confirms his name is Anshul.', 'text': StructuredResponse(Answer='I am an AI assistant, a large language model created by Google.', Command='')}
Answer='I am an AI assistant, a large language model created by Google.' Command=''
