# LLM Generation at a high level

```python
query = "Capital of"

# Start from the prompt: generation extends the text given so far, so the
# running output must begin as the query rather than as an empty string.
output = query
for _ in range(MAX_GENERATED_TOKENS):
    # Feed everything produced so far back in and append what the LLM returns.
    output = output + LLM(output)
```

1. output = Capital of
2. output = Capital of France
3. output = Capital of France is
4. output = Capital of France is Paris

```
query = A chat between a curious user and an artificial intelligence assistant.
        The assistant gives helpful, detailed, and polite answers to the user's questions.
        USER: My name is Aniket
        ASSISTANT:
```

In [None]:
# Prompt skeleton: a two-line system preamble, then the user's turn, then an
# open "ASSISTANT:" marker for the model to continue from.
longchat_template = (
    "A chat between a curious user and an artificial intelligence assistant.\n"
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "USER: {input}\n"
    "ASSISTANT:"
)

In [None]:
# Render the template with a sample message to inspect the exact prompt text.
user_message = "My name is Aniket"
print(longchat_template.format(input=user_message))

In [None]:
# Same template, different user message: only the {input} slot changes.
user_message = "What is the capital of France?"
print(longchat_template.format(input=user_message))

In [None]:
from llm_inference import LLMInference, prepare_weights
from rich import print

In [None]:
# path = str(prepare_weights("meta-llama/Llama-2-7b-chat-hf"))
# model = LLMInference(checkpoint_dir=path, quantize="bnb.nf4")

In [None]:
# Resolve the checkpoint for the LongChat model, then build the inference
# wrapper from it. "bnb.nf4" is passed through as the quantization setting
# (presumably bitsandbytes NF4 -- confirm against llm_inference).
checkpoint = prepare_weights("lmsys/longchat-7b-16k")
path = str(checkpoint)
model = LLMInference(quantize="bnb.nf4", checkpoint_dir=path)

In [None]:
# Prompt skeleton with the system preamble on a single line, followed by the
# user's turn and an open "ASSISTANT:" marker for the model to continue from.
longchat_template = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "USER: {input}\n"
    "ASSISTANT:"
)

In [None]:
# Build the full prompt first, then hand it to the model and show the reply.
prompt = longchat_template.format(input="What is the capital of France?")
output = model.chat(prompt)
print(output)

In [None]:
# Build the full prompt first, then hand it to the model and show the reply.
prompt = longchat_template.format(input="My name is Aniket")
output = model.chat(prompt)
print(output)

In [None]:
# Build the full prompt first, then hand it to the model and show the reply.
prompt = longchat_template.format(input="Write a poem on Lightning AI")
output = model.chat(prompt)
print(output)

## Memory

In [None]:
# Two separate calls: each prompt is formatted from the current message only,
# so nothing from the first exchange is included in the second prompt.
for user_message in ("My name is Aniket?", "What is my name?"):
    output = model.chat(longchat_template.format(input=user_message))
    print(output)

In [None]:
# Template with an earlier exchange written directly into a "Context:" section
# ahead of the current user turn.
longchat_template = (
    "A chat between a curious user and an artificial intelligence assistant.\n"
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "Context:\n"
    "User: My name is Aniket\n"
    "Assistant: Hi, Aniket how are you?\n"
    "\n"
    "USER: {input}\n"
    "ASSISTANT:"
)

In [None]:
# Template with a previous exchange hard-coded under "Context:" so the prompt
# itself carries the information the follow-up question depends on.
longchat_template = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "Context:\n"
    "USER: My name is Aniket!\n"
    "ASSISTANT: How can I help you Aniket?\n"
    "USER: {input}\n"
    "ASSISTANT:"
)

prompt = longchat_template.format(input="What is my name?")
output = model.chat(prompt)
print(output)

In [None]:
# Template with a {history} slot so earlier turns can be supplied at call time
# instead of being hard-coded into the prompt text.
longchat_template = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
Context:
{history}
USER: {input}
ASSISTANT:"""

# Earlier turns use the same USER:/ASSISTANT: labels as the template itself,
# so the prompt does not contain two spellings of the assistant role.
history = "USER: Hi, I am Aniket!\nASSISTANT: How can I help you Aniket?"

output = model.chat(longchat_template.format(input="What is my name?", history=history))
print(output)

[PromptTemplate doc](https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/)

In [None]:
from langchain.prompts import PromptTemplate

# Raw template text: system preamble, a "Context:" section filled from
# {history}, the current user turn, and an open "ASSISTANT:" marker.
longchat_template = (
    "A chat between a curious user and an artificial intelligence assistant.\n"
    "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
    "Context:\n"
    "{history}\n"
    "USER: {input}\n"
    "ASSISTANT:"
)

# LangChain wrapper around the same text, declaring its two placeholders.
longchat_prompt_template = PromptTemplate(
    template=longchat_template,
    input_variables=["input", "history"],
)

In [None]:
# Preview the fully rendered prompt. The sample history uses the template's
# own USER:/ASSISTANT: labels rather than mixing in "Assistant:".
print(
    longchat_prompt_template.format(
        input="What is my name?",
        history="USER: Hi, I am Aniket!\nASSISTANT: How can I help you Aniket?",
    )
)

In [None]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferWindowMemory

from llm_chain import LitGPTLLM


# Adapter from llm_chain around the loaded model; it is passed to the chain as
# its llm.
llm = LitGPTLLM(model=model)


# Conversation chain whose memory fills the prompt's {history} slot.
# The memory prefixes are set to USER/ASSISTANT so that past turns are labelled
# the same way as the "USER:" / "ASSISTANT:" markers in the prompt template.
# k=2 is the window size handed to the memory (per the LangChain docs, the
# number of most recent exchanges kept).
conversation = ConversationChain(
    llm=llm,
    prompt=longchat_prompt_template,
    verbose=False,
    memory=ConversationBufferWindowMemory(ai_prefix="ASSISTANT", human_prefix="USER", k=2),
)

In [None]:
# Run one turn through the chain and display only its "response" field.
reply = conversation("hi, I am Aniket")
reply["response"]

In [None]:
# Follow-up turn on the same chain; display only its "response" field.
reply = conversation("What is my name?")
reply["response"]

In [None]:
# Another turn on the same chain; display only its "response" field.
reply = conversation("What is the timezone of London?")
reply["response"]

In [None]:
# Show the message store held by the chain's memory object.
chat_history = conversation.memory.chat_memory
print(chat_history)

In [None]:
from langchain.memory import ConversationBufferMemory

# Rebuild the chain with ConversationBufferMemory in place of the windowed
# memory. The prefixes are set to USER/ASSISTANT so that past turns placed in
# {history} carry the same role labels as the "USER:" / "ASSISTANT:" markers in
# the prompt template.
conversation = ConversationChain(
    llm=llm,
    prompt=longchat_prompt_template,
    verbose=False,
    memory=ConversationBufferMemory(ai_prefix="ASSISTANT", human_prefix="USER"),
)

In [None]:
# Tell the chain a fact, then print the "response" field of its result.
result = conversation(
    "PyTorch Lightning is an open-source library developed by Lightning AI team."
)
output = result["response"]
print(output)

In [None]:
# Ask about the fact given in the previous turn and print the "response" field.
result = conversation(
    "who developed PyTorch Lightning? just give me the name of the team or person and nothing else."
)
output = result["response"]
print(output)

* https://twitter.com/yanndubs/status/1681644889145237504?s=20