In [6]:
# Get apikey
import dotenv
import os

# Load variables from a local .env file into the process environment, then
# read the three settings used to build the chat model later on.
# Each value is None when the corresponding variable is not set.
dotenv.load_dotenv()
apikey = os.environ.get("AI-API-KEY")  # passed to the model as api_key
model_sel = os.environ.get("MODEL")    # passed to the model as model
url = os.environ.get("BASE-URL")       # passed to the model as base_url

In [7]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai.chat_models import ChatOpenAI

In [8]:
# Chat model client configured entirely from the environment-derived settings.
model = ChatOpenAI(model=model_sel, api_key=apikey, base_url=url)

### Message Types

In [9]:
# The system message sets the assistant's behaviour; the human message is the
# actual question. The prompt text keeps its embedded newline and indentation,
# written out here as explicit escapes.
sys_message = SystemMessage(
    content=(
        'You are a helpful assistant that responds to questions with three \n'
        '        exclamation marks.'
    )
)
human_message = HumanMessage(content='What is the capital of France?')

# Messages are sent in list order: system first, then human.
model.invoke([sys_message, human_message])

AIMessage(content='Paris!!!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 3, 'prompt_tokens': 30, 'total_tokens': 33, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'mistralai/mistral-small-3.2-24b-instruct:free', 'system_fingerprint': None, 'id': 'gen-1753275965-tpI1U61K81bKtNDHg7Iv', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--eb1fd8f3-0578-4940-97bc-9b68f5d44360-0', usage_metadata={'input_tokens': 30, 'output_tokens': 3, 'total_tokens': 33, 'input_token_details': {}, 'output_token_details': {}})

### Reusable Prompts (Templates)

In [10]:
from langchain_core.prompts import PromptTemplate

# Reusable string prompt with two placeholders: {context} and {question}.
# The text is spelled out line by line so the embedded newlines, indentation
# and trailing spaces are visible.
template = PromptTemplate.from_template(
    'Answer the question based on the\n'
    '    context below. If the question cannot be answered using the information \n'
    '    provided, answer with "I don\'t know".\n'
    '\n'
    'Context: {context}\n'
    '\n'
    'Question: {question}\n'
    '\n'
    'Answer: '
)

# Fill both placeholders; the result is displayed as the cell output.
template.invoke({
    "context": (
        "The most recent advancements in NLP are being driven by Large \n"
        "        Language Models (LLMs). These models outperform their smaller \n"
        "        counterparts and have become invaluable for developers who are creating \n"
        "        applications with NLP capabilities. Developers can tap into these \n"
        "        models through Hugging Face's `transformers` library, or by utilizing \n"
        "        OpenAI and Cohere's offerings through the `openai` and `cohere` \n"
        "        libraries, respectively."
    ),
    "question": "Which model providers offer LLMs?",
})

StringPromptValue(text='Answer the question based on the\n    context below. If the question cannot be answered using the information \n    provided, answer with "I don\'t know".\n\nContext: The most recent advancements in NLP are being driven by Large \n        Language Models (LLMs). These models outperform their smaller \n        counterparts and have become invaluable for developers who are creating \n        applications with NLP capabilities. Developers can tap into these \n        models through Hugging Face\'s `transformers` library, or by utilizing \n        OpenAI and Cohere\'s offerings through the `openai` and `cohere` \n        libraries, respectively.\n\nQuestion: Which model providers offer LLMs?\n\nAnswer: ')

`context` and `question` are template parameters, so the final prompt is built dynamically from whatever values are passed to `invoke`.

In [12]:
# String prompt template with {context} and {question} placeholders.
# Written with explicit escapes so the embedded whitespace is visible.
template = PromptTemplate.from_template(
    'Answer the question based on the \n'
    '    context below. If the question cannot be answered using the information \n'
    '    provided, answer with "I don\'t know".\n'
    '\n'
    'Context: {context}\n'
    '\n'
    'Question: {question}\n'
    '\n'
    'Answer: '
)

# Chat model client built from the same environment-derived settings.
model = ChatOpenAI(model=model_sel, api_key=apikey, base_url=url)

# One pass through the pipeline: `prompt` is the template filled with concrete
# values, and `completion` is the model's reply to that prompt.

prompt = template.invoke({
    "context": (
        "The most recent advancements in NLP are being driven by Large\n"
        "        Language Models (LLMs). These models outperform their smaller \n"
        "        counterparts and have become invaluable for developers who are creating \n"
        "        applications with NLP capabilities. Developers can tap into these \n"
        "        models through Hugging Face's `transformers` library, or by utilizing \n"
        "        OpenAI and Cohere's offerings through the `openai` and `cohere` \n"
        "        libraries, respectively."
    ),
    "question": "Which model providers offer LLMs?",
})

completion = model.invoke(prompt)

In [13]:
# Show only the text of the model's reply stored in `completion`.
completion.content

'The model providers that offer LLMs mentioned in the context are Hugging Face, OpenAI, and Cohere.'

### Prompt templates + Roles

In [15]:
from langchain_core.prompts import ChatPromptTemplate
# Chat-style template: one system instruction followed by two human messages,
# each of which carries a single placeholder.
template = ChatPromptTemplate.from_messages([
    (
        'system',
        'Answer the question based on the context below. If the \n'
        '        question cannot be answered using the information provided, answer with \n'
        '        "I don\'t know".',
    ),
    ('human', 'Context: {context}'),
    ('human', 'Question: {question}'),
])

# Fill the chat template's placeholders to get the concrete message list.
prompt = template.invoke({
    "context": (
        "The most recent advancements in NLP are being driven by Large \n"
        "        Language Models (LLMs). These models outperform their smaller \n"
        "        counterparts and have become invaluable for developers who are creating \n"
        "        applications with NLP capabilities. Developers can tap into these \n"
        "        models through Hugging Face's `transformers` library, or by utilizing \n"
        "        OpenAI and Cohere's offerings through the `openai` and `cohere` \n"
        "        libraries, respectively."
    ),
    "question": "Which model providers offer LLMs?",
})

In [16]:
# Display the filled-in prompt to inspect the messages it contains.
prompt

ChatPromptValue(messages=[SystemMessage(content='Answer the question based on the context below. If the \n        question cannot be answered using the information provided, answer with \n        "I don\'t know".', additional_kwargs={}, response_metadata={}), HumanMessage(content="Context: The most recent advancements in NLP are being driven by Large \n        Language Models (LLMs). These models outperform their smaller \n        counterparts and have become invaluable for developers who are creating \n        applications with NLP capabilities. Developers can tap into these \n        models through Hugging Face's `transformers` library, or by utilizing \n        OpenAI and Cohere's offerings through the `openai` and `cohere` \n        libraries, respectively.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Question: Which model providers offer LLMs?', additional_kwargs={}, response_metadata={})])

In [17]:
# Send the filled-in chat prompt to the model and display the reply.
model.invoke(prompt)

AIMessage(content='Based on the context provided, the model providers that offer LLMs are Hugging Face, OpenAI, and Cohere.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 152, 'total_tokens': 178, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'mistralai/mistral-small-3.2-24b-instruct:free', 'system_fingerprint': None, 'id': 'gen-1753276766-Tpr2wO0eGw1euikVYDHE', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--52858b6b-7f92-40b5-8311-41ff83eae63c-0', usage_metadata={'input_tokens': 152, 'output_tokens': 26, 'total_tokens': 178, 'input_token_details': {}, 'output_token_details': {}})

### Structured Outputs

In [19]:
# Pydantic schema describing the structured reply requested from the model.
from pydantic import BaseModel, Field


class AnswerWithJustification(BaseModel):
    '''An answer to the user's question along with justification for the answer.'''

    # Descriptions are declared through Field(description=...). A bare string
    # placed after a field is not picked up as its description by pydantic
    # unless `use_attribute_docstrings` is enabled, so it would never reach the
    # schema generated from this class.
    answer: str = Field(description="The answer to the user's question")
    justification: str = Field(description="Justification for the answer")

In [20]:
# Wrap the chat model so it is asked for output matching AnswerWithJustification.
structured_llm = model.with_structured_output(AnswerWithJustification)

# The question keeps its embedded newline and indentation, shown as escapes.
response = structured_llm.invoke(
    "What weighs more, a pound of bricks or a pound \n"
    "    of feathers"
)

In [29]:
# Convert the structured response to a plain dict for display.
dict(response)

{'answer': 'A pound of bricks and a pound of feathers weigh the same---one pound. The difference is in the volume they occupy. Bricks are much denser than feathers, so a pound of bricks takes up less space than a pound of feathers.',
 'justification': 'Both objects are measured in pounds, which is a unit of weight. Therefore, by definition, they weigh the same, even though they have very different densities and volumes.'}

### Testing with Ollama

In [30]:
from langchain_ollama import ChatOllama

# Repeat the structured-output experiment against an Ollama server listening
# on the loopback address.
ollama_model = ChatOllama(model="deepseek-r1:1.5b", base_url='http://127.0.0.1:11434')

ollama_structured = ollama_model.with_structured_output(AnswerWithJustification)
response = ollama_structured.invoke(
    "What weighs more, a pound of bricks or a pound \n"
    "    of feathers"
)

# Display the structured response as a plain dict.
dict(response)

{'answer': 'Both weigh the same. The weight is not dependent on the volume or material.',
 'justification': 'In both cases, one pound of mass corresponds to approximately 450 grams of matter. The difference lies in their density and composition; feathers are much lighter per unit volume compared to bricks made of heavier materials like brick or concrete.'}

## Other Formats

In [31]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
# Parse a comma-separated string into its individual items.
parser = CommaSeparatedListOutputParser()
items = parser.invoke("apple, banana, cherry")
# Bare last expression so the notebook displays the parsed result.
items

['apple', 'banana', 'cherry']

In [36]:
# Compare three ways of calling the chat model: invoke, batch and stream.

# invoke: one input, one complete reply.
print("Only Hi")
completion = ollama_model.invoke('Hi there!')
print(completion.content)
print()
print()

# batch: a list of inputs, a list of complete replies in the same order.
print("Hi and Bye Lists")
completions = ollama_model.batch(['Hi there!', 'Bye!'])
for reply in completions:
    print(reply.content)
print()
print()

# stream: the reply arrives as a sequence of chunks.
print("Stream")
for token in ollama_model.stream('Bye!'):
    # Chunks already carry their own whitespace, so they are joined verbatim.
    # A separator of ' ' would split words and punctuation ("Hey !  How 's").
    print(token.content, end='', flush=True)

Only Hi
<think>

</think>

Hello! How can I assist you today? 😊


Hi and Bye Lists
<think>

</think>

Hello! How can I assist you today? 😊
<think>

</think>

Hey! How's it going? Anything on your mind or need help with anything? 😊


Stream
<think> 

 </think> 

 Hey !  👋  How 's  it  going ?  Anything  on  your  mind  or  need  help  with  anything ?  

## Imperative vs Declarative


In [37]:
# Imperative
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import chain

# the building blocks: a two-message chat prompt and the chat model

template = ChatPromptTemplate.from_messages(
    [
        ('system', 'You are a helpful assistant.'),
        ('human', '{question}'),
    ]
)

model = ChatOpenAI(model=model_sel, api_key=apikey, base_url=url)

# Combine the building blocks in a plain function.
# The @chain decorator gives the function the Runnable interface, so it can be
# called through .invoke() like the template and the model.

@chain
def chatbot(values):
    """Fill `template` with `values` and return the model's reply to that prompt."""
    return model.invoke(template.invoke(values))

# use it: call the decorated function through .invoke() with the dict of
# values the template expects

chatbot.invoke({"question": "Which model providers offer LLMs?"})

AIMessage(content='There are several providers that offer Large Language Models (LLMs). Here are some of the notable ones:\n\n1. **OpenAI**: They are known for models like GPT-3, GPT-3.5, and GPT-4.\n\n2. **Google (DeepMind)**: Google offers models like PaLM (Pathways Language Model) and LaMDA (Language Model for Dialogue Applications).\n\n3. **Microsoft**: Microsoft has partnered with OpenAI and also offers models like the one powering Bing Chat.\n\n4. **Meta (formerly Facebook)**: Meta has released LLaMA (Large Language Model Meta AI), a foundational large language model designed to help researchers advance their work in the field of AI.\n\n5. **Anthropic**: They have developed models like Claude, which is designed to be helpful, honest, and harmless.\n\n6. **Mistral AI**: They offer models like Mixtral 8x7B and Mixtral 8x22B.\n\n7. **Cohere**: They provide models that are optimized for specific use cases and industries.\n\n8. **NVIDIA**: NVIDIA offers models like NeMo, which is desi

In [39]:
## Streaming
@chain
def chatbot(values):
    """Fill `template` with `values` and yield the model's reply chunk by chunk."""
    yield from model.stream(template.invoke(values))

# Consume the stream and print each chunk object as it arrives.
question_input = {"question": "Which model providers offer LLMs?"}
for part in chatbot.stream(question_input):
    print(part)

content='' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content='Several' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' companies' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' and' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' organizations' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' offer' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' large' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' language' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' models' additional_kwargs={} response_metadata={} id='run--8f992055-0a3e-495d-8f30-ca8e769ec702'
content=' (' additional_kwargs={} res

In [None]:
# Declarative: the `|` operator composes the prompt template and the model
# into a single chain, replacing the hand-written function.
chatbot = template | model
chatbot.invoke({"question": "Which model providers offer LLMs?"})
# Streaming works the same way on the composed chain.

AIMessage(content="There are several providers that offer large language models (LLMs). Here are some notable ones:\n\n1. **OpenAI**: Known for models like GPT-3, GPT-3.5, and GPT-4.\n2. **Anthropic**: Offers models like Claude.\n3. **Google (DeepMind)**:** Provides models such as PaLM and LaMDA.\n4. **Meta (formerly Facebook)**: Known for LLaMA(Large Language Model Meta AI) and OPT (Open Pre-trained Transformers).\n5. **Microsoft**: Offers models like those based on OpenAI's technology, integrated into Azure.\n6. **AWS (Amazon Web Services)**: Provides Amazon Bedrock with models like Anthropic's Claude and AI21 Labs' Jurassic.\n7. **Mistral AI**: Known for models such as Mistral Large and Mixtral 8x7B.\n8. **AI21 Labs**: Provides models like Jurassic.\n9. **NVIDIA**: Offers models such as Megatron-Turing NLG.\n10. **Hugging Face**: Provides a wide range of open-source models and tools for working with LLMs.\n\nThese providers offer a mix of open-source and proprietary models, catering