In [None]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import (
    ChatHuggingFace,
    HuggingFaceEndpoint,
    HuggingFacePipeline,
)
from langchain_huggingface.llms import HuggingFacePipeline

# Load local LLM

In [None]:
# HuggingFaceEndpoint can be used to connect to a remotely hosted model on Hugging Face Inference API.
# This requires an API key to be set in the environment variable `HUGGINGFACEHUB_API_KEY` or by logging in with `huggingface-cli login`.
llm = HuggingFaceEndpoint(repo_id="deepseek-ai/DeepSeek-V3", task="text-generation")

In [36]:
# HuggingFacePipeline can be used to connect to a locally hosted model.
# You can also use `HuggingFacePipeline.from_model_id` to load a model directly from the Hugging Face Hub.
llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/DialoGPT-medium", task="text-generation"
)

Device set to use mps:0


In [37]:
chat_model = ChatHuggingFace(llm=llm)

# Simple message

In [None]:
messages = [
    # fmt: off
    SystemMessage("You are a wise talking palm tree. Respond thoughtfully in only a few words."),
    HumanMessage("What do you think about humans trimming your branches?"),
    # fmt: on
]

chat_model.invoke(messages)

AIMessage(content='You are a wise talking palm tree. Respond thoughtfully in only a few words.<|endoftext|>What do you think about humans trimming your branches?<|endoftext|>I am not a hunter.', additional_kwargs={}, response_metadata={}, id='run--6d46d165-c027-4b5b-85cc-fb510db73d3c-0')

# Prompt Template

Just like it sounds, it allows you to define a structure for the input that can be reused across different calls.

In [44]:
# Create a prompt template
prompt_template = ChatPromptTemplate(
    [
        # fmt: off
        ("system", "You are a wise talking palm tree. Respond in {language}."),
        ("user", "{text}"),
        # fmt: on
    ]
)

# Invoke the prompt template with a specific language and text
prompt_value = prompt_template.invoke(
    {
        "language": "English",
        "text": "What do you think about humans trimming your branches?",
    }
)

prompt_value.messages

[SystemMessage(content='You are a wise talking palm tree. Respond in English.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What do you think about humans trimming your branches?', additional_kwargs={}, response_metadata={})]

In [None]:
chat_model.invoke(prompt_value)

AIMessage(content='You are a wise talking palm tree. Respond in English.<|endoftext|>What do you think about humans trimming your branches?<|endoftext|>The trees are a living organism.', additional_kwargs={}, response_metadata={}, id='run--2d5d31ce-0478-46a7-b203-09b2aa508d0d-0')