In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv  

In [2]:
# Load environment variables from a .env file.
# Keeping API keys and other sensitive settings in .env (instead of in the
# notebook) keeps them out of the source code.
# The call is deliberately left as the cell's last expression so that its
# return value is shown as the cell output (True in the recorded run).
load_dotenv()

True

In [3]:
# Create the endpoint client used to talk to a model hosted on the
# Hugging Face Hub.
#   repo_id -> the exact Hub repository of the model to use; here
#              "TinyLlama/TinyLlama-1.1B-Chat-v1.0", a small chat model.
#   task    -> the task the model is used for ("text-generation").
# NOTE(review): presumably the Hugging Face access token is picked up from
# the environment populated via .env -- confirm the expected variable name.
llm = HuggingFaceEndpoint(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0", task="text-generation")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# TODO: Hugging Face added featherless.ai as an inference provider; research this and update the code below.
# Wrap the HuggingFaceEndpoint model with ChatHuggingFace.
# ChatHuggingFace adapts the raw HuggingFace model (llm) to work with
# LangChain's chat-oriented interfaces, making it easier to send and receive
# chat messages.
# model = ChatHuggingFace(llm=llm)

In [5]:
# Invoke the chat model with a prompt.
# We are asking a factual question: "What is the capital of France".
# The 'invoke' method sends this prompt to the underlying Hugging Face model
# via the ChatHuggingFace wrapper and awaits its response.
# Since featherless.ai is the inference provider, a featherless.ai API key may be needed to make this work.
# result = model.invoke("What is the capital of France")

In [6]:
# Print the content of the model's response.
# For chat models, the actual text response is typically contained within
# the 'content' attribute of the returned message object.
# print(result.content)