In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser # For parsing JSON output

# Load environment variables from a local .env file into the process. 🌍
# Keeps credentials out of the notebook source; presumably this is where the
# Hugging Face API token comes from — confirm against your .env.
# As the cell's last expression, the call's return value is shown as the
# cell output (`True` in the recorded run).
load_dotenv()

True

In [2]:
# Build the raw Hugging Face endpoint client. 🤖
# `repo_id` points at the hosted 'google/gemma-2-2b-it' repo (a small,
# instruction-tuned Gemma 2 checkpoint); `task` requests plain text generation.
llm = HuggingFaceEndpoint(task="text-generation", repo_id="google/gemma-2-2b-it")

In [3]:
# Wrap the HuggingFaceEndpoint in ChatHuggingFace. 💬
# `model` is the chat-style wrapper around `llm`; it is the object placed in the
# chain later in this notebook, so the raw endpoint is never called directly.
model = ChatHuggingFace(llm=llm)

In [4]:
# Initialize a JsonOutputParser. ⚙️
# The parser expects the LLM's raw text output to be valid JSON and converts it
# into the corresponding Python object. That is not necessarily a dict: in the
# recorded run of this notebook the model returned a JSON array, so the parsed
# result was a list of dicts.
parser = JsonOutputParser()

In [5]:
# Build the prompt template. ✍️
# The prompt asks for 5 facts about `{topic}`, followed by the parser's own
# formatting instructions. `format_instruction` is pre-filled through
# `partial_variables`, so callers only have to supply `topic` at invoke time.
template = PromptTemplate(
    input_variables=["topic"],
    partial_variables={"format_instruction": parser.get_format_instructions()},
    template="Give me 5 facts about {topic} \n {format_instruction}",
)

In [6]:
# Create a LangChain Expression Language (LCEL) chain. 🔗
# The `|` operator pipes each step's output into the next one:
# 1. `template`: fills in `topic` and the pre-filled JSON formatting instructions
#    to produce the full prompt.
# 2. `model`: sends that prompt to the Gemma chat model. The model is only *asked*
#    to answer in JSON; nothing forces it to produce well-formed JSON.
# 3. `parser`: takes the model's text output and parses it as JSON into a Python
#    object. If the text is not valid JSON, this step fails with a parsing error.
#    NOTE(review): LangChain's JSON parser is expected to surface this as
#    `OutputParserException` rather than a bare `json.decoder.JSONDecodeError`;
#    confirm against the installed langchain_core version.
chain = template | model | parser

In [7]:
# Run the whole pipeline for the topic 'black hole'. 🚀
# The dict key supplies the template's `topic` input variable.
result = chain.invoke({"topic": "black hole"})

# Show the parsed result. 📊
# On success `result` is the Python object decoded from the model's JSON; in the
# recorded run that was a list of dicts with 'fact' and 'explanation' keys,
# not a single dict. If the model's text is not valid JSON, `invoke` raises
# instead and nothing is printed.
print(result)

[{'fact': 'Black holes are formed when massive stars collapse at the end of their lives.', 'explanation': 'When a star much larger than our Sun runs out of fuel, it cannot support itself against its own gravity. This leads to a supernova and the formation of a black hole: a region where gravity is so strong that nothing, not even light, can escape.'}, {'fact': "Black holes have a 'singularity' at their center", 'explanation': "The singularity is a point of infinite density where all the mass of the black hole is concentrated. Our laws of physics break down at this point, and we can't fully describe what it is like."}, {'fact': "It's impossible to see a black hole directly.", 'explanation': 'The lack of visible light from black holes arises from their intense gravitational pull, which traps and prevents any light from escaping.  However, astronomers can detect them by observing their effects on surrounding matter.'}, {'fact': 'Black holes can have different sizes, ranging from stellar-m