In [1]:
import os
from huggingface_hub import InferenceClient
from rich.console import Console # For better display (optional in notebook, but good practice)
from rich.panel import Panel
from rich.text import Text

# Shared rich console used for every styled message in this notebook.
console = Console()

# The token is read from the environment so it never appears in the notebook itself.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Show a visible notice first, then abort: the client below is constructed
    # with this token, so continuing without it makes no sense.
    missing_token_notice = Text(
        "HF_TOKEN environment variable is not set. Please set it in your terminal before launching Jupyter.",
        justify="center",
        style="bold yellow",
    )
    console.print(
        Panel(
            missing_token_notice,
            title="[red]Authentication Required[/red]",
            border_style="red",
        )
    )
    raise ValueError("HF_TOKEN environment variable not set.")

# Model the client is bound to; the sweep cell talks to it via chat_completion.
LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"
llm_client = InferenceClient(model=LLM_MODEL, token=HF_TOKEN)

console.print(f"[bold green]LLM Client initialized for model: {LLM_MODEL}[/bold green]")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Configuration for the Sweep ---
# Sends the same prompt to the model once per temperature, shows each reply in
# a panel, and records every result (or error) in `outputs` for later review.
temperatures = [0.0, 0.4, 0.8, 1.0] # Added 1.0 for more variety
prompt = "Write a two-line haiku about Dallas in spring."

# The conversation is identical for every temperature, so it is built once
# instead of being re-created on each loop iteration.
messages = [
    {"role": "system", "content": "You are a poetic assistant. Generate creative and concise responses."},
    {"role": "user", "content": prompt},
]

# One record per temperature: {"temperature": <float>, "output": <str>}.
# Re-assigned here so re-running the cell starts from an empty list.
outputs = []

console.print(f"[bold blue]Running temperature sweep for prompt:[/bold blue] '[italic]{prompt}[/italic]'\n")

for temp in temperatures:
    console.print(f"\n[bold yellow]--- Generating with Temperature: {temp} ---[/bold yellow]")

    try:
        response = llm_client.chat_completion(
            messages=messages,
            temperature=temp,  # the only setting that varies between runs
            max_tokens=50,  # keep the reply short
            stop=["\n\n"],  # stop at the first blank line so the reply does not run on
        )

        generated_text = ""
        if response and response.choices and response.choices[0] and response.choices[0].message:
            # `content` can come back as None; normalise it to an empty string
            # so an empty reply is not misreported below as an error.
            generated_text = response.choices[0].message.content or ""

        console.print(Panel(
            Text(generated_text, style="cyan"),
            title=f"[cyan]Output (Temp: {temp})[/cyan]",
            border_style="cyan"
        ))

        # Store for later review
        outputs.append({"temperature": temp, "output": generated_text})

    except Exception as e:
        # Best-effort: one failing temperature must not abort the whole sweep.
        # The message goes through Text rather than a markup f-string because
        # exception text may contain square brackets, which rich would try to
        # interpret as markup tags.
        console.print(Text(f"Error at Temperature {temp}: {e}", style="bold red"))
        outputs.append({"temperature": temp, "output": f"Error: {e}"})

console.print("\n[bold green]Temperature sweep complete.[/bold green]")

# You can print all outputs at the end if you want to see them summarized
# for item in outputs:
#     print(f"Temp: {item['temperature']}\nOutput:\n{item['output']}\n---")

## Preferred Haiku Output

After reviewing the haikus generated at each temperature (0.0, 0.4, 0.8, and 1.0), I preferred the output at **Temperature 0.X** (replace 0.X with the temperature you liked best).

**Reasoning:**
* (Type your explanation here. For example: "At 0.0, the haiku was too literal and lacked poetic flair"; "At 0.8, the text became too random and no longer fit the haiku structure"; or "Temperature 0.4 gave the best balance: a touch of creativity while still keeping the haiku's essence.")