# ChatOpenAI

In [None]:
# Run the line of code below to check the version of langchain in the current environment.
# Substitute "langchain" with any other package name to check their version.

In [8]:
# (Keep your imports and .env loading if you plan to use other services,
# but it's not strictly needed for a local Hugging Face model if no API key is involved)

# pip install transformers torch
# Make sure to run the above in your environment if you haven't already.

from transformers import pipeline, set_seed

# For reproducibility with Hugging Face pipelines
set_seed(365) # Same seed as you used before

# Load a text generation pipeline with a free, open-source model
# Using distilgpt2 as an example. You can try other models from the Hugging Face Hub.
# Popular options: 'gpt2', 'EleutherAI/gpt-neo-1.3B' (larger, more capable, needs more resources)
# For very small models, you might explore TinyLlama or similar.
try:
    # Using a more specific task for the pipeline
    # For something like name generation, 'text-generation' is appropriate.
    # Some models are instruction-tuned, others are better at completing text.
    # distilgpt2 is more of a text completer.
    generator = pipeline('text-generation', model='distilgpt2')
except Exception as e:
    print(f"Error loading the model or pipeline: {e}")
    print("Make sure you have an internet connection for the first download.")
    print("You might also need to install PyTorch: pip install torch")
    generator = None # Set to None if loading fails

if generator:
    prompt = "I've recently adopted a dog. Could you suggest some dog names? Here are a few: "
    # Note: Smaller models like distilgpt2 work best with more direct prompts or "few-shot" examples.
    # Giving it a starting pattern can help.

    # Parameters for generation:
    # max_length: Maximum number of tokens in the generated output (including prompt).
    # num_return_sequences: How many different suggestions to generate.
    # temperature: Controls randomness. Lower is more deterministic. 0.7-0.9 is common for creative tasks.
    # For more deterministic output with smaller models, you might keep it low or experiment.
    # top_k, top_p: Other sampling strategies.
    try:
        generated_responses = generator(
            prompt,
            max_length=100,       # Adjust as needed
            num_return_sequences=3, # Get a few options
            temperature=0.7,      # A bit of creativity
            do_sample=True        # Necessary for temperature, top_k, top_p to have an effect
        )

        print("Suggested Dog Names (and surrounding text from model):")
        for i, response in enumerate(generated_responses):
            print(f"\nSuggestion {i+1}:")
            # The output is a dictionary, and the generated text is in 'generated_text'
            print(response['generated_text'])

            # You might want to parse the actual names from the generated text.
            # This often requires some post-processing.
            # For example, splitting the text and looking for capitalized words after the prompt.

    except Exception as e:
        print(f"Error during text generation: {e}")
else:
    print("Text generation pipeline not loaded. Cannot proceed.")

# Example of how you might try to extract names (very basic):
# (This part is highly dependent on the model's output format)
# if generator and 'generated_responses' in locals():
#     print("\n--- Parsed Name Ideas (Example) ---")
#     for response in generated_responses:
#         # Get text after the initial prompt
#         text_after_prompt = response['generated_text'][len(prompt):].strip()
#         # Split by common delimiters (commas, newlines, spaces)
#         potential_names = [name.strip() for name in text_after_prompt.replace(',', ' ').replace('\n', ' ').split(' ') if name.strip() and name.strip()[0].isupper()]
#         print(potential_names[:5]) # Print first few potential names

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Suggested Dog Names (and surrounding text from model):

Suggestion 1:
I've recently adopted a dog. Could you suggest some dog names? Here are a few:   "Dawny"  
What is Dawny? Dawny is a small, bright, bright, green, red, white, purple, white, red, pink, white, blue, yellow, red, white, red, white, red, blue, white, blue, white, blue, yellow, yellow, white, red, white, red, white, yellow,

Suggestion 2:
I've recently adopted a dog. Could you suggest some dog names? Here are a few: ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~ - 

Suggestion 3:
I've recently adopted a dog. Could you suggest some dog names? Here are a few: _____
Cobra dog
Cat who was born without a collar

In [14]:
# (Original imports for dotenv if you're still using it for other purposes)
# %load_ext dotenv
# %dotenv

# Core libraries
import torch # Explicitly import torch to check its availability early
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed

# %pip install langchain_community
# Updated LangChain import for HuggingFace
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# For reproducibility
SEED = 365
set_seed(SEED)

# --- Configuration ---
# MODEL_ID = "distilgpt2" # Sticking with distilgpt2 for now to debug
MODEL_ID = "gpt2" # Alternative to try if distilgpt2 remains problematic

# --- Utility Function for Clean Code ---
def create_llm_pipeline(model_id: str, max_new_tokens: int = 50, temperature: float = 0.7, do_sample: bool = True):
    """
    Creates a HuggingFace text generation pipeline and wraps it for LangChain.

    Args:
        model_id (str): The Hugging Face model ID.
        max_new_tokens (int): Maximum number of new tokens to generate.
        temperature (float): Controls randomness. 0.0 is deterministic.
        do_sample (bool): Whether to use sampling; must be True for temperature to have an effect.

    Returns:
        langchain_huggingface.HuggingFacePipeline: The LangChain LLM wrapper.
        None: If an error occurs during pipeline creation.
    """
    try:
        print(f"Attempting to load tokenizer for model: {model_id}")
        tokenizer = AutoTokenizer.from_pretrained(model_id)

        print(f"Attempting to load model: {model_id}")
        # Ensure pad_token_id is set if tokenizer doesn't have one (common for GPT-2 family)
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
            print(f"Set tokenizer.pad_token_id to tokenizer.eos_token_id ({tokenizer.eos_token_id})")

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",  # Uses GPU if available (needs `accelerate` library)
            pad_token_id=tokenizer.pad_token_id # Explicitly pass pad_token_id
        )
        model.config.pad_token_id = model.config.eos_token_id # Another place to ensure it's set for generation

        print("Creating Hugging Face text-generation pipeline...")
        hf_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=max_new_tokens,
            temperature=temperature if do_sample else None, # Temperature only if sampling
            do_sample=do_sample,
            # top_k=50, # Optional: Consider for sampling
            # top_p=0.95, # Optional: Consider for sampling
            # repetition_penalty=1.2 # Optional: To discourage repetition
        )
        
        print("Wrapping pipeline with LangChain's HuggingFacePipeline...")
        # Using the new import
        llm = HuggingFacePipeline(pipeline=hf_pipeline)
        return llm

    except ImportError as e:
        print(f"A required library is missing: {e}")
        print("Please ensure langchain-huggingface, transformers, torch, and accelerate are installed.")
        return None
    except Exception as e:
        print(f"An error occurred during pipeline creation for {model_id}: {e}")
        print("This could be due to model download issues, resource limitations, or configuration errors.")
        return None

In [None]:
# --- Main Logic ---
llm_instance = create_llm_pipeline(MODEL_ID, max_new_tokens=60, temperature=0.5, do_sample=True)

if llm_instance:
    # Let's try a simpler prompt first to see if basic generation works
    # prompt_text = "Once upon a time, in a land far away,"
    # If the simple prompt works, then we can try the original one again or a variation
    prompt_text = "Suggest three fun names for a newly adopted puppy: "

    print(f"\nUsing LangChain with model: {MODEL_ID}")
    print(f"Sending prompt: \"{prompt_text}\"")

    try:
        # Using .invoke for LangChain
        response = llm_instance.invoke(prompt_text)
        
        print("\n--- Response from LangChain wrapped HuggingFace model ---")
        # The response from HuggingFacePipeline is typically a string
        # Let's ensure we're handling potential encoding issues at the print stage as well,
        # though the primary fix should be in generation.
        if isinstance(response, str):
            print(response.encode('utf-8', 'ignore').decode('utf-8'))
        else:
            print(response)

    except Exception as e:
        print(f"An error occurred during LLM invocation: {e}")
else:
    print(f"LLM Pipeline for {MODEL_ID} could not be created. Cannot proceed.")

# --- Test Writing (Conceptual) ---
# def test_llm_output_not_garbled(response_text):
#    assert "???" not in response_text, "Output contains garbled characters."
#    # More sophisticated checks could involve regex for expected language characters.

# def test_llm_suggests_names(response_text):
#    # This is harder, as "names" is subjective, but we can check for patterns.
#    # For instance, check if there are capitalized words after the prompt.
#    pass

Attempting to load tokenizer for model: gpt2
Attempting to load model: gpt2
Set tokenizer.pad_token_id to tokenizer.eos_token_id (50256)
Creating Hugging Face text-generation pipeline...
Wrapping pipeline with LangChain's HuggingFacePipeline...

Using LangChain with model: gpt2
Sending prompt: "Suggest three fun names for a newly adopted puppy: "

--- Response from LangChain wrapped HuggingFace model ---
Suggest three fun names for a newly adopted puppy:  "Puppy" (pronounced "puppy-oo") and "Puppy-Pee" (pronounced "puppy-pee-ee-uh").  Puppy-Pee is a very cute dog that is very happy to be around
