In [7]:
# # Use a pipeline as a high-level helper
# from transformers import pipeline

# pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")

from transformers import pipeline


# Load the correct LLaMA model from Hugging Face
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")

# Use the pipeline
output = pipe("You are a QNA assistant. \n\nExplain the theory of relativity in simple terms. in less than 500 words")
print(output)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


ValueError: Input length of input_ids is 26, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [9]:
import kagglehub

# Download latest version
path = kagglehub.model_download("metaresearch/llama-3.2/transformers/1b")

print("Path to model files:", path)

KaggleApiHTTPError: 403 Client Error.

You don't have permission to access resource at URL: https://www.kaggle.com/models/metaresearch/llama-3.2/transformers/1b/1
Please make sure you are authenticated if you are trying to access a private resource or a resource requiring consent.

In [None]:
pip install kagglehub

In [None]:
import os
import tarfile
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

def load_llama_model(models_dir, model_tar_filename, pipeline_task="text-generation"):
    """
    Load a LLaMA model from a tar.gz file, extract it, and create a Hugging Face pipeline.

    Args:
        models_dir (str): Path to the directory containing the model tar.gz file.
        model_tar_filename (str): Name of the tar.gz file (e.g., 'llama-1b.tar.gz').
        pipeline_task (str): Task for the Hugging Face pipeline (default: 'text-generation').

    Returns:
        pipeline: Hugging Face pipeline for the specified task.
    """
    # Path to the tar.gz file
    tar_path = os.path.join(models_dir, model_tar_filename)
    # Path to extract the model
    extract_path = os.path.join(models_dir, os.path.splitext(model_tar_filename)[0])
    
    # Extract the tar.gz file if not already extracted
    if not os.path.exists(extract_path):
        print(f"Extracting {tar_path}...")
        with tarfile.open(tar_path, "r:gz") as tar:
            tar.extractall(path=extract_path)
        print(f"Model extracted to {extract_path}")
    else:
        print(f"Model already extracted at {extract_path}")
    
    # Load tokenizer and model
    print("Loading tokenizer and model...")
    tokenizer = AutoTokenizer.from_pretrained(extract_path)
    model = AutoModelForCausalLM.from_pretrained(extract_path)
    print("Tokenizer and model loaded successfully!")

    # Create and return the pipeline
    print(f"Creating Hugging Face pipeline for task: {pipeline_task}...")
    hf_pipeline = pipeline(task=pipeline_task, model=model, tokenizer=tokenizer)
    print("Pipeline created successfully!")
    
    return hf_pipeline

# Example usage:
models_directory = "./models"  # Directory containing your model tar.gz file
model_filename = "llama-3.2-transformers-1b-v1.tar.gz"  # Replace with your tar.gz filename

llama_pipeline = load_llama_model(models_directory, model_filename)

# Test the pipeline
output = llama_pipeline("Once upon a time", max_length=200)
print(output)


Model already extracted at ./models\llama-3.2-transformers-1b-v1.tar
Loading tokenizer and model...


In [7]:
import os
import tarfile
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load environment variables with default fallback values
MODELS_DIR = os.getenv("MODELS_DIR", "./models")  # Default to './models' if not set
PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")  # Default to 'text-generation'


def extract_model_tar_if_needed(model_tar_filename, model_name=None):
    """
    Extracts a tar.gz file if the model directory doesn't already exist.

    Args:
        model_tar_filename (str): Name of the tar.gz file (e.g., 'llama-1b.tar.gz').
        model_name (str, optional): Custom name for the extracted directory. Defaults to None.

    Returns:
        str: Path to the extracted model directory.
    """
    # Derive the extraction path
    if model_name is None:
        model_name = model_tar_filename.replace(".tar.gz", "")
    extract_path = os.path.join(MODELS_DIR, model_name)

    # Check if the model directory already exists
    if os.path.exists(extract_path):
        print(f"Model directory already exists at {extract_path}. Skipping extraction.")
    else:
        print(f"Extracting {model_tar_filename}...")
        tar_path = os.path.join(MODELS_DIR, model_tar_filename)
        with tarfile.open(tar_path, "r:gz") as tar:
            tar.extractall(path=extract_path)
        print(f"Model extracted to {extract_path}")
    
    return extract_path


def load_model_and_pipeline(extract_path):
    """
    Loads a Hugging Face model and tokenizer from an extracted directory and creates a pipeline.

    Args:
        extract_path (str): Path to the extracted model directory.

    Returns:
        pipeline: Hugging Face pipeline for the specified task.
    """
    print("Loading tokenizer and model...")
    tokenizer = AutoTokenizer.from_pretrained(extract_path)
    model = AutoModelForCausalLM.from_pretrained(extract_path)
    print("Tokenizer and model loaded successfully!")

    # Create and return the pipeline
    print(f"Creating Hugging Face pipeline for task: {PIPELINE_TASK}...")
    hf_pipeline = pipeline(task=PIPELINE_TASK, model=model, tokenizer=tokenizer, chat_template = True)
    print("Pipeline created successfully!")
    
    return hf_pipeline


# Wrapper function
def load_llama_model(model_tar_filename, model_name=None):
    """
    End-to-end function to extract a LLaMA model from tar.gz and load it into a pipeline.

    Args:
        model_tar_filename (str): Name of the tar.gz file (e.g., 'llama-1b.tar.gz').
        model_name (str, optional): Custom name for the extracted directory. Defaults to None.

    Returns:
        pipeline: Hugging Face pipeline for the specified task.
    """
    # Extract the model if needed
    extract_path = extract_model_tar_if_needed(model_tar_filename, model_name)
    # Load the model and create a pipeline
    return load_model_and_pipeline(extract_path)


# Example usage
model_filename = "llama-3.2-transformers-1b-v1.tar.gz"  # Replace with your tar.gz filename
custom_model_name = "llama-1b-model"  # Optional custom name for the extracted model

llama_pipeline = load_llama_model(model_filename, model_name=custom_model_name)


message = [
            {"role": "user", "content": "Explain the concept of gravity in simple terms."},
           {"role": "system", "content": "Gravity is a force that pulls objects towards each other based on their mass."}
          ]

# Test the pipeline
output = llama_pipeline(message, max_length=300)
print(output)


Model directory already exists at ./models\llama-1b-model. Skipping extraction.
Loading tokenizer and model...
Tokenizer and model loaded successfully!
Creating Hugging Face pipeline for task: text-generation...
Pipeline created successfully!


ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [5]:
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
import os

load_dotenv()

huggingface_api_key = os.getenv('Huggingface_api_key')




client = InferenceClient(api_key=huggingface_api_key)

messages = [
	{ "role": "user", "content": "give me 5 fruits\n" },
]

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct", 
	messages=messages, 
	temperature=0.5,
	max_tokens=2048,
	top_p=0.7,
)


print(completion.choices[0].message.content)


Here are 5 fruits:

1. Apple
2. Banana
3. Mango
4. Strawberry
5. Pineapple


In [2]:
stream

<generator object _stream_chat_completion_response at 0x00000239F8046CE0>