In [None]:
# Install the required Python libraries quietly (without showing detailed logs):
# - transformers: for working with state-of-the-art NLP models,
# - accelerate: for optimizing and managing model training/inference across devices,
# - torch: PyTorch, the main deep learning framework used here.
# The exclamation mark (!) allows running this shell command inside the notebook.
!pip install -q transformers accelerate torch


In [None]:
# login() authenticates this session against the Hugging Face Hub.
from huggingface_hub import login

# Prompts for a Hugging Face access token (in a notebook this appears as an
# input widget below the cell). The token is what lets the later
# from_pretrained() calls download gated or private repositories.
# Enter the token in the prompt only; never hard-code it in a cell.
login()  # interactive: paste your HF token when prompted


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Auto-classes for loading the tokenizer and causal LM, plus the high-level pipeline helper.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Hub ID of the checkpoint to load.
# Options include Meta LLaMA 3 8B Instruct, Mistral 7B Instruct, or Google's Gemma 7B IT.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # You can switch to other models by changing this string

# Load the tokenizer that matches the chosen checkpoint.
# The tokenizer converts input text into the token IDs the model understands.
# token=True sends the access token stored by login(), which is needed for
# gated or private repositories. It replaces the deprecated 'use_auth_token'.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)

# Load the causal language model used for text generation.
# device_map="auto" lets accelerate place the weights on the available GPUs/CPU.
# torch_dtype="auto" keeps the dtype the checkpoint was saved in instead of
# up-casting to float32.
# token=True is passed here as well so both downloads authenticate the same way.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    token=True,
)

# Bundle model and tokenizer into a text-generation pipeline so later cells
# can call generator(prompt, ...) directly.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


In [None]:
# torch is imported here so this cell can seed the sampler on its own.
import torch

# Sampling (do_sample=True) draws from torch's random number generator.
# Seeding it first makes a fresh "Restart & Run All" produce the same text again
# (exact repeatability can still vary across hardware and library versions).
SEED = 42
torch.manual_seed(SEED)

# Input prompt: ask the model to generate text about New York.
prompt = "Describe about New York"

# NOTE(review): model_id names an Instruct checkpoint; passing a plain string
# does not apply the model's chat template. Consider passing a list of
# {"role": ..., "content": ...} messages instead. Confirm before changing,
# because that also changes the structure of `outputs`.

# Generate text from the prompt:
# - max_new_tokens=500 caps the number of newly generated tokens,
# - do_sample=True samples from the distribution instead of greedy decoding,
# - temperature=0.7 controls randomness: lower is more focused, higher more diverse.
outputs = generator(prompt, max_new_tokens=500, do_sample=True, temperature=0.7)

# Print the generated text of the first (and, for a single prompt, only) result.
print(outputs[0]["generated_text"])
