In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path to the directory where the model is stored locally
model_name = "./gpt2-large"

# Load the tokenizer and model from the local directory
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Example: Text Generation
input_text = "Once upon a time in a faraway land,"
inputs = tokenizer(input_text, return_tensors="pt")

# Generate text
outputs = model.generate(
    inputs.input_ids,
    max_length=50,          # Maximum length of the generated text
    num_return_sequences=1, # Number of sequences to generate
    no_repeat_ngram_size=2, # Avoid repeating the same n-grams
    top_k=50,               # Limit the number of next words to consider
    top_p=0.95,             # Use nucleus sampling
    temperature=0.7,        # Control the randomness of predictions
    do_sample=True,         # Enable sampling to use top_p and temperature
    pad_token_id=tokenizer.eos_token_id,  # Set pad token to eos token
)

# Decode the generated text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the generated text
print("Generated Text:\n", generated_text)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Text:
 Once upon a time in a faraway land, a man named James was a well-known and respected member of society. He was well off and well known for his skill with the sword. One day, he was approached by a group of mysterious
