In [1]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Optional Hugging Face credential taken from the API_KEY environment
# variable; this is None when the variable is not set.
# NOTE(review): nothing else in the visible notebook reads this value.
HUGGINGFACE_API_KEY = os.getenv('API_KEY')

In [3]:
# Hub identifier of the checkpoint used throughout the notebook.
model_id = 'ibm-granite/granite-8b-code-base-128k'

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Load the tokenizer that belongs to `model_id`, keeping downloaded files
# under the local ./models/ directory.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    cache_dir="./models/",
)

In [5]:
# Release cached, currently unused GPU memory before the model is loaded.
# The guard only makes the CPU-only case explicit; there is nothing to
# release when no CUDA device is available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# Load the causal language model onto `device`, sharing the ./models/
# download cache with the tokenizer.
# NOTE(review): no dtype is passed, so the library's default precision is
# used; confirm an 8B-parameter checkpoint fits in memory that way.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir="./models/",
    device_map=device,
)
# Switch to inference mode (disables dropout and similar training-only layers).
model.eval()

In [7]:
# Prompt handed to the model for completion; edit this string to try a
# different code snippet.
input_text = "def fibonacci():"

In [8]:
# Encode the prompt; return_tensors="pt" asks for PyTorch tensors.
input_tokens = tokenizer(
    input_text,
    return_tensors="pt",
)

In [9]:
# Move every tensor of the encoding to the target device in a single call.
# BatchEncoding.to() relocates the tensors and returns the encoding itself,
# replacing the manual loop that reassigned entries of the mapping while
# iterating over its keys.
input_tokens = input_tokens.to(device)

In [None]:
# Generate a continuation of the prompt with plain greedy decoding.
# `early_stopping` is intentionally not passed: it only influences beam
# search, so combined with num_beams=1 it had no effect on the result and
# merely triggered a generation-config validation warning.
output = model.generate(
    **input_tokens,
    max_new_tokens=50,  # Upper bound on the number of newly generated tokens
    do_sample=False,    # Deterministic decoding, no sampling
    num_beams=1,        # Single beam, i.e. no beam search
)

In [None]:
# Convert each generated token-id sequence back into text (one string per
# batch entry).
# NOTE: `output` is rebound from token ids to decoded strings here, so
# re-running this cell on its own would pass already-decoded text back into
# batch_decode; re-run the generation cell first.
output = tokenizer.batch_decode(output)

In [None]:
# Print every decoded completion on its own line; this example's batch
# holds a single entry.
for decoded_text in output:
    print(decoded_text)