In [None]:
This notebook runs inference with both the base (pretrained causal LM) model and the fine-tuned instruct model.

## Base model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Directory holding the base (pretrained) checkpoint and its tokenizer files.
model_path = "./fineweb-gpt2-final"

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the causal LM from the same checkpoint directory,
# moving the model onto the selected device right away.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the model summary.
model.eval()

In [None]:
input_text = "Once upon a time"

# Call the tokenizer directly (instead of `encode`) so that the attention mask
# is returned together with the token ids. Passing both to `generate()` avoids
# it having to guess the mask from the pad token.
inputs = tokenizer(input_text, return_tensors="pt").to(device)
input_ids = inputs["input_ids"]

# The tokenizer may not define a pad token (GPT-2 tokenizers do not by
# default); fall back to EOS so `generate()` gets an explicit pad id.
base_pad_token_id = tokenizer.pad_token_id
if base_pad_token_id is None:
    base_pad_token_id = tokenizer.eos_token_id

# Sample a short continuation: temperature scaling plus top-k / nucleus (top-p)
# filtering of the next-token distribution.
output_ids = model.generate(
    input_ids,
    attention_mask=inputs["attention_mask"],
    max_new_tokens=20,
    do_sample=True,
    temperature=0.8,
    top_k=50,
    top_p=0.95,
    pad_token_id=base_pad_token_id,
)

# Decode the first (and only) returned sequence: prompt plus continuation.
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(output_text)

## Instruct model

In [None]:
# Directory of the instruction-tuned checkpoint; used for both the model and
# the tokenizer of the text-generation pipeline.
FINAL_MODEL_PATH = "./gpt2-instruct"

In [None]:
from transformers import pipeline

# Text-generation pipeline backed by the instruct checkpoint. Model and
# tokenizer are loaded from the same directory; device 0 is the first CUDA
# GPU and -1 selects the CPU.
pipe = pipeline(
    task="text-generation",
    model=FINAL_MODEL_PATH,
    tokenizer=FINAL_MODEL_PATH,
    device=-1 if not torch.cuda.is_available() else 0,
)


In [None]:
messages = [
   {"role": "user", "content": "Define the job accounting"},
]

# Use the tokenizer that belongs to the instruct pipeline. The module-level
# `tokenizer` was loaded from the base checkpoint, so its chat template and
# special-token ids are not guaranteed to match the instruct model.
instruct_tokenizer = pipe.tokenizer

# Fall back to EOS when the tokenizer does not define a pad token, so that
# generation always receives an explicit pad id.
instruct_pad_token_id = instruct_tokenizer.pad_token_id
if instruct_pad_token_id is None:
    instruct_pad_token_id = instruct_tokenizer.eos_token_id

# Render the conversation to a plain-text prompt that ends with the
# assistant-turn marker, so the model continues with its reply.
prompt = instruct_tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

outputs = pipe(
    prompt,
    max_new_tokens=200,
    num_return_sequences=1,
    eos_token_id=instruct_tokenizer.eos_token_id,
    pad_token_id=instruct_pad_token_id,
    repetition_penalty=1.1,
)
print(outputs[0]["generated_text"])