# Tokenizers, LLMs and heads

In [None]:
from datasets import load_dataset, DatasetDict, Dataset

from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer, AutoModel, AutoModelForCausalLM)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np
import pandas as pd

## Tokenizers

Let's use the gpt2 model as an example.

In [None]:
# Fetch the pretrained tokenizer published under the "gpt2" checkpoint name
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="gpt2")

# Fetch the same gpt2 checkpoint, this time wrapped with the text generation
# (causal language modelling) head
model_with_task_head = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="gpt2"
)

### Try out the loaded tokenizer

In [None]:
input_text = "Hello, this is my test sentence."

# Option 1: encode the text explicitly with the .encode method
encoded_input = tokenizer.encode(input_text)
print(encoded_input)

# Option 2: call the tokenizer object itself; print what it returns so it can
# be compared with the .encode result above
callable_output = tokenizer(input_text)
print(callable_output)

# Going the other way: .decode turns the encoded input back into text
decoded_text = tokenizer.decode(encoded_input)
print(decoded_text)

### Try out the loaded model

In [None]:
input_text = "Hello, this is my test sentence and I want to continue it with"

# Tokenize the prompt into PyTorch tensors ("pt") that can be unpacked
# straight into .generate as keyword arguments
model_inputs = tokenizer(input_text, return_tensors="pt")

# Inference can be done by calling the .generate method of the model.
# gpt2 ships without a padding token, so .generate would otherwise fall back
# to the end-of-sequence token on its own and warn about it on every call.
# Passing pad_token_id explicitly keeps the same behaviour but makes the
# choice visible and keeps the cell output clean.
model_output = model_with_task_head.generate(
    **model_inputs,
    max_new_tokens=20,
    pad_token_id=tokenizer.eos_token_id,
)

# A single prompt was passed in, so decode the first returned sequence
print("Model output: ", tokenizer.decode(model_output[0]))


### TODO
What is the gpt2 model predicting when loaded with `AutoModelForCausalLM`?

Test the gpt2 model a bit more. What do you think about the quality of its output? 

### Let's see what the base model output is like

In [None]:
# Fetch gpt2 as a bare base model, i.e. without any task specific head on top
base_model = AutoModel.from_pretrained(pretrained_model_name_or_path="gpt2")

# Same prompt as for the generation example
input_text = "Hello, this is my test sentence and I want to continue it with"

# Note: this time the model object is called directly on the tokenized input
# instead of going through the .generate method.
model_inputs = tokenizer(input_text, return_tensors="pt")
base_model_output = base_model(**model_inputs)

# Leave the result as the last expression so the notebook displays it
base_model_output

### TODO
Explain to your pair the difference between the above `model_with_task_head` and `base_model`.

### TODO exploration
* Try out different tokenizers. Do you notice any differences? You can try, for instance, "gpt2", "distilbert-base-uncased" or "facebook/galactica-1.3b" and convert text to tokens and tokens back to text.
* How do you find gpt2-based models that are fine-tuned for different tasks? Try some of those out.