In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

In [18]:
model_name = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(model_name)

print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")
   
# check if GPU is available or not, if it is mount model on GPU
# Set the device
device = "mps" if torch.backends.mps.is_available() else "cpu"
device = torch.device(device)
print(f"Using device: {device}")

model.to(device)

Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

In [19]:
#using tokenzier.encode/decode() and model.generate()
def generate_response(input):

    tokenized_input = tokenizer.encode(input, return_tensors="pt")
    
  # Create an attention mask
    attention_mask = torch.ones(tokenized_input.shape)

    tokenized_output = model.generate(
        tokenized_input,
        max_length=150,
        num_return_sequences=1,
        attention_mask=attention_mask,  # Pass the attention mask
        pad_token_id=tokenizer.eos_token_id  # Set pad_token_id
    )
    
    return tokenizer.decode(tokenized_output[0])
    
def generate_response_using_pipeline(input):
    generator = pipeline("text-generation", model="gpt2")
    response = generator(input, max_length=100, num_return_sequences=1)
    return response[0]["generated_text"]

In [21]:
print(generate_response_using_pipeline("Please respond to the following succinctly in one sentence without repeating yourself - this isn't a question to be answered: what day is it tomorrow?"))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Please respond to the following succinctly in one sentence without repeating yourself - this isn't a question to be answered: what day is it tomorrow?

The first day of your internship (it's the first year, if you make your job choice first). This is what your goal is.

How quickly do you decide that it's time to go? How long do you need to spend on your job search before it finishes?

What is the first day each month to get into
