<a href="https://colab.research.google.com/github/himanshuraimau/hands-llm/blob/main/ch1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers>=4.40.1 accelerate>=0.27.2

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier shared by the model weights and the tokenizer, kept in one
# place so the two can never drift apart.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Load the causal language model onto the CUDA device.
# NOTE(review): trust_remote_code=True permits executing code shipped with the
# model repository -- confirm it is still required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)

# Load the tokenizer that matches the model above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [None]:
# Report the vocabulary size exposed by the loaded tokenizer.
vocab_size = tokenizer.vocab_size
print(f"Vocab size: {vocab_size}")

In [None]:
# Encode a handful of standalone words and show the ids produced for each.
words = ["hello", "world", "aI", "learning"]
for word in words:
    print(word, tokenizer.encode(word))

# Encode the same words as a single sentence, then decode the ids again to
# check the round trip. The sentence is encoded once and the ids are reused.
sentence = "hello world aI learning"
sentence_ids = tokenizer.encode(sentence)
print(sentence, sentence_ids)
print(sentence, tokenizer.decode(sentence_ids))


In [None]:
# Turn the sentence (defined in the previous cell) into token ids.
token_ids = tokenizer.encode(sentence)

# Pair every id with the text obtained by decoding that single id.
token_id_map = [(tid, tokenizer.decode(tid)) for tid in token_ids]

# Keep the decoded pieces available on their own as well.
tokens = [piece for _, piece in token_id_map]

# Show the id -> token mapping, one pair per line.
for token_id, token in token_id_map:
    print(f"Token ID: {token_id}, Token: {token}")

In [None]:
from transformers import pipeline

# Options forwarded to the text-generation pipeline.
generation_options = {
    # Return only the newly generated text, not the prompt plus the completion.
    "return_full_text": False,
    # Upper bound on the number of tokens generated per call.
    "max_new_tokens": 500,
    # No sampling: always pick the next token with the highest probability.
    "do_sample": False,
}

# Build the pipeline around the already-loaded model and tokenizer.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_options,
)

In [None]:
# Conversation sent to the model: a system persona followed by the user query.
system_prompt = "You are a stand-up comedian."
user_prompt = "Create a funny joke about chickens."
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

# Run generation and print the text of the first returned result.
output = generator(messages)
first_result = output[0]
print(first_result["generated_text"])