## Phi-3

# 1. Setup: Import Libraries

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 2. Manual Path

## 2.1. Load Model and Tokenizer

In [2]:
# Hugging Face Hub checkpoint used for BOTH the model and the tokenizer.
model_name = "microsoft/Phi-3-mini-4k-instruct"

# Load the causal language model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",        # place the weights on the GPU
    torch_dtype="auto",       # let the library pick the dtype instead of forcing one
    trust_remote_code=False,  # do not execute custom code shipped with the checkpoint
)

# Load the tokenizer from the same checkpoint as the model; reusing
# `model_name` keeps the two in sync if the checkpoint is ever changed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 2.2. Tokenization Example

In [16]:
# Example sentence used by the tokenization and inference cells.
# Defined here so the notebook runs on a fresh kernel (Restart & Run All).
text = "Hello world"

# Encode text into input IDs
encoding = tokenizer(text)

# Convert input IDs to readable tokens
tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'])

# Print token IDs and their corresponding tokens
for token_id, token_str in zip(encoding['input_ids'], tokens):
    print(f"{token_id} --> {token_str}")

15043 --> ▁Hello
3186 --> ▁world


## 2.3. Model Inference (Logits and Prediction)

In [25]:
# Tokenize and move input to same device as model
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Forward pass for inference only: disable autograd so no computation graph
# is built and kept alive for the returned logits.
with torch.no_grad():
    outputs = model(**inputs)

# outputs.logits[0] holds the scores of the first (only) sequence in the batch;
# outputs.logits[0][i] are the raw, unnormalized scores over the vocabulary for
# the token that FOLLOWS input position i.

# Print the largest raw logit at each position (a score, not a probability)
print(outputs.logits[0][0].max())  # Best next-token score after the first input token
print(outputs.logits[0][1].max())  # Best next-token score after the second input token

# Print predicted token IDs (most likely next token at each position)
print(outputs.logits[0][0].argmax())  # ID predicted to follow the first input token
print(outputs.logits[0][1].argmax())  # ID predicted to follow the second input token

# Decode a couple of token IDs manually (the two IDs printed above in the recorded run)
print(tokenizer.decode([29892, 29991]))
# Same decoding, but taking the IDs straight from the logits of the first sequence
print(tokenizer.decode(outputs.logits[0].argmax(-1)))

# Greedy next-token prediction at every position, for the whole batch
predicted_token_id = outputs.logits.argmax(dim=-1)
print(tokenizer.decode(predicted_token_id[0]))

tensor(37.7500, device='cuda:0', dtype=torch.bfloat16, grad_fn=<MaxBackward1>)
tensor(48., device='cuda:0', dtype=torch.bfloat16, grad_fn=<MaxBackward1>)
tensor(29892, device='cuda:0')
tensor(29991, device='cuda:0')
,!
,!
,!


## 2.4. Generate Continuation

In [33]:
# Generate continuation
generated_ids = model.generate(
    **inputs,                # input_ids together with the matching attention_mask
    max_new_tokens=50,       # Explicit cap on generated tokens instead of the implicit default length
    do_sample=True,          # Sample from the distribution (set False for greedy decoding);
                             # the output therefore differs from run to run
    pad_token_id=tokenizer.eos_token_id,  # Pad with EOS; avoids the warning about an unset pad token
)

# Decode and print result (prompt + continuation, special tokens removed)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

Hello world! This is a simple function demonstrating how you can print "Hello World!" in Python


# 3. Pipeline Path

## 3.1. Pipeline Preparation

In [12]:
# Generation settings applied to every call of the pipeline
generation_settings = {
    "return_full_text": False,  # Return only the newly generated text, not the prompt
    "max_new_tokens": 500,      # Limit length of generated text
    "do_sample": False,         # Deterministic (no sampling)
}

# Build the text-generation pipeline around the already-loaded model and tokenizer
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

## 3.2. Text Generation

In [26]:
# Single-turn conversation in chat format: one user message
messages = [{"role": "user", "content": "Hello World"}]

# Run the text-generation pipeline on the conversation
output = generator(messages)

# Show the generated text of the first result
first_result = output[0]
print(first_result["generated_text"])

 This instruction is a simple greeting in English, mirroring the format and difficulty of the given Chinese instruction. The task is to generate a similar greeting in English.


In [28]:
# Chat-format prompt: a single user request
messages = [dict(role="user", content="Create a funny joke about chickens.")]

# Run the pipeline, then print the generated text of the first result
output = generator(messages)
first_result = output[0]
print(first_result["generated_text"])

 Why did the chicken join the band? Because it had the drumsticks!


## 3.3. Small Chatbot! Enjoy!

In [36]:
# Store chat history as a list of {"role", "content"} messages.
# The full history is sent to the model on every turn, so the model sees
# earlier turns of the conversation.
messages = []

print("Chatbot is ready! Type 'exit' to quit.\n")

while True:
    # Get user input; surrounding whitespace is not part of the message,
    # so "exit " or " Exit" also ends the chat
    user_input = input("You: ").strip()
    if user_input.lower() == "exit":
        print('Conversation has ended!')
        break

    # Ignore empty submissions instead of sending an empty user turn to the model
    if not user_input:
        continue

    # Add user message to history
    messages.append({"role": "user", "content": user_input})

    # Generate model response
    response = generator(messages)

    # Get the assistant reply
    reply = response[0]["generated_text"]
    print("Bot:", reply)

    # Add assistant reply to history
    messages.append({"role": "assistant", "content": reply})

Chatbot is ready! Type 'exit' to quit.



You:  exit


Conversation has ended!
