In [1]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [2]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

MAX_TOKENS = 128

# SAVED_MODEL_PATH = 'models/EleutherAI/gpt-neo-125m-512'
MODEL_NAME = 'Qwen/Qwen2-0.5B-Instruct'
SAVED_MODEL_PATH = f'models/{MODEL_NAME}-{MAX_TOKENS}'


student_model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16),
    SAVED_MODEL_PATH
)
student_tokenizer = AutoTokenizer.from_pretrained(SAVED_MODEL_PATH, trust_remote_code=True)
student_tokenizer.pad_token = student_tokenizer.eos_token

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [3]:
from torch.nn import functional

# def chat_with_model(prompt, max_new_tokens=64):
#     device = student_model.device
#     inputs = student_tokenizer(prompt, return_tensors="pt")
#     inputs = {k: v.to(device) for k, v in inputs.items()}
#     input_length = inputs["input_ids"].shape[1]

#     with torch.inference_mode():
#         outputs = student_model.generate(
#             **inputs,
#             max_new_tokens=max_new_tokens,
#             pad_token_id=student_tokenizer.eos_token_id
#         )

#     response_ids = outputs[0][input_length:]
#     response_text = student_tokenizer.decode(response_ids, skip_special_tokens=True)

#     return response_text

def chat_with_model(prompt, return_confidence=True, max_new_tokens=64):
    device = student_model.device
    inputs = student_tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    input_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        outputs = student_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=student_tokenizer.pad_token_id,
            return_dict_in_generate=True,
            output_scores=return_confidence
        )

    response_ids = outputs.sequences[0][input_length:]
    response_text = student_tokenizer.decode(response_ids, skip_special_tokens=True)

    if return_confidence:
        scores = torch.stack(outputs.scores, dim=0).to(outputs.sequences.device)
        probs = functional.softmax(scores, dim=-1)
        max_probs = probs.max(dim=-1).values
        avg_confidence = max_probs.mean().item()
        return response_text, avg_confidence

    return response_text

In [4]:
import pandas as pd
from datasets import Dataset

df = pd.read_csv('sample/merged_distilled_dataset.csv')
distilled_dataset = Dataset.from_pandas(df)
distilled_dataset

Dataset({
    features: ['prompt', 'response'],
    num_rows: 30000
})

In [5]:
sample = distilled_dataset[23969]
response, confidence = chat_with_model(sample['prompt'], return_confidence=True)
# response = chat_with_model(sample['prompt'])
print("Prompt:", sample['prompt'])
print("Response:", response)
print("Confidence:", round(confidence * 100, 2), "%")

Prompt: User: when i run sudo apt-get update, it returns with an error for each file saying 'failed to fetch' <file> Could not resolve security.ubuntu.com anybody know why this happens?
Assistant:
Response:  It seems like there might be a typo in the path you're using to download packages from the Ubuntu repositories. Here's how you can fix it:

1. Open a terminal.
2. Type `sudo apt-get update` and press Enter.

If you're still facing issues, try changing your search path in `/etc
Confidence: 83.16 %
