In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig,
    set_seed,
)
from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login

# Fix the random seed up front; gen() further down samples (do_sample=True),
# so an unseeded run would not be repeatable.
set_seed(42)
# Interactive Hugging Face Hub login (prints the banner shown in the output
# below); the model-loading cell passes token=True.
# NOTE(review): this blocks a non-interactive "Run All" until a token is entered.
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



In [2]:
# Sanity check: show which GPU is currently selected. With no argument,
# get_device_name() reports on the current CUDA device.
print(torch.cuda.get_device_name())

NVIDIA GeForce RTX 4060


In [3]:
# Quantization settings passed to from_pretrained() as quantization_config:
# 4-bit NF4 weights, float16 compute dtype, double quantization disabled.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

In [4]:
# Load the base checkpoint with the quantization settings from bnb_config.
model_name = "microsoft/phi-2"
# The empty-string key assigns the whole model to device index 0.
device_map = {"": 0}
original_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
    # NOTE(review): trust_remote_code executes Python shipped with the
    # checkpoint; keep it only while the model repository is trusted.
    trust_remote_code=True,
    # Authenticate with the token stored by the earlier login call.
    token=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Tokenizer for the same checkpoint as the model: slow (pure-Python)
# implementation, padding applied on the left.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=False,
    padding_side="left",
    add_bos_token=True,
    add_eos_token=True,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token as the padding token so padding=True
# has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

In [11]:
def gen(model, prompt, max_length=200, tokenizer=tokenizer):
    """
    Generate a sampled continuation of ``prompt`` using ``model``.

    Args:
        model: The language model; must expose ``parameters()`` and ``generate()``.
        prompt: Input text prompt (a single string).
        max_length: Maximum number of tokens to generate on top of the prompt.
        tokenizer: Pre-instantiated tokenizer (defaults to the notebook-level
            ``tokenizer`` object that existed when this cell was executed).

    Returns:
        str: The generated continuation only (prompt removed), with leading
        and trailing whitespace stripped.
    """
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)

    # Move to same device as model
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Number of prompt tokens actually fed to the model; used below to cut
    # the prompt off the generated sequence.
    prompt_token_count = inputs["input_ids"].shape[1]

    # Generate text
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            # Budget for *new* tokens only; equivalent to the former
            # max_length = prompt length + max_length arithmetic.
            max_new_tokens=max_length,
            num_return_sequences=1,
            temperature=0.4,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count rather than by len(prompt) characters: decoding does not always
    # reproduce the prompt character-for-character, which would shift a
    # character-based cut and clip or leak text.
    continuation_ids = outputs[0][prompt_token_count:]

    # Decode only the newly generated tokens
    return tokenizer.decode(continuation_ids, skip_special_tokens=True).strip()

In [None]:
# Baseline check: run one networking question through the un-tuned model
# and show the prompt and the completion with their character counts.
prompt = "Compare TCP and UDP. Which one is better for real-time applications like video conferencing and online gaming, and why?"

output = gen(original_model, prompt)

# One print call; sep="\n" puts each piece on its own line, separated by
# 100-dash rules.
print(
    f"INPUT [{len(prompt)}]:\n{prompt}",
    "-" * 100,
    f"OUTPUT [{len(output)}]:\n{output}",
    "-" * 100,
    sep="\n",
)

INPUT [119]:
Compare TCP and UDP. Which one is better for real-time applications like video conferencing and online gaming, and why?
----------------------------------------------------------------------------------------------------
OUTPUT [1048]:
Answer: TCP (Transmission Control Protocol) and UDP (User Datagram Protocol) are two widely used protocols in computer networking. While both have their advantages and disadvantages, TCP is generally considered to be more suitable for real-time applications like video conferencing and online gaming, while UDP is better suited for applications that require faster, more efficient data transfer.

TCP is a connection-oriented protocol that ensures reliable data transmission between two devices. It establishes a connection between the sender and receiver, and then breaks it when the data is received. This ensures that all the data is delivered correctly and in the correct order. However, because TCP establishes a connection before transmitting da