In [32]:
!pip install -U transformers



In [33]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

In [34]:
sentences=  ["Hello world", "Haha whats up"]
tokenizer.pad_token = tokenizer.eos_token
tokenized = tokenizer(sentences, return_tensors="pt", padding=True)

In [35]:
tokenized

{'input_ids': tensor([[  9707,   1879, 151645, 151645],
        [    39,  13546,  40109,    705]]), 'attention_mask': tensor([[1, 1, 0, 0],
        [1, 1, 1, 1]])}

In [36]:
import pandas as pd

In [38]:
df = pd.read_csv('QandA.csv')

In [39]:
df

Unnamed: 0,Question,Answer
0,How do I take a screenshot on an iPhone?,"To take a screenshot on an iPhone, press and h..."
1,How do I change my wallpaper on an iPhone?,"To change your wallpaper on an iPhone, go to S..."
2,How do I make a phone call on an iPhone?,"To make a phone call on an iPhone, open the Ph..."
3,How do I send a text message on an iPhone?,"To send a text message on an iPhone, open the ..."
4,How do I use Siri on an iPhone?,"To use Siri on an iPhone, press and hold the H..."
...,...,...
1000,How to clear cache on Safari on my Mac for imp...,Go to Safari > Preferences > Privacy > Manage ...
1001,What to do if my AirPods Pro have low volume?,"Clean the earbuds, ensure they are correctly s..."
1002,Can I use a wireless trackpad with my Mac for ...,"Yes, you can use a wireless Bluetooth trackpad..."
1003,How to transfer files from my Mac to my iPhone...,"Use apps like Files, iCloud Drive, or third-pa..."


In [40]:
question_dataset = tokenizer(df['Question'].tolist(), return_tensors="pt", padding=True)

In [41]:
answer_dataset = tokenizer(df['Answer'].fillna('').tolist(), return_tensors="pt", padding=True)

In [42]:
tokenizer.decode(question_dataset['input_ids'][0])

'How do I take a screenshot on an iPhone?<|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|><|im_end|>'

In [43]:
from sklearn.model_selection import train_test_split

In [44]:
X_train, X_test, y_train, y_test = train_test_split(question_dataset, answer_dataset, test_size=0.3)

In [45]:
from sklearn.model_selection import train_test_split

X_input_ids = question_dataset['input_ids']
X_attention_mask = question_dataset['attention_mask']
y_input_ids = answer_dataset['input_ids']
y_attention_mask = answer_dataset['attention_mask']

X_train_input_ids, X_test_input_ids, \
X_train_attention_mask, X_test_attention_mask, \
y_train_input_ids, y_test_input_ids, \
y_train_attention_mask, y_test_attention_mask = train_test_split(
    X_input_ids, X_attention_mask, y_input_ids, y_attention_mask,
    test_size=0.3, random_state=42
)

In [46]:
from torch.optim import AdamW
from transformers import get_scheduler

In [47]:
epochs = 10
training_steps = 0

optimizer = AdamW(model.parameters(), lr=5e-5)
lr_scheduler = get_scheduler("linear",
                             optimizer=optimizer,
                             num_warmup_steps=0,
                             num_training_steps=1
                            )

In [48]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [49]:
model.to(device)
device

device(type='cuda', index=0)

In [50]:
from torch.utils.data import TensorDataset, DataLoader
from tqdm.auto import tqdm
import torch

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

train_dataset = TensorDataset(
    X_train_input_ids,
    X_train_attention_mask,
    y_train_input_ids,
    y_train_attention_mask
)


batch_size = 8
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Recalculate training_steps using the correct number of batches
training_steps = epochs * len(train_dataloader)

# Update lr_scheduler with the correct num_training_steps
lr_scheduler = get_scheduler("linear",
                             optimizer=optimizer,
                             num_warmup_steps=0,
                             num_training_steps=training_steps
                            )

# Initialize progress bar
progress_bar = tqdm(range(training_steps))

model.train()
for epoch in range(epochs):
    for batch_idx, batch_data in enumerate(train_dataloader):
        X_input_ids_batch, X_attention_mask_batch, y_input_ids_batch, y_attention_mask_batch = batch_data

        X_input_ids_batch = X_input_ids_batch.to(device)
        X_attention_mask_batch = X_attention_mask_batch.to(device)
        y_input_ids_batch = y_input_ids_batch.to(device)
        y_attention_mask_batch = y_attention_mask_batch.to(device)
        max_len_x = X_input_ids_batch.shape[1]
        max_len_y = y_input_ids_batch.shape[1]

        labels = torch.full_like(X_input_ids_batch, -100, dtype=torch.long, device=device)
        combined_input_ids = torch.cat((X_input_ids_batch, y_input_ids_batch), dim=1)
        combined_attention_mask = torch.cat((X_attention_mask_batch, y_attention_mask_batch), dim=1)

        combined_labels = torch.full(combined_input_ids.shape, -100, dtype=torch.long, device=device)
        combined_labels[:, max_len_x:] = y_input_ids_batch

        outputs = model(
            input_ids=combined_input_ids,
            attention_mask=combined_attention_mask,
            labels=combined_labels
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

    print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item():.4f}")

  0%|          | 0/880 [00:00<?, ?it/s]

Epoch 1/10 Loss: 0.2162
Epoch 2/10 Loss: 0.0716
Epoch 3/10 Loss: 0.0414
Epoch 4/10 Loss: 0.0351
Epoch 5/10 Loss: 0.0150
Epoch 6/10 Loss: 0.0016
Epoch 7/10 Loss: 0.0034
Epoch 8/10 Loss: 0.0037
Epoch 9/10 Loss: 0.0025
Epoch 10/10 Loss: 0.0005


### Try Fine-Tuned Model


In [51]:
model.eval()

device = next(model.parameters()).device

def generate_answer(question):
    inputs = tokenizer(question, return_tensors='pt')
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    output_sequences = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7
    )

    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)


    if generated_text.startswith(question):
        answer = generated_text[len(question):].strip()
    else:
        answer = generated_text.strip()

    return answer

user_question = "How do I use the Maps app?"
answer = generate_answer(user_question)
print(f"\nQuestion: {user_question}")
print(f"Answer: {answer}")


Question: How do I use the Maps app?
Answer: To use the Maps app, open it and enter a destination in the search bar. You can choose between driving, walking, cycling, or public transportation options. View detailed directions, explore street-level imagery, and discover nearby restaurants, shops, and attractions.


In [52]:
model.save_pretrained("finetuned_apple_qwen")