In [2]:
from datasets import load_dataset, DatasetDict

# Load the dataset from Hugging Face Hub
dataset_id = "ucirvine/sms_spam"
print(f"Loading dataset: {dataset_id}")
raw_datasets = load_dataset(dataset_id)

# --- Check dataset structure ---
print("\nDataset structure:")
print(raw_datasets)
print("\nSample data:")
print(raw_datasets['train'][0])
# Expected columns: 'label' (int: 0=ham, 1=spam), 'sms' (string)

# --- Split the dataset ---
# The dataset only has a 'train' split, so we create train/test splits
train_test_split = raw_datasets['train'].train_test_split(test_size=0.2, seed=42, stratify_by_column='label') # 80% train, 20% test

# Create a new DatasetDict with 'train' and 'test' splits
split_datasets = DatasetDict({
    'train': train_test_split['train'],
    'test': train_test_split['test']
})
print("\nDataset structure after splitting:")
print(split_datasets)

# Define label mapping (matches the dataset's 0/1 scheme)
num_labels = 2
id2label = {0: "ham", 1: "spam"}
label2id = {"ham": 0, "spam": 1}

print(f"\nNumber of labels: {num_labels}")
print(f"id2label mapping: {id2label}")

Loading dataset: ucirvine/sms_spam

Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['sms', 'label'],
        num_rows: 5574
    })
})

Sample data:
{'sms': 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n', 'label': 0}

Dataset structure after splitting:
DatasetDict({
    train: Dataset({
        features: ['sms', 'label'],
        num_rows: 4459
    })
    test: Dataset({
        features: ['sms', 'label'],
        num_rows: 1115
    })
})

Number of labels: 2
id2label mapping: {0: 'ham', 1: 'spam'}


In [3]:
from transformers import AutoTokenizer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Set padding token if not set
if tokenizer.pad_token is None:
    if tokenizer.eos_token:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"Set pad_token to eos_token: {tokenizer.pad_token}")
    else:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        print("Added new [PAD] token.")

def tokenize_function(examples):
    # Use the 'sms' column from the dataset
    return tokenizer(examples["sms"], padding="max_length", truncation=True, max_length=128) # Increased max_length slightly just in case

print("\nTokenizing dataset...")
tokenized_datasets = split_datasets.map(tokenize_function, batched=True)

# Remove original sms column and any other unnecessary columns
tokenized_datasets = tokenized_datasets.remove_columns(["sms"])
tokenized_datasets.set_format("torch")

print("Sample tokenized data:", tokenized_datasets['train'][0])


Tokenizing dataset...
Sample tokenized data: {'label': tensor(0), 'input_ids': tensor([    35,    544,    498,    614,   1943,   3010,    198, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 

In [4]:
from transformers import AutoTokenizer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Set padding token if not set
if tokenizer.pad_token is None:
    if tokenizer.eos_token:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"Set pad_token to eos_token: {tokenizer.pad_token}")
    else:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        print("Added new [PAD] token.")

def tokenize_function(examples):
    # Use the 'sms' column from the dataset
    return tokenizer(examples["sms"], padding="max_length", truncation=True, max_length=128) # Increased max_length slightly just in case

print("\nTokenizing dataset...")
tokenized_datasets = split_datasets.map(tokenize_function, batched=True)

# Remove original sms column and any other unnecessary columns
tokenized_datasets = tokenized_datasets.remove_columns(["sms"])
tokenized_datasets.set_format("torch")

print("Sample tokenized data:", tokenized_datasets['train'][0])


Tokenizing dataset...
Sample tokenized data: {'label': tensor(0), 'input_ids': tensor([    35,    544,    498,    614,   1943,   3010,    198, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 

In [5]:
import torch
from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

# QLoRA configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

print(f"\nLoading base model: {model_id} with QLoRA config...")
# Load the base model with quantization
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    num_labels=num_labels, # Pass the number of labels (2)
    id2label=id2label,     # Pass the mappings
    label2id=label2id,
    trust_remote_code=True,
)

# Ensure the model's pad_token_id is set
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id
    print(f"Set model pad_token_id to: {model.config.pad_token_id}")

# LoRA configuration (verify target_modules if possible, defaults are often okay)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=[ # Common targets, verify for Qwen2 if necessary
        "q_proj", "k_proj", "v_proj", "o_proj",
        # "gate_proj", "up_proj", "down_proj" # Less common to include MLP layers
    ],
     modules_to_save = ["score"] # Train the classification head
)

print("Applying LoRA adapter to the model...")
# Create the PEFT model
peft_model = get_peft_model(model, lora_config)

# Print trainable parameters
peft_model.print_trainable_parameters()


Loading base model: Qwen/Qwen2-0.5B-Instruct with QLoRA config...


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Set model pad_token_id to: 151643
Applying LoRA adapter to the model...
trainable params: 2,164,480 || all params: 496,199,040 || trainable%: 0.4362


In [None]:
import numpy as np
import evaluate
from transformers import TrainingArguments, Trainer
import math # Needed for ceiling division

# (Keep the metric definition and compute_metrics function as before)
# Define metrics - Including precision, recall, f1 which are important for spam detection
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    precision = precision_metric.compute(predictions=predictions, references=labels, average="binary", pos_label=1)["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels, average="binary", pos_label=1)["recall"]
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="binary", pos_label=1)["f1"]

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

# --- Calculate steps per epoch ---
# Define parameters needed for calculation (adjust if you changed them)
PER_DEVICE_TRAIN_BATCH_SIZE = 8
GRADIENT_ACCUMULATION_STEPS = 4 # Match the value below
NUM_EPOCHS = 2 # Match the value below
# Assumes single GPU, adjust if using multiple GPUs (world_size)
world_size = 1

# Calculate total training steps and steps per epoch
num_training_samples = len(tokenized_datasets["train"])
effective_batch_size = PER_DEVICE_TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS * world_size
steps_per_epoch = math.ceil(num_training_samples / effective_batch_size)
max_steps = steps_per_epoch * NUM_EPOCHS # Optional: Can set max_steps instead of num_train_epochs

print(f"Dataset size: {num_training_samples}")
print(f"Effective batch size: {effective_batch_size}")
print(f"Steps per epoch: {steps_per_epoch}")
print(f"Total training steps: {max_steps}")
# ---------------------------------


output_dir = "./models/qwen2-0.5b-sms-spam-ucirvine-qlora"
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=2e-4,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE, # Make sure this is defined
    per_device_eval_batch_size=8,
    num_train_epochs=NUM_EPOCHS,           # Make sure this is defined
    weight_decay=0.01,
    # --- Use Epoch Strategy for Evaluation and Saving ---
    # evaluation_strategy="epoch", # Set evaluation strategy to epoch
    save_strategy="epoch",       # Set save strategy to epoch (matches eval_strategy)
    # eval_steps=... ,           # Not needed for epoch strategy
    # save_steps=... ,           # Not needed for epoch strategy
    # ----------------------------------------------------
    # load_best_model_at_end=True,    # This should now work
    metric_for_best_model="f1",     # Optimize for F1-score
    logging_strategy="steps",       # Keep logging based on steps (or change to "epoch")
    logging_steps=50,               # Log every 50 steps (adjust as needed)
    fp16=False,
    bf16=torch.cuda.is_bf16_supported(),
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS, # Make sure this is defined
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    push_to_hub=False,
    report_to="none",
)

# Initialize Trainer (remains the same)
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)



  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Dataset size: 4459
Effective batch size: 32
Steps per epoch: 140
Total training steps: 280


In [7]:
# Start training
print("\nStarting training...")
trainer.train()
print("Training finished.")

# Save the best adapter weights
adapter_output_dir = f"{output_dir}/best_adapter"
trainer.save_model(adapter_output_dir) # Saves only the adapter config and weights

# Optional: Save tokenizer
tokenizer.save_pretrained(adapter_output_dir)

print(f"Best adapter weights saved to {adapter_output_dir}")

# Evaluate the best model on the test set
print("\nEvaluating best model on test set...")
eval_results = trainer.evaluate(eval_dataset=tokenized_datasets["test"])
print("Evaluation results:", eval_results)


Starting training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
50,0.5828
100,0.0804
150,0.0611
200,0.051
250,0.0435




Training finished.
Best adapter weights saved to ./qwen2-0.5b-sms-spam-ucirvine-qlora/best_adapter

Evaluating best model on test set...


Evaluation results: {'eval_loss': 0.06340605765581131, 'eval_accuracy': 0.9802690582959641, 'eval_precision': 0.9205298013245033, 'eval_recall': 0.9328859060402684, 'eval_f1': 0.9266666666666666, 'eval_runtime': 54.4333, 'eval_samples_per_second': 20.484, 'eval_steps_per_second': 2.572, 'epoch': 1.989247311827957}


In [8]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch

# Load the base model (quantized) and tokenizer
base_model_id = "Qwen/Qwen2-0.5B-Instruct"
adapter_model_id = "./models/qwen2-0.5b-sms-spam-ucirvine-qlora/best_adapter"
print("Loading base model for inference...")
# Need to load with the same quantization config used during training
bnb_config_inf = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
base_model = AutoModelForSequenceClassification.from_pretrained(
    base_model_id,
    quantization_config=bnb_config_inf,
    device_map="auto",
    num_labels=num_labels, # Make sure num_labels matches training
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

# Set padding token if necessary (should match training)
if tokenizer.pad_token is None:
    if tokenizer.eos_token:
        tokenizer.pad_token = tokenizer.eos_token
    else:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
if base_model.config.pad_token_id is None:
    base_model.config.pad_token_id = tokenizer.pad_token_id


print("Loading PEFT adapter...")
# Load the adapter configuration and merge it with the base model
model_with_adapter = PeftModel.from_pretrained(base_model, adapter_model_id)
model_with_adapter.eval() # Set the model to evaluation mode

print("Model ready for inference.")



Loading base model for inference...


Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading PEFT adapter...
Model ready for inference.


In [9]:
# Example Inference
text = """
you need to send me your bank account details to complete the purchase immediately i have images of you
"""
# text = "Hey, are we still on for lunch tomorrow?"

inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(model_with_adapter.device)

with torch.no_grad():
    outputs = model_with_adapter(**inputs)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1).item()

predicted_label = id2label[predictions] # Use the id2label mapping from training

print(f"Input text: {text}")
print(f"Predicted label ID: {predictions}")
print(f"Predicted label: {predicted_label}") # Should be 'scam' or 'ham'

Input text: 
you need to send me your bank account details to complete the purchase immediately i have images of you

Predicted label ID: 0
Predicted label: ham


In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
import torch

# --- Configuration ---
base_model_id = "Qwen/Qwen2-0.5B-Instruct"
adapter_model_id = "./models/qwen2-0.5b-sms-spam-ucirvine-qlora/best_adapter" # Path to your saved adapter
merged_model_output_path = "./models/qwen2-0.5b-sms-spam-merged" # Directory to save the merged model
num_labels = 2 # Ensure this matches your fine-tuning task

# --- Load Base Model (NOT quantized this time) ---
# Load in a standard precision like float16 for merging
print(f"Loading base model ({base_model_id}) in float16...")
base_model = AutoModelForSequenceClassification.from_pretrained(
    base_model_id,
    num_labels=num_labels,
    torch_dtype=torch.float16, # Use float16 or float32
    low_cpu_mem_usage=True, # Helps load large models on less RAM
    trust_remote_code=True,
    # device_map="auto" # Or load on CPU first: device_map={'': 'cpu'}
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

# Set padding token if necessary (redundant if already done, but safe)
if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
if base_model.config.pad_token_id is None: base_model.config.pad_token_id = tokenizer.pad_token_id

# --- Load PEFT Adapter ---
print(f"Loading adapter from {adapter_model_id}...")
# Load the PeftModel directly onto the base model
# Ensure the base model is on the correct device (e.g., CPU or GPU) before loading adapter
model_with_adapter = PeftModel.from_pretrained(base_model, adapter_model_id)
# If base_model was loaded on CPU: model_with_adapter.to('cpu')

# --- Merge Adapter and Unload ---
print("Merging adapter weights...")
merged_model = model_with_adapter.merge_and_unload()
print("Adapter merged.")

# --- Save the Merged Model ---
print(f"Saving merged model and tokenizer to {merged_model_output_path}...")
merged_model.save_pretrained(merged_model_output_path)
tokenizer.save_pretrained(merged_model_output_path)
print("Merged model saved.")

# Optional: Clean up memory
# del base_model, model_with_adapter, merged_model
# torch.cuda.empty_cache()

Loading base model (Qwen/Qwen2-0.5B-Instruct) in float16...


Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading adapter from ./qwen2-0.5b-sms-spam-ucirvine-qlora/best_adapter...
Merging adapter weights...
Adapter merged.
Saving merged model and tokenizer to ./qwen2-0.5b-sms-spam-merged...
Merged model saved.


In [11]:
import coremltools as ct
import torch
import transformers # Ensure transformers is imported

# --- Load your MERGED and potentially QUANTIZED PyTorch model ---
# If you applied PyTorch quantization, load that model.
# If aiming for quantization during conversion, load the merged FP16 model.
model_path = "./models/qwen2-0.5b-sms-spam-merged" # Path to the merged FP16 model
output_coreml_path = "./models/qwen2-0.5b-sms-spam.mlpackage"

print(f"Loading model from {model_path} for Core ML conversion...")
# It's often best to explicitly load the model class
pytorch_model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path, torch_dtype=torch.float16)
pytorch_model.eval() # Set to evaluation mode

tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)

# --- Prepare Example Input for Tracing ---
# Core ML conversion often uses tracing, requiring example inputs.
# The shape should match what the model expects during inference.
example_text = "This is a sample input text for tracing."
# max_length should match tokenizer settings used during training/inference
tokenized_input = tokenizer(example_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)

# Extract tensors required by the model's forward pass
# Typically 'input_ids' and 'attention_mask'
example_input_ids = tokenized_input['input_ids']
example_attention_mask = tokenized_input['attention_mask']
# Put example inputs on CPU for tracing usually
example_inputs_tuple = (example_input_ids.cpu(), example_attention_mask.cpu())
# Or as a dictionary if the converter prefers that
# example_inputs_dict = {"input_ids": example_input_ids.cpu(), "attention_mask": example_attention_mask.cpu()}


# --- Trace the PyTorch model ---
print("Tracing the PyTorch model...")
try:
    # Use torch.jit.trace with the model's forward method inputs
    traced_model = torch.jit.trace(pytorch_model, example_inputs_tuple, strict=False)
    print("Model traced successfully.")
except Exception as e:
    print(f"Error during tracing: {e}")
    # Tracing can be tricky; alternative might be script conversion or specific exporters
    # For Hugging Face models, sometimes direct conversion works better
    traced_model = None # Indicate failure

# --- Convert using coremltools ---
# This is a simplified example. You'll need to configure inputs/outputs
# and crucially, quantization options.
if traced_model: # Proceed if tracing worked
    print("Converting traced model to Core ML...")

    # Define input types (match the traced inputs)
    input_ids = ct.TensorType(name="input_ids", shape=example_input_ids.shape, dtype=np.int32)
    attention_mask = ct.TensorType(name="attention_mask", shape=example_attention_mask.shape, dtype=np.int32)

    # --- Quantization during Conversion (Example: FP16 weights) ---
    # For INT8 or other types, explore ct.optimize.coreml.* or specific compute_precision options
    # Example: weights quantized to FP16
    quantization_options = ct.OptimizeDevice.QuantizationOptions(
        global_quantization_type='linear', # or 'kmeans' etc.
        quantization_mode='weight_symmetric',
        weight_dtype=ct.precision.FLOAT16 # Example: FP16 weights. For INT8, explore options.
    )
    # Apply optimization profile targeting ANE if possible
    optimization_profile = ct.OptimizeDevice(quantization_options=quantization_options, allow_low_precision_weights=True)


    # Convert the model
    # minimum_deployment_target is important for compatibility and features
    mlmodel = ct.convert(
        traced_model,
        inputs=[input_ids, attention_mask],
        # outputs=[ct.TensorType(name="logits")], # Define output name if needed
        convert_to="mlprogram", # Recommended modern format
        minimum_deployment_target=ct.target.iOS16, # Or newer, e.g., iOS17
        compute_units=ct.ComputeUnit.ALL, # Allow CPU, GPU, ANE
        # optimization_profile=optimization_profile, # Apply quantization/optimization profile
        # Pass compute_precision=ct.precision.FLOAT16 or other options here if applicable
    )

    print(f"Saving Core ML model to {output_coreml_path}...")
    mlmodel.save(output_coreml_path)
    print("Core ML model saved.")

else: # Alternative if tracing failed (might work better for HF models)
    print("Attempting direct conversion (may have limitations)...")
    try:
        mlmodel = ct.convert(
            pytorch_model, # Pass the original HF model
            source="pytorch",
            inputs=[
                ct.TensorType(name="input_ids", shape=(1, ct.RangeDim(1, 128)), dtype=np.int32), # Use dynamic shape
                ct.TensorType(name="attention_mask", shape=(1, ct.RangeDim(1, 128)), dtype=np.int32)
            ],
            convert_to="mlprogram",
            minimum_deployment_target=ct.target.iOS16,
            compute_units=ct.ComputeUnit.ALL,
            # Add quantization/optimization options here as well
        )
        print(f"Saving Core ML model to {output_coreml_path}...")
        mlmodel.save(output_coreml_path)
        print("Core ML model saved.")
    except Exception as e:
        print(f"Direct conversion also failed: {e}")

scikit-learn version 1.6.1 is not supported. Minimum required version: 0.17. Maximum required version: 1.5.1. Disabling scikit-learn conversion API.
Torch version 2.6.0+cu124 has not been tested with coremltools. You may run into unexpected errors. Torch 2.5.0 is the most recent version that has been tested.
Failed to load _MLModelProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLCPUComputeDeviceProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLGPUComputeDeviceProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLNeuralEngineComputeDeviceProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLModelProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLComputePlanProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLModelProxy: No module named 'coremltools.libcoremlpython'
Failed to load _MLModelAssetProxy: No module named 'coremltools.libcoremlpython'


Loading model from ./qwen2-0.5b-sms-spam-merged for Core ML conversion...


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Tracing the PyTorch model...


  elif sliding_window is None or key_value_length < sliding_window:
  if attention_mask.shape[-1] > target_length:
  is_causal = query.shape[2] > 1 and causal_mask is None


Error during tracing: Tracer cannot infer type of SequenceClassifierOutputWithPast(loss=None, logits=tensor([[ 1.9775, -0.8584]], dtype=torch.float16, grad_fn=<IndexBackward0>), past_key_values=<transformers.cache_utils.DynamicCache object at 0x7f0d367eaaa0>, hidden_states=None, attentions=None)
:Only tensors and (possibly nested) tuples of tensors, lists, or dicts are supported as inputs or outputs of traced functions, but instead got value of type DynamicCache.
Attempting direct conversion (may have limitations)...
Direct conversion also failed: Model must either be a TorchScript object (or .pt or .pth file) or an ExportedProgram object (if using torch.export based API), received: <class 'transformers.models.qwen2.modeling_qwen2.Qwen2ForSequenceClassification'>
