In [None]:
!pip install torch transformers coremltools

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import coremltools as ct

# Checkpoint to export; tokenizer and weights are loaded from the same identifier.
model_id = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Example token ids (PyTorch tensor) that serve as the dummy input for tracing.
input_ids = tokenizer("hello", return_tensors="pt")["input_ids"]

In [None]:
class OPTWrapper(nn.Module):
    """Thin adapter around a model that exposes only its logits.

    Calling the wrapper forwards ``input_ids`` to the wrapped model and
    returns the ``logits`` attribute of whatever that call produces, giving
    the module a single return value to trace.
    """

    def __init__(self, model):
        """Keep a reference to ``model`` as the ``model`` attribute."""
        super().__init__()
        self.model = model

    def forward(self, input_ids):
        """Run the wrapped model on ``input_ids`` and return its ``.logits``."""
        return self.model(input_ids).logits

# Wrap the loaded model and put it in evaluation mode before tracing
# (`eval()` returns the module itself, so the call can be chained).
wrapped_model = OPTWrapper(model).eval()

# Record the forward pass on the example ids. strict=False relaxes the
# tracer's checks on mutable container types (list/dict).
traced_model = torch.jit.trace(wrapped_model, example_inputs=input_ids, strict=False)

# The iOS app needs the same tokenizer files to encode inputs and decode outputs.
tokenizer.save_pretrained("./tokenizer-opt-125m")

In [None]:
# Freeze the traced module before handing it to the converter.
frozen_model = torch.jit.freeze(traced_model)

# Input description: first dimension fixed at 1, second dimension flexible
# anywhere from 1 to 100.
# NOTE(review): no dtype is given for input_ids, so coremltools applies its
# default input dtype — confirm this matches what the consuming app feeds.
seq_len_dim = ct.RangeDim(lower_bound=1, upper_bound=100)
input_shape = ct.Shape(shape=(1, seq_len_dim))
input_spec = ct.TensorType(name="input_ids", shape=input_shape)

# Convert to a Core ML "mlprogram" with iOS 16 as the minimum deployment
# target and CPU_AND_NE as the requested compute units.
mlmodel = ct.convert(
    frozen_model,
    inputs=[input_spec],
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS16,
    compute_units=ct.ComputeUnit.CPU_AND_NE,
)

mlmodel.save("opt-125m.mlpackage")

In [None]:
!zip -r opt-125m.mlpackage.zip /content/opt-125m.mlpackage

In [None]:
# Download the zipped package through Colab's file helper (Colab-only import).
from google.colab import files

archive_name = "opt-125m.mlpackage.zip"
files.download(archive_name)