In [1]:
import os
from transformers import MarianTokenizer, MarianMTModel
import coremltools as ct
import torch
import json

  from .autonotebook import tqdm as notebook_tqdm
Torch version 2.6.0 has not been tested with coremltools. You may run into unexpected errors. Torch 2.5.0 is the most recent version that has been tested.


## Conversion

In [None]:
import torch
from transformers import MarianMTModel, MarianTokenizer

model_path = "/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi"

tokenizer = MarianTokenizer.from_pretrained(model_path)
model = MarianMTModel.from_pretrained(model_path)

# Wrap encoder (same as before)
class EncoderWrapper(torch.nn.Module):
    def __init__(self, encoder):
        super().__init__()
        self.encoder = encoder

    def forward(self, input_ids):
        outputs = self.encoder(input_ids = input_ids)
        return outputs.last_hidden_state

# Wrap decoder + lm_head
class DecoderWithLMHeadWrapper(torch.nn.Module):
    def __init__(self, decoder, lm_head):
        super().__init__()
        self.decoder = decoder
        self.lm_head = lm_head

    def forward(self, input_ids, encoder_hidden_states):
        outputs = self.decoder(input_ids=input_ids, encoder_hidden_states=encoder_hidden_states)
        hidden_states = outputs.last_hidden_state
        logits = self.lm_head(hidden_states)   # <-- vocab logits
        return logits

encoder_wrapper = EncoderWrapper(model.model.encoder)
decoder_wrapper = DecoderWithLMHeadWrapper(model.model.decoder, model.lm_head)
encoder_wrapper.eval()
decoder_wrapper.eval()

In [None]:
batch_size = 1
seq_len = 10   # number of tokens
vocab_size = 61950

dummy_input = torch.randint(0, vocab_size, (batch_size, seq_len))

with torch.no_grad():
    hidden_states = encoder_wrapper(dummy_input)

print("Dummy input to encoder:", dummy_input)
print("Hidden states shape:", hidden_states.shape)
print("Hidden states sample:", hidden_states[0, 0, :10])  # print first 10 dims of first token

start_token_id = 61949
decoder_input_ids = torch.tensor([[start_token_id]])  # shape: [1, 1]

# 4️⃣ Forward pass through decoder
with torch.no_grad():
    logits = decoder_wrapper(decoder_input_ids, hidden_states)
print("Logits output from decoder:", logits)
print(logits.shape)

In [3]:
import coremltools as ct

max_src_len = 50
dummy_encoder_input = torch.ones((1, max_src_len), dtype=torch.long)

# ---- Encoder ----
encoder_traced = torch.jit.trace(encoder_wrapper, dummy_encoder_input)
encoder_mlmodel = ct.convert(
    encoder_traced,
    inputs=[ct.TensorType(name="input_ids", shape=(1, ct.RangeDim(1, max_src_len)))],
    compute_units=ct.ComputeUnit.ALL,
)
encoder_mlmodel.save("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/opus-mt-en-hi Converted/MarianEncoder.mlpackage")

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.
  is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)
When both 'convert_to' and 'minimum_deployment_target' not specified, 'convert_to' is set to "mlprogram" and 'minimum_deployment_target' is set to ct.target.iOS15 (which is same as ct.target.macOS12). Note: the model will not run on systems older than iOS15/macOS12/watchOS8/tvOS15. In order to make your model run on older system, please set the 'minimum_deployment_target' to iOS14/iOS13. Details please see the link: https://apple.github.io/coremltools/docs-guides/source/target-conversion-formats.html
Converting PyTorch Frontend ==> MIL Ops:   9%|▉         | 20/228 [00:00<00:01, 126.94 ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 227/228 [00:00<0

In [None]:

# ---- Decoder ----
max_tgt_len = 50
hidden_size = model.config.d_model

decoder_input_ids = torch.ones((1, max_tgt_len), dtype=torch.long)   # decoding 5 tokens
encoder_hidden_states = torch.ones((1, max_src_len, hidden_size), dtype=torch.float32)

decoder_traced = torch.jit.trace(decoder_wrapper, (decoder_input_ids, encoder_hidden_states))
decoder_mlmodel = ct.convert(
    decoder_traced,
    inputs=[
        ct.TensorType(name="decoder_input_ids", shape=(1, ct.RangeDim(1, max_tgt_len))),
        ct.TensorType(name="encoder_hidden_states", shape=(1, ct.RangeDim(1, max_src_len), hidden_size)),
        # ct.TensorType(name="decoder_input_ids", shape=(1, 50)),
        # ct.TensorType(name="encoder_hidden_states", shape=(1, 50, hidden_size)),
    ],
    compute_units=ct.ComputeUnit.ALL,
)
decoder_mlmodel.save("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/opus-mt-en-hi Converted/MarianDecoder.mlpackage")

# Checking the exported models

In [2]:
import coremltools as ct
from coremltools.models import MLModel
import numpy as np

In [2]:
encoder = ct.models.MLModel("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/opus-mt-en-hi Converted/MarianEncoder.mlpackage")
decoder = ct.models.MLModel("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/opus-mt-en-hi Converted/MarianDecoder.mlpackage")

In [None]:
decoder_input = np.ones((1, 50), dtype=np.float32)
encoder_hidden = np.random.randn(1, 50, 512).astype(np.float32)

decoder_input_array = ct.TensorType(shape=decoder_input.shape, dtype=np.float32)
encoder_hidden_array = ct.TensorType(shape=encoder_hidden.shape, dtype=np.float32)

out = decoder.predict({
    "decoder_input_ids": decoder_input_array,
    "encoder_hidden_states": encoder_hidden_array
})

In [3]:
tokens = [244, 23, 4, 8496, 765, 3, 0]  # your encoded token IDs
tokens_array = np.array(tokens, dtype=np.float32).reshape(1, -1)  # shape [1, seq_len]

# For decoder, first token is <pad>
start_token = np.array([61949], dtype=np.float32).reshape(1, 1)  # shape [1,1]encoder.predict(["input_ids": tokens])

In [4]:
encoder_input = {"input_ids": tokens_array}
encoder_output = encoder.predict(encoder_input)

# Usually output key is 'var_396' or check encoder.output_description
hidden_states = encoder_output["var_396"]
print("Encoder output shape:", hidden_states.shape)

Encoder output shape: (1, 7, 512)


loc("/Users/arnabdey/Library/Caches/python/com.apple.e5rt.e5bundlecache/24E263/8E3EADF9EA3EBC25650E1BA07543095B9FCADF437A4BAB9CE1E6B263A84A8E3B/02E24F99A828CE575EB4193AA01D5B26C2D6948FDBDD178FA46746899EA5BFBE.bundle/H14G.bundle/main/main_mps_graph/main_mps_graph.mpsgraphpackage/model_0.mpsgraph":0:0): error: attempting to parse a byte at the end of the bytecode


## Convert Newly

In [84]:
batch_size = 1
seq_len = 128
hidden_dim = 512
vocab_size = 61950

dummy_input_ids = torch.randint(0, vocab_size, (batch_size, seq_len))
dummy_encoder_hidden = torch.randn(batch_size, seq_len, hidden_dim)

In [91]:
import coremltools as ct

encoder_wrapper.eval()

traced_encoder = torch.jit.trace(encoder_wrapper, dummy_input_ids)

encoder_model = ct.convert(
    traced_encoder,
    inputs=[ct.TensorType(name="input_ids", shape=(1, ct.RangeDim(1, 128)), dtype=np.int32)],
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS17,
    compute_precision=ct.precision.FLOAT32
)

encoder_model.save("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/Converted MLPackages (new)/MarianEncoder.mlpackage")
print("Saved MarianEncoder.mlpackage")

Converting PyTorch Frontend ==> MIL Ops:   0%|          | 0/228 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 227/228 [00:00<00:00, 513.39 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 122.24 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 90.98 passes/s] 
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 283.27 passes/s]


Saved MarianEncoder.mlpackage


In [96]:
decoder_wrapper.eval()

traced_decoder = torch.jit.trace(decoder_wrapper, (dummy_input_ids, dummy_encoder_hidden))

decoder_model = ct.convert(
    traced_decoder,
    inputs=[
        ct.TensorType(name="decoder_input_ids", shape=(1, 128), dtype=np.int32),
        ct.TensorType(name="encoder_hidden_states", shape=(1, 128, 512), dtype=np.float32)
    ],
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS17,
    compute_precision=ct.precision.FLOAT32
)

decoder_model.save("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/Converted MLPackages (new)/MarianDecoder.mlpackage")
print("Saved MarianDecoder.mlpackage")

Converting PyTorch Frontend ==> MIL Ops:   0%|          | 0/561 [00:00<?, ? ops/s]Core ML embedding (gather) layer does not support any inputs besides the weights and indices. Those given will be ignored.
Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 560/561 [00:00<00:00, 3695.71 ops/s]
Running MIL frontend_pytorch pipeline:   0%|          | 0/5 [00:00<?, ? passes/s]Saving value type of int64 into a builtin type of int32, might lose precision!
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 81.33 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:01<00:00, 75.42 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 139.17 passes/s]


Saved MarianDecoder.mlpackage


Test the converted models

In [97]:
ml_encoder = ct.models.MLModel("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/Converted MLPackages (new)/MarianEncoder.mlpackage")
ml_decoder = ct.models.MLModel("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Convert to coreML/Converted MLPackages (new)/MarianDecoder.mlpackage")

with open("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi/vocab.json", "r") as f:
    vocab = json.load(f)
id_to_token = {v: k for k, v in vocab.items()}


In [93]:
enc_input = dummy_input_ids.numpy().astype(np.int32)

enc_out = ml_encoder.predict({"input_ids": enc_input})

for k, v in enc_out.items():
    print(k, type(v), getattr(v, "shape", None))

var_396 <class 'numpy.ndarray'> (1, 128, 512)


loc("/Users/arnabdey/Library/Caches/python/com.apple.e5rt.e5bundlecache/24E263/36B4862F069ADC74E420276BE9921F6ADE8E6493594C4AF2DCCF663F5AD0DC30/DE31E3B0A783900602BBA4774C406682DAEE61A582245BAA721ACD5675F4BFDE.bundle/H14G.bundle/main/main_mps_graph/main_mps_graph.mpsgraphpackage/model_0.mpsgraph":0:0): error: attempting to parse a byte at the end of the bytecode


In [None]:
dec_input = dummy_input_ids.numpy().astype(np.int32)
encoder_hidden = enc_out["var_396"].astype(np.float32)

dec_out = ml_decoder.predict({
    "decoder_input_ids": dec_input,
    "encoder_hidden_states": encoder_hidden
})

for k, v in dec_out.items():
    print(k, type(v), getattr(v, "shape", None))

In [77]:
print(dec_out['var_829'].shape)

(1, 50, 61950)


In [99]:
text = "I am a man with a big dick."
decoder_start_token = np.array([[61949 for _ in range(128)]], dtype=np.float32)
tokenizer = MarianTokenizer(source_spm="/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi/source.spm", target_spm="/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi/target.spm", vocab="/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi/vocab.json")

enc_input = np.array([tokenizer.encode(text)], dtype=np.float32)
# print(enc_input)

enc_out = ml_encoder.predict({"input_ids": enc_input})
encoder_hidden = np.pad(enc_out['var_396'], ((0, 0), (0, 128 - enc_input.shape[1]), (0, 0)), mode='constant', constant_values=0)
# print(encoder_hidden.shape)
dec_out = ml_decoder.predict({"decoder_input_ids": decoder_start_token, "encoder_hidden_states": np.array(encoder_hidden, dtype=np.float16)})
last_token_logits = np.exp(dec_out['var_829'][:, -1, :])
probabilities = last_token_logits[0] / np.sum(last_token_logits)
# print(dec_out['var_829'])

loc("/Users/arnabdey/Library/Caches/python/com.apple.e5rt.e5bundlecache/24E263/9AD997F09B45F6F92BC492ECFAE7C92D25FDCA1C670D6F6C037BE1ECAAB7B15A/C690509FC1C5576073E7FDD04FE200A05BF8DC8138A64145CF0B044236E8FED1.bundle/H14G.bundle/main/main_mps_graph/main_mps_graph.mpsgraphpackage/model_0.mpsgraph":0:0): error: attempting to parse a byte at the end of the bytecode


In [101]:
output_till_now = [61949]

for i in range(20):
    decoder_inputs = np.array([output_till_now + [61949 for _ in range(128 - len(output_till_now))]], dtype=np.float32)
    dec_out = ml_decoder.predict({"decoder_input_ids": np.array(decoder_inputs, dtype=np.int32), "encoder_hidden_states": np.array(encoder_hidden, dtype=np.float16)})
    last_token_logits = np.exp(dec_out['var_829'][:, i, :])
    probabilities = last_token_logits[0] / np.sum(last_token_logits)
    index = np.argmax(probabilities)
    print(id_to_token[index])
    output_till_now.append(index)
    if(id_to_token[index] == '</s>'):
        break

▁मैं
▁एक
▁बड़े
▁डिक
▁के
▁साथ
▁एक
▁आदमी
▁हूँ
।
</s>


In [64]:
print(enc_input)

[[2.440e+02 2.300e+01 4.000e+00 8.496e+03 7.650e+02 2.200e+01 0.000e+00]]


# Get Start Token ID

In [16]:
import json

with open("/Users/arnabdey/Documents/MyCodes/Python Folders/Personal Projects/FastVLM/Models/opus-mt-en-hi/vocab.json", encoding="utf-8") as f:
    vocab = json.load(f)

In [17]:
matches = {k: v for k, v in vocab.items() if "pad" in k.lower()}

if matches:
    for token, idx in matches.items():
        print(f"{token}: {idx}")

▁spades: 2939
pad: 14586
▁padded: 31332
▁Pad: 34687
▁pads: 41055
▁padding: 42866
▁Cappadocia: 45741
▁Padding: 51723
▁Launchpad: 57832
▁Paddan: 59225
<pad>: 61949
