# Convert a spaCy Model to Core ML
Starting point: a Google search for "Spacy export model to coreml python".

## References
- [Core ML Tools Guide](https://apple.github.io/coremltools/docs-guides/)
- [Core ML Tools API Reference](https://apple.github.io/coremltools/index.html)
- [Converting Models to Core ML](https://huggingface.co/blog/fguzman82/frompytorch-to-coreml)
- [PyTorch to Core ML Conversion](https://coremltools.readme.io/v6.3/docs/pytorch-conversion)

### Imports

In [69]:
import spacy
from spacy import displacy
import torch
import torch.nn as nn

### Load the spaCy Model

In [82]:
# Alternative: load the pipeline stored under ./training/ner_and_textcat instead.
# nlp = spacy.load('./training/ner_and_textcat')
nlp = spacy.load("en_core_web_lg")
doc = nlp("Hide all walls")
displacy.render(doc, style="ent", jupyter=True)
print(doc.cats)
# Accessing `doc._.<name>` raises AttributeError when that extension has never
# been registered, so guard the lookup. This keeps the cell runnable in a
# session where nothing has installed the `trf_data` extension, and prints
# None in that case.
print(doc._.trf_data if doc.has_extension("trf_data") else None)

{}
None


### Wrap the spaCy model in a PyTorch Model

In [87]:
# Extract the pipeline's word-vector table as a float32 tensor. torch.tensor()
# copies the data and replaces the legacy torch.FloatTensor constructor.
word_vectors = torch.tensor(nlp.vocab.vectors.data, dtype=torch.float32)
# Number of lexemes currently held in the vocab. This is not the number of rows
# in the vector table (compare the two sizes printed by this notebook).
vocab_size = len(nlp.vocab)
# Width of each embedding vector, read from the table itself instead of a
# hard-coded constant (the previous value, 50265, did not match the vectors).
embedding_dim = word_vectors.shape[1]

dim = nlp.vocab.vectors_length
print(vocab_size, dim)

# Define a PyTorch model
class PyTorchWrapper(nn.Module):
    """Expose a pretrained word-vector table as a PyTorch embedding lookup.

    Args:
        vocab_size: Number of rows in the embedding table. Accepted for
            backward compatibility; the layer is sized from
            ``pretrained_embeddings``.
        embedding_dim: Width of each embedding vector. Accepted for backward
            compatibility; the layer is sized from ``pretrained_embeddings``.
        pretrained_embeddings: 2-D float tensor of shape
            ``(num_embeddings, embedding_dim)`` used as the embedding weights.
    """

    def __init__(self, vocab_size, embedding_dim, pretrained_embeddings):
        super().__init__()
        # from_pretrained is a classmethod that builds the layer directly from
        # the weight tensor (frozen by default). Calling it on the class avoids
        # first allocating and randomly initialising a throwaway
        # vocab_size x embedding_dim table that is immediately discarded.
        self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings)
        # Add other layers as needed, e.g., LSTM, Linear, etc.

    def forward(self, x):
        """Return the embedding vectors for the integer indices in ``x``."""
        embedded = self.embedding(x)
        # Pass through other layers
        return embedded
    
# Size the wrapper from the vector table itself: rows -> vocab_size,
# columns -> embedding_dim.
vocab_size, embedding_dim = word_vectors.size()
# Build the wrapper and switch it to evaluation mode in one step
# (nn.Module.eval() returns the module itself).
torch_model = PyTorchWrapper(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    pretrained_embeddings=word_vectors,
).eval()
print(word_vectors, vocab_size, embedding_dim)
# Last expression of the cell, so the notebook displays the module summary.
torch_model

766 300
tensor([[-1.9705, -0.5765,  0.0749,  ...,  0.9095, -0.2952, -0.6983],
        [-1.7670, -0.2134, -0.4022,  ..., -0.3434,  0.3903,  0.0737],
        [-1.7242, -0.3706,  0.9299,  ..., -0.3086,  0.0218, -1.2368],
        ...,
        [ 2.3345, -1.5065, -0.2194,  ...,  0.5823, -0.6457,  0.3587],
        [ 2.4056,  0.0566,  0.4939,  ..., -1.0263, -1.5310,  0.9550],
        [ 2.8327, -0.4540,  0.7826,  ..., -0.5384, -0.0881, -0.1755]]) 342918 300


PyTorchWrapper(
  (embedding): Embedding(342918, 300)
)

In [86]:
# Trace the model
import torchvision  # NOTE(review): not referenced in this cell — confirm it is still needed

# Sample indices from the embedding table's own row count so every index is a
# valid lookup. len(nlp.vocab) counts the lexemes held in the vocab, which is
# a different quantity from the number of embedding rows.
vocab_size = torch_model.embedding.num_embeddings
dummy_input = torch.randint(0, vocab_size, (1, 128))
# torch.jit.trace records the operations executed for this example input.
traced_model = torch.jit.trace(torch_model, dummy_input)
traced_model

PyTorchWrapper(
  original_name=PyTorchWrapper
  (embedding): Embedding(original_name=Embedding)
)

### Script the Model

In [None]:
# script_model and convert_to_coreml are provided by the `coreml` module
# imported here; they are not defined in this notebook.
from coreml import convert_to_coreml, script_model

# Run the wrapper through script_model (presumably yielding a TorchScript
# module — verify against the helper). The bare name on the last line makes
# the notebook display the result.
scripted = script_model(torch_model)
scripted

### Convert the scripted model to Core ML

In [None]:
# Convert the scripted model with the imported convert_to_coreml helper.
# NOTE(review): (1, 1) is presumably the example input shape; the tracing cell
# uses a (1, 128) input — confirm which shape is intended.
coreml_model = convert_to_coreml(scripted, (1, 1))
coreml_model

### Save the model

In [None]:
# Persist the converted model under the relative path "construction.mlpackage".
coreml_model.save("construction.mlpackage")

In [45]:
# Inspect the loaded pipeline. The bare `nlp.vocab` expression is not the
# cell's last statement, so it produces no output.
nlp.vocab
# Each entry printed below is a (name, component) pair.
components = nlp.components
for component in components:
    print(component)

('transformer', <spacy_curated_transformers.pipeline.transformer.CuratedTransformer object at 0x15768de20>)
('tagger', <spacy.pipeline.tagger.Tagger object at 0x15784d760>)
('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x157691cf0>)
('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x1579ac180>)
('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer object at 0x1578495c0>)
('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x157691f20>)
('entity_ruler', <spacy.pipeline.entityruler.EntityRuler object at 0x157849740>)
('textcat', <spacy.pipeline.textcat.TextCategorizer object at 0x15784da00>)


In [49]:
# Build a (1, 128) tensor of random integer indices in [0, vocab_size).
# NOTE(review): len(nlp.vocab) is presumably the lexeme count rather than the
# embedding table's row count — confirm this is the intended upper bound.
vocab_size = len(nlp.vocab)
dummy_input = torch.randint(0, vocab_size, (1, 128))
dummy_input

tensor([[397, 123, 566, 221, 648,  49, 100,  17, 350, 455, 569, 130, 411, 101,
          97, 506, 325, 631, 267, 519, 110, 190,  13, 178, 513, 615, 739, 113,
         717, 685, 114,  97, 411, 676, 264,  34, 437,   4, 649, 540, 263, 150,
         157, 566, 140, 730, 137, 202,  84, 249, 514,  22, 615, 690,   1, 640,
         174,  37, 156, 296, 227, 622, 197, 124,  31, 472, 293, 481,  25, 217,
         107, 615, 620, 289, 483, 225, 302, 424,  10, 455, 658, 215, 624, 616,
         395, 449, 508, 458, 281, 436,  29, 233, 526, 695, 737, 288, 307, 247,
         219, 323, 261, 632, 129,  70, 581, 732, 548, 480, 208, 580,  64, 573,
         377, 160, 570, 281, 481, 385, 582, 295, 569, 516, 558, 278, 122, 338,
         407, 597]])

In [74]:
# Width of the vocab's word vectors. The recorded output below is 0, so the
# pipeline loaded at that point presumably had no static vectors — confirm.
dim = nlp.vocab.vectors_length
dim

0