# MobileNetV2: PyTorch to CoreML Conversion

In [1]:
pip install torch torchvision coremltools numpy pillow

Collecting torch
  Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl (66.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.5/66.5 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading torchvision-0.21.0-cp312-cp312-macosx_11_0_arm64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sympy, torch, torchvision
  Attempting uninstall: sympy
    Found existing installation:

In [3]:
pip install --upgrade "optree>=0.13.0"

Collecting optree>=0.13.0
  Downloading optree-0.14.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (47 kB)
Downloading optree-0.14.0-cp312-cp312-macosx_11_0_arm64.whl (335 kB)
Installing collected packages: optree
  Attempting uninstall: optree
    Found existing installation: optree 0.12.1
    Uninstalling optree-0.12.1:
      Successfully uninstalled optree-0.12.1
Successfully installed optree-0.14.0
Note: you may need to restart the kernel to use updated packages.


In [35]:
import torch
import torchvision
import torchvision.transforms as transforms
import os
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision.models import MobileNet_V2_Weights

In [37]:
# Define ASL dataset path
dataset_path = "/Users/vivek/Developer/Python-Workspace/SignSpeak/Datasets/Raw/AmericanSignLanguage_Dataset"  # Path to ASL dataset containing subfolders

In [39]:
# ✅ Step 2: Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])


In [41]:
# ✅ Step 3: Load the dataset
full_dataset = ImageFolder(root=dataset_path, transform=transform)

In [43]:
# ✅ Step 4: Split dataset (from .ipynb)
train_size = int(0.8 * len(full_dataset))  
val_size = len(full_dataset) - train_size  
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

In [45]:
# ✅ Step 5: Create DataLoaders
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [47]:
# ✅ Step 6: Print class names
num_classes = len(full_dataset.classes)
print(f"Number of classes: {num_classes}, Train Samples: {len(train_dataset)}, Validation Samples: {len(val_dataset)}")

Number of classes: 36, Train Samples: 113808, Validation Samples: 28453


In [49]:
# ✅ Step 1: Set up the device (M1 Pro GPU)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)
print(f"Using device: {device}")


Using device: mps


In [51]:
# ✅ Step 2: Load MobileNetV2 (Pretrained & Modify)
model = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
model.classifier[1] = nn.Linear(1280, num_classes)  
model = model.to(device)

In [53]:
# ✅ Step 3: Define Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [55]:
import sys
import torch

num_epochs = 2  # Adjust as needed

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        # ✅ Print real-time batch progress on the same line
        print(f"\rEpoch [{epoch+1}/{num_epochs}] | Batch [{batch_idx+1}/{len(train_loader)}] "
              f"| Loss: {loss.item():.4f} | Accuracy: {100 * correct / total:.2f}%", end="", flush=True)

    # ✅ Print final epoch summary on a new line
    accuracy = 100 * correct / total
    print(f"\n✅ Epoch [{epoch+1}/{num_epochs}] Complete! Avg Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

print("🎉 Training complete.")


Epoch [1/2] | Batch [7113/7113] | Loss: 0.0180 | Accuracy: 97.95%
✅ Epoch [1/2] Complete! Avg Loss: 0.0753, Accuracy: 97.95%
Epoch [2/2] | Batch [7113/7113] | Loss: 0.0006 | Accuracy: 99.36%
✅ Epoch [2/2] Complete! Avg Loss: 0.0233, Accuracy: 99.36%
🎉 Training complete.


In [57]:
model_path = "mobilenetv2_asl_trained.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved at {model_path}")

Model saved at mobilenetv2_asl_trained.pth


In [59]:
# ✅ Load the trained model
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()  

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [61]:
# ✅ Create a sample input tensor
example_input = torch.rand(1, 3, 224, 224).to(device)

# ✅ Convert the model to TorchScript
traced_model = torch.jit.trace(model, example_input)
torchscript_model_path = "mobilenetv2_asl_traced.pt"
traced_model.save(torchscript_model_path)
print(f"TorchScript model saved at {torchscript_model_path}")


TorchScript model saved at mobilenetv2_asl_traced.pt


In [63]:
import coremltools as ct

# ✅ Convert TorchScript model to Core ML
mlmodel = ct.convert(
    traced_model,
    convert_to="mlprogram",
    inputs=[ct.TensorType(shape=example_input.shape)]
)

# ✅ Save Core ML Model
mlmodel_path = "mobilenetv2_asl.mlpackage"
mlmodel.save(mlmodel_path)
print(f"Core ML model saved at {mlmodel_path}")


TensorFlow version 2.16.2 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.
Torch version 2.6.0 has not been tested with coremltools. You may run into unexpected errors. Torch 2.5.0 is the most recent version that has been tested.
Converting PyTorch Frontend ==> MIL Ops: 100%|██████████████████████████████████████████████████████████████████████████████▊| 384/385 [00:00<00:00, 6754.37 ops/s]
Running MIL frontend_pytorch pipeline: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 166.68 passes/s]
Running MIL default pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:00<00:00, 127.19 passes/s]
Running MIL backend_mlprogram pipeline: 100%|████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 285.80 passes/s]


Core ML model saved at mobilenetv2_asl.mlpackage


In [71]:
# Get the first input name from Core ML model
correct_input_name = list(mlmodel.input_description)[0]

# ✅ Use the correct input name in the prediction
test_input = torch.rand(1, 3, 224, 224).numpy()
coreml_out_dict = mlmodel.predict({correct_input_name: test_input})  # Use correct input name
coreml_out = list(coreml_out_dict.values())[0]

print("Core ML Model Output:", coreml_out)


Core ML Model Output: [[ 0.02851868 -0.12524414 -0.14526367 -0.29736328 -0.13061523 -0.32373047
  -0.16699219 -0.15222168 -0.18017578 -0.14343262 -0.2849121  -0.1204834
  -0.17565918 -0.25634766 -0.19311523 -0.13293457 -0.03512573 -0.07391357
  -0.07781982 -0.20715332 -0.24523926 -0.2775879  -0.06622314 -0.1262207
  -0.02119446 -0.01434326 -0.02287292  0.04129028  0.12469482  0.05697632
   0.0826416   0.01629639  0.18151855  0.00340271 -0.06530762  0.07891846]]
