In [1]:
# Step 1: Install dependencies (if not already installed)
!pip install torch torchvision timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Step 2: Import necessary libraries
import torch
import timm
import torchvision.transforms as transforms
from PIL import Image
import requests

In [32]:
# Step 3: Load Pretrained MobileViT v2 from TIMM
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = timm.create_model("mobilevitv2_100", pretrained=True).to(device)  # 100 means 1.0x width scaling
model.head.fc = torch.nn.Linear(in_features=512, out_features=1)  # Model should output only 2 values
model.eval()

ByobNet(
  (stem): ConvNormAct(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
  )
  (stages): Sequential(
    (0): Sequential(
      (0): BottleneckBlock(
        (conv1_1x1): ConvNormAct(
          (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
        )
        (conv2_kxk): ConvNormAct(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
   

In [33]:
# Get total number of parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Total Parameters: {total_params / 1e6:.2f}M")  # Convert to million (M)

Total Parameters: 4.39M


In [34]:
# Check the model's size

import os

# Save the model to a temporary file
torch.save(model.state_dict(), "mobilevitv2.pth")

# Get the file size
model_size = os.path.getsize("mobilevitv2.pth") / (1024 * 1024)  # Convert bytes to MB
print(f"Model Size: {model_size:.2f} MB")

Model Size: 16.90 MB


In [35]:
# Step 4: Load a Sample Image for Testing
image_path = "//content/images.jpeg"
image = Image.open(image_path)

In [36]:
image.show()

In [37]:
# Step 5: Preprocess the Image
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize to match model input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard ImageNet normalization
])
input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension

In [38]:
# Step 6: Run Inference
with torch.no_grad():
    output = model(input_tensor)
    predictions = torch.nn.functional.softmax(output[0], dim=0)

In [39]:
predictions.shape

torch.Size([1])

In [40]:
predictions

tensor([1.])

In [42]:
!pip install pygoogle_image

Collecting pygoogle_image
  Downloading pygoogle_image-1.0.0.tar.gz (3.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
INFO: pip is looking at multiple versions of pygoogle-image to determine which version is compatible with other requirements. This could take a while.
[31mERROR: Could not find a version that satisfies the requirement python-magic-bin==0.4.14 (from pygoogle-image) (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for python-magic-bin==0.4.14[0m[31m
[0m

In [43]:
from pygoogle_image import image as pi

def download_images(keyword, num_images=5):
    # Create a downloader object
    downloader = pi.download

    # Download images
    downloader(keyword, limit=num_images)

# Example usage
download_images("good morning", num_images=5)


ModuleNotFoundError: No module named 'pygoogle_image'