In [1]:
from azure.ai.ml import MLClient, Input, command, PyTorchDistribution
from azure.ai.ml.entities import Environment
from azure.ai.ml import MpiDistribution

from azure.identity import DefaultAzureCredential

# Alternative workspace / data-asset settings, kept commented out for reference.
# SUBSCRIPTION_ID = "2630c4b9-c627-494d-8a87-a5002e5e7f8e"
# RESOURCE_GROUP = "openai-ngt"
# WORKSPACE_NAME = "coraca"
# TRAIN_MLTABLE = "mltable-silver-validation"
# VAL_MLTABLE = "mltable-silver-validation"
# TRAIN_MLTABLE_VERSION = "2023.05.19.130010"
# VAL_MLTABLE_VERSION = "2023.05.19.130010"

# Active settings: the subscription, resource group and workspace handed to
# MLClient below, plus the name and version of the train / validation data assets.
SUBSCRIPTION_ID = "2630c4b9-c627-494d-8a87-a5002e5e7f8e"
RESOURCE_GROUP = "GODZILLA"
WORKSPACE_NAME = "GODZILLA"
TRAIN_MLTABLE = "godzilla_mae_train"
VAL_MLTABLE = "godzilla_mae_val"
TRAIN_MLTABLE_VERSION = "1" 
VAL_MLTABLE_VERSION = "1"

# Default Azure credential chain with the shared token cache credential
# excluded and the Visual Studio Code credential explicitly left enabled.
credential = DefaultAzureCredential(
    exclude_shared_token_cache_credential=True,
    exclude_visual_studio_code_credential=False,
)
# Azure ML client scoped to the workspace configured above.
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

In [2]:
# Look up the train / validation data assets by name and version.
# Alternative lookups kept for reference:
#   ml_client.data.get(name="toy_mltable_train",version="1")
#   ml_client.data.get(name="toy_mltable_val",version="1")
train_data_asset = ml_client.data.get(
    name=TRAIN_MLTABLE,
    version=TRAIN_MLTABLE_VERSION,
)
val_data_asset = ml_client.data.get(
    name=VAL_MLTABLE,
    version=VAL_MLTABLE_VERSION,
)

# Keep only the asset IDs; these are what the dataset class is given.
TRAIN_ASSET_ID, VAL_ASSET_ID = train_data_asset.id, val_data_asset.id

In [3]:
# Display both resolved asset IDs as the cell output.
TRAIN_ASSET_ID, VAL_ASSET_ID

('/subscriptions/2630c4b9-c627-494d-8a87-a5002e5e7f8e/resourceGroups/GODZILLA/providers/Microsoft.MachineLearningServices/workspaces/GODZILLA/data/godzilla_mae_train/versions/1',
 '/subscriptions/2630c4b9-c627-494d-8a87-a5002e5e7f8e/resourceGroups/GODZILLA/providers/Microsoft.MachineLearningServices/workspaces/GODZILLA/data/godzilla_mae_val/versions/1')

In [6]:
from torch.utils.data import Dataset, get_worker_info
import mltable
import numpy as np

class ImageDatasetMP(Dataset):
    """Map-style dataset that reads images listed in an Azure ML MLTable asset.

    Nothing is loaded in ``__init__``. The table is materialised as a pandas
    DataFrame on the first ``__getitem__`` call, in whichever process makes
    that call (presumably so that each DataLoader worker loads its own copy;
    confirm). ``__len__`` computes its value in a short-lived child process
    and caches it on the instance.

    Args:
        data_asset_id: Asset ID appended to ``"azureml:/"`` to form the URI
            passed to ``mltable.load``.
        transform: Optional callable applied to every RGB PIL image before it
            is returned.
    """

    def __init__(
        self,
        data_asset_id,
        transform=None
        ):
        # Zero-argument super(): the previous ``super(Dataset).__init__()``
        # created an unbound super object and never reached the base class.
        super().__init__()

        self.data_asset_id = data_asset_id
        self.transform = transform
        self.first_time = True
        # Cached result of __len__; None until it has been computed once.
        self._length = None

    def _lazy_init(self):
        """Load the MLTable and store it on the instance as ``self.df``."""
        #print(f"Trying to load mltable in lazy_init for worker {get_worker_info().id}...")
        training_data = mltable.load(f"azureml:/{self.data_asset_id}")
        self.df = training_data.to_pandas_dataframe()

        print("Finished lazy_init.")

    def __len__(self):
        """Return the number of distinct ``filename_fixed`` values in the table.

        The table is loaded in a child process so that this process does not
        load it as a side effect of ``len()``. The result is cached, because
        samplers and loaders may call ``len()`` repeatedly and each
        computation reloads the whole table.

        Raises:
            RuntimeError: If the child process exits with a non-zero code
                (previously this case blocked forever on the empty queue).
        """
        cached = getattr(self, "_length", None)
        if cached is not None:
            return cached

        from multiprocessing import Process, Queue

        # NOTE: a nested function can only serve as a Process target under the
        # "fork" start method; "spawn" would fail to pickle it.
        def _count_files(result_queue):
            table = mltable.load(f"azureml:/{self.data_asset_id}")
            frame = table.to_pandas_dataframe()
            result_queue.put(frame['filename_fixed'].nunique())

        result_queue = Queue()
        worker = Process(target=_count_files, args=(result_queue,))
        worker.start()
        worker.join()
        if worker.exitcode != 0:
            raise RuntimeError(
                f"Counting rows of {self.data_asset_id!r} failed in the child "
                f"process (exit code {worker.exitcode})."
            )

        self._length = result_queue.get()
        return self._length

    def __getitem__(self, idx):
        """Return ``{'image': image}`` for the row labelled ``idx``.

        The first call loads the table via ``_lazy_init``. The row's
        ``filename_fixed`` value must expose ``.open()`` returning a readable
        stream (presumably an mltable stream object; confirm).

        NOTE(review): ``__len__`` counts *unique* ``filename_fixed`` values
        while rows are looked up by index label here; the two only agree if
        the column has no duplicates. Verify against the data asset.
        """
        # Imported locally so the class does not rely on a later notebook cell
        # having imported PIL first.
        from PIL import Image

        if self.first_time:
            self._lazy_init()
            self.first_time = False
        img_path = self.df.loc[idx, 'filename_fixed']
        image = Image.open(img_path.open()).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return {'image': image}

In [7]:
from torch.utils.data import Dataset, DataLoader,get_worker_info
from PIL import Image
import mltable
import torchvision
import torchvision.transforms as transforms

import numpy as np
import torchvision.transforms.functional as F
# Side length, in pixels, that the transform pipeline resizes every image to.
img_size = 224


class SquarePad:
    """Pad an image so both sides equal its longer dimension.

    Args:
        fill: Fill value forwarded to ``F.pad``. The default ``0`` is black
            for standard RGB images.
        padding_mode: Padding mode forwarded to ``F.pad``
            (default ``"constant"``).
    """

    def __init__(self, fill=0, padding_mode="constant"):
        self.fill = fill
        self.padding_mode = padding_mode

    def __call__(self, image):
        """
        Pad ``image`` to a square, keeping the content centred.

        Args:
            image: Object whose ``size`` attribute is a ``(width, height)``
                pair, e.g. a PIL image.

        Returns:
            Whatever ``F.pad`` returns for the padded image.
        """

        w, h = image.size
        side = max(w, h)
        left = (side - w) // 2
        top = (side - h) // 2
        # Padding order is (left, top, right, bottom). The right/bottom side
        # takes the remainder, so an odd difference still yields an exact
        # square instead of coming up one pixel short.
        padding = (left, top, side - w - left, side - h - top)

        return F.pad(image, padding, self.fill, self.padding_mode)

# Image pipeline: pad to a square, resize to img_size x img_size, apply the
# random augmentations and a 3-pixel Gaussian blur, convert to a tensor, then
# normalise every channel with mean 0.5 and std 0.5.
tfs = transforms.Compose(
    [
        SquarePad(),
        transforms.Resize(size=(img_size, img_size)),
        transforms.RandAugment(),
        transforms.RandomVerticalFlip(p=0.1),
        transforms.RandomHorizontalFlip(p=0.1),
        transforms.GaussianBlur(kernel_size=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [8]:
# Build the training dataset. Construction only stores the asset ID and the
# transform; the table itself is loaded on the first item access.
ds = ImageDatasetMP(data_asset_id=TRAIN_ASSET_ID,transform=tfs)

In [14]:
# Fetch the first sample; the first access also triggers the dataset's table load.
ds[0]

{'image': tensor([[[-0.9686, -0.9686, -0.9686,  ..., -0.9765, -0.9765, -0.9765],
          [-0.9686, -0.9608, -0.9686,  ..., -0.9686, -0.9686, -0.9686],
          [-0.9765, -0.9686, -0.9686,  ..., -0.9686, -0.9686, -0.9686],
          ...,
          [-0.9922, -0.9922, -0.9922,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],
 
         [[-0.9686, -0.9686, -0.9686,  ..., -0.9765, -0.9765, -0.9765],
          [-0.9686, -0.9608, -0.9686,  ..., -0.9686, -0.9686, -0.9686],
          [-0.9765, -0.9686, -0.9686,  ..., -0.9686, -0.9686, -0.9686],
          ...,
          [-0.9922, -0.9922, -0.9922,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],
 
         [[-0.9686, -0.9686, -0.9686,  ..., -0.9765, -0.9765, -0.9765],
          [-0.9686,

In [2]:
import timm 

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Instantiate timm's 'resnet18' and display its module structure as the cell output.
timm.create_model('resnet18')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, m