<a href="https://colab.research.google.com/github/Ashishgy/MapReduce_Flight/blob/main/ShoeDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

def tiled_matrix_multiply(A, B, tile_size):
    """
    Perform tiled (blocked) matrix multiplication on two matrices A and B.

    For every output tile ``C[i:i+t, j:j+t]`` the partial products
    ``A[i:i+t, k:k+t] @ B[k:k+t, j:j+t]`` are accumulated over ``k``.
    NumPy slicing clamps at the array edge, so dimensions that are not a
    multiple of ``tile_size`` are covered by smaller edge tiles.

    Args:
        A (numpy.ndarray): Input matrix of shape (M, N)
        B (numpy.ndarray): Input matrix of shape (N, P)
        tile_size (int): Size of the tiles for blocking; must be positive

    Returns:
        numpy.ndarray: Matrix of shape (M, P) holding the product, allocated
        with ``np.zeros`` (float64 by default).

    Raises:
        ValueError: If ``tile_size`` is not positive, or if the number of
            columns of ``A`` differs from the number of rows of ``B``.
    """
    if tile_size <= 0:
        # A negative step would make every range() empty and silently return zeros.
        raise ValueError(f"tile_size must be a positive integer, got {tile_size}")

    M, N = A.shape
    N_b, P = B.shape
    if N != N_b:
        # Checking once up front replaces the old per-tile shape test, which
        # silently dropped (or mis-paired) tiles instead of reporting the error.
        raise ValueError(
            f"Inner dimensions must match: A is {A.shape}, B is {B.shape}"
        )

    C = np.zeros((M, P))

    for i in range(0, M, tile_size):
        for j in range(0, P, tile_size):
            for k in range(0, N, tile_size):
                A_tile = A[i:i+tile_size, k:k+tile_size]
                B_tile = B[k:k+tile_size, j:j+tile_size]
                C[i:i+tile_size, j:j+tile_size] += np.dot(A_tile, B_tile)
    return C


In [None]:
import torch
import torch.nn as nn

class Kn2RowConv2D(nn.Module):
    """2-D convolution (stride 1, no padding) evaluated patch by patch.

    Each output position is computed by flattening the input patch and
    multiplying it with the flattened kernels through the module-level
    ``tiled_matrix_multiply`` helper, which runs in NumPy on the CPU.

    Because the product is computed on detached NumPy copies, autograd does
    not track it: gradients reach ``bias`` (added as a tensor) but not
    ``weight`` or the input.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Side length of the square kernel.
        tile_size (int): Tile size forwarded to ``tiled_matrix_multiply``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, tile_size=4):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.tile_size = tile_size
        self.weight = nn.Parameter(
            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = nn.Parameter(torch.zeros(out_channels))

    def forward(self, x):
        """Convolve ``x`` of shape (batch, channels, height, width).

        Returns:
            torch.Tensor: Tensor of shape
            (batch, out_channels, height - kernel_size + 1, width - kernel_size + 1)
            on ``x.device``.
        """
        batch_size, _, height, width = x.shape
        output_height = height - self.kernel_size + 1
        output_width = width - self.kernel_size + 1
        output = torch.zeros(
            (batch_size, self.out_channels, output_height, output_width),
            device=x.device,
        )

        # The flattened kernel does not depend on the patch position, so build it once.
        # detach() is required: Tensor.numpy() refuses tensors that require grad.
        kernel_flat_t = self.weight.detach().reshape(self.out_channels, -1).cpu().numpy().T

        for i in range(output_height):
            for j in range(output_width):
                patch = x[:, :, i:i+self.kernel_size, j:j+self.kernel_size]
                patch_flat = patch.detach().reshape(batch_size, -1).cpu().numpy()
                result = np.zeros((batch_size, self.out_channels))
                for b in range(batch_size):
                    # One (1, C*k*k) row times the (C*k*k, out_channels) kernel matrix.
                    result[b, :] = tiled_matrix_multiply(patch_flat[b:b+1], kernel_flat_t, self.tile_size)
                output[:, :, i, j] = torch.from_numpy(result).to(x.device) + self.bias
        return output


In [None]:
!git clone https://github.com/KLab-AI3/ai3.git

Cloning into 'ai3'...
remote: Enumerating objects: 2895, done.[K
remote: Counting objects: 100% (549/549), done.[K
remote: Compressing objects: 100% (227/227), done.[K
Receiving objects: 100% (2895/2895), 2.25 MiB | 1.17 MiB/s, done.
remote: Total 2895 (delta 314), reused 508 (delta 283), pack-reused 2346 (from 1)[K
Resolving deltas: 100% (1852/1852), done.


In [None]:
%cd ai3

/content/ai3


In [None]:
!pip install .


Processing /content/ai3
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: aithree
  Building wheel for aithree (pyproject.toml) ... [?25l[?25hdone
  Created wheel for aithree: filename=aithree-0.1.1.dev43+gba1f94a-cp310-cp310-linux_x86_64.whl size=210162 sha256=557accaa29c45ab90fb2445f52435c926f93d71128608e2855c2a5cdcde63210
  Stored in directory: /tmp/pip-ephem-wheel-cache-z63_pdip/wheels/ce/ca/00/fcb63f9c96e189c6705081053bd64df2530a49bd2a1ab75052
Successfully built aithree
Installing collected packages: aithree
Successfully installed aithree-0.1.1.dev43+gba1f94a


In [None]:
import ai3
# The recorded run of this cell raised AttributeError on ai3.using_sycl, so the
# helper is looked up defensively and the cell reports its absence instead of failing.
_using_sycl = getattr(ai3, "using_sycl", None)
if callable(_using_sycl):
    print("SYCL available:", _using_sycl())
else:
    print("SYCL available: unknown (ai3.using_sycl is not present in this build)")

AttributeError: module 'ai3' has no attribute 'using_sycl'

In [None]:
# Smoke test: report whether the ai3 package can be imported in this kernel.
try:
    import ai3
except ImportError as e:
    # Surface the import failure as text instead of aborting the cell.
    print(f"Error importing AI3: {e}")
else:
    print("AI3 module imported successfully!")


AI3 module imported successfully!


In [None]:
import ai3
# dir() returns every name bound on the package object, not only submodules.
print(dir(ai3))  # Lists all top-level names in ai3 (functions, classes, submodules, dunders)


['AlgorithmicSelector', 'DEFAULT_ALGOS', 'FROM_BACKEND', 'Mapping', 'Model', 'Optional', 'SUPPORTED_ALGORITHMS', 'SUPPORTED_FROM_BACKENDS', 'Sequence', 'Tensor', 'Type', 'Union', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_core', '_version', 'convert', 'errors', 'layers', 'swap_conv2d', 'swap_operation', 'tensor', 'using_cublas', 'using_cudnn', 'using_mps_and_metal', 'using_sycl', 'utils']


In [None]:
!pip show aithree  # Check the version of AI3
!pip show ai3      # Sometimes, AI3 might have different names installed


Name: aithree
Version: 0.1.1.dev41+g55d79a3
Summary: Enables Algorithmic Selection and Customization in Deep Neural Networks
Home-page: 
Author: Timothy Cronin
Author-email: 
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: packaging
Required-by: 
[0m

In [None]:
# List every top-level name exposed by the ai3 package
print(dir(ai3))

# Drill into one attribute (here ai3.Model) while looking for CustomLayerBase.
# AttributeError is caught so a name that does not exist is reported as text.
try:
    print(dir(ai3.Model))  # Swap 'Model' for any other name printed by dir(ai3) above
except AttributeError as e:
    print(f"Error: {e}")


['AlgorithmicSelector', 'DEFAULT_ALGOS', 'FROM_BACKEND', 'Mapping', 'Model', 'Optional', 'SUPPORTED_ALGORITHMS', 'SUPPORTED_FROM_BACKENDS', 'Sequence', 'Tensor', 'Type', 'Union', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_core', '_version', 'convert', 'errors', 'layers', 'swap_conv2d', 'swap_operation', 'tensor', 'using_cublas', 'using_cudnn', 'using_mps_and_metal', 'using_sycl', 'utils']
['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'predict']


In [None]:
def auto_selector(orig: torch.nn.Conv2d, input_shape) -> str:
    """Choose an algorithm identifier for a convolution layer.

    Args:
        orig: The convolution layer under consideration; only the first
            dimension of ``orig.weight`` (its output-channel count) is read.
        input_shape: Accepted for the selector signature; not used here.

    Returns:
        ``'kn2row'`` when the layer has fewer than 64 output channels,
        otherwise ``'default'``.
    """
    # NOTE(review): confirm that the consumer of this selector recognises 'kn2row'.
    num_filters = orig.weight.shape[0]
    return 'kn2row' if num_filters < 64 else 'default'


In [None]:
import torch
# Ask PyTorch whether CUDA is usable and reuse the answer for the branch below.
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)
if not cuda_available:
    print("No GPU available.")
else:
    # Device index 0 is the first visible GPU.
    print("Device Name:", torch.cuda.get_device_name(0))


CUDA Available: True
Device Name: Tesla T4


In [None]:
x = torch.randn(1, 3, 224, 224).cuda()  # Allocate a tensor on the GPU
# Print a compact summary rather than the tensor contents: the values are random
# noise, and dumping them floods the cell output without adding information.
print("shape:", tuple(x.shape), "| dtype:", x.dtype, "| device:", x.device)


tensor([[[[-1.1333, -0.6225,  1.2589,  ...,  0.3850, -0.1571,  1.3608],
          [-0.8926,  0.0979, -1.1809,  ..., -0.4242, -0.9301,  1.2949],
          [ 0.7827,  0.0434, -0.4755,  ...,  0.4912,  1.2967,  1.9732],
          ...,
          [ 0.6648,  3.5491, -1.2949,  ..., -0.1719,  0.3528,  0.6527],
          [ 1.5201,  0.4444, -0.2348,  ..., -0.9853,  0.0140, -1.6299],
          [-0.3857,  0.0757, -0.8600,  ..., -0.1934, -0.4202,  0.2986]],

         [[ 0.5053,  1.0663, -1.5253,  ..., -0.7076,  0.1849, -1.0714],
          [ 0.6356, -2.0651, -0.4954,  ..., -0.3259,  0.9171, -0.0283],
          [ 0.5759,  1.2492, -0.1867,  ..., -0.1382,  0.1565, -1.2226],
          ...,
          [ 0.4618, -0.4020,  1.4548,  ...,  0.3646, -0.4321,  0.1837],
          [ 0.4040, -1.7106, -1.0397,  ..., -0.2079,  0.0117, -0.0307],
          [ 0.0465, -2.1221, -1.9187,  ..., -1.0234,  2.3839,  1.1323]],

         [[-0.1619,  1.0789,  1.6024,  ..., -0.9592, -0.8061, -0.5897],
          [-1.8202, -0.5129, -

In [None]:
import torch
import numpy as np

class Kn2RowConv2D(torch.nn.Module):
    """2-D convolution (stride 1, no padding) evaluated patch by patch.

    Each output position is computed by flattening the input patch and
    multiplying it with the flattened kernels using the tiled NumPy matrix
    product implemented in ``tiled_matrix_multiply``.

    Because the product is computed on detached NumPy copies on the CPU,
    autograd does not track it: gradients reach ``bias`` (added as a tensor)
    but not ``weight`` or the input.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Side length of the square kernel.
        tile_size (int): Tile size used by ``tiled_matrix_multiply``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, tile_size=4):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.tile_size = tile_size
        self.weight = torch.nn.Parameter(
            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = torch.nn.Parameter(torch.zeros(out_channels))

    def forward(self, x):
        """Convolve ``x`` of shape (batch, channels, height, width).

        Returns:
            torch.Tensor: Tensor of shape
            (batch, out_channels, height - kernel_size + 1, width - kernel_size + 1)
            on ``x.device``.
        """
        batch_size, _, height, width = x.shape
        output_height = height - self.kernel_size + 1
        output_width = width - self.kernel_size + 1
        output = torch.zeros(
            (batch_size, self.out_channels, output_height, output_width),
            device=x.device,
        )

        # The flattened kernel does not depend on the patch position, so build it once.
        # detach() is required: Tensor.numpy() refuses tensors that require grad,
        # which made the previous version fail outside torch.no_grad().
        kernel_flat_t = self.weight.detach().reshape(self.out_channels, -1).cpu().numpy().T

        for i in range(output_height):
            for j in range(output_width):
                patch = x[:, :, i:i+self.kernel_size, j:j+self.kernel_size]
                patch_flat = patch.detach().reshape(batch_size, -1).cpu().numpy()
                result = np.zeros((batch_size, self.out_channels))
                for b in range(batch_size):
                    # One (1, C*k*k) row times the (C*k*k, out_channels) kernel matrix.
                    result[b, :] = self.tiled_matrix_multiply(patch_flat[b:b+1], kernel_flat_t)
                output[:, :, i, j] = torch.from_numpy(result).to(x.device) + self.bias
        return output

    def tiled_matrix_multiply(self, A, B):
        """Multiply ``A`` (M, N) by ``B`` (N, P) in ``self.tile_size`` blocks.

        Returns:
            numpy.ndarray: Matrix of shape (M, P), allocated with ``np.zeros``.

        Raises:
            ValueError: If ``self.tile_size`` is not positive or the inner
                dimensions of ``A`` and ``B`` differ.
        """
        if self.tile_size <= 0:
            # A negative step would make every range() empty and silently return zeros.
            raise ValueError(f"tile_size must be a positive integer, got {self.tile_size}")

        M, N = A.shape
        N_b, P = B.shape
        if N != N_b:
            raise ValueError(
                f"Inner dimensions must match: A is {A.shape}, B is {B.shape}"
            )

        C = np.zeros((M, P))

        # Slices clamp at the array edge, so partial tiles need no special casing.
        for i in range(0, M, self.tile_size):
            for j in range(0, P, self.tile_size):
                for k in range(0, N, self.tile_size):
                    A_tile = A[i:i+self.tile_size, k:k+self.tile_size]
                    B_tile = B[k:k+self.tile_size, j:j+self.tile_size]
                    C[i:i+self.tile_size, j:j+self.tile_size] += np.dot(A_tile, B_tile)
        return C


In [None]:
def auto_selector(orig: torch.nn.Conv2d, input_shape):
    """Choose an algorithm identifier for a convolution layer.

    Args:
        orig: The convolution layer under consideration; only
            ``orig.out_channels`` is read.
        input_shape: Accepted for the selector signature; not used here.

    Returns:
        ``'custom'`` when the layer has fewer than 64 output channels,
        otherwise ``'default'``.
    """
    return 'custom' if orig.out_channels < 64 else 'default'


In [None]:
# Import required libraries
import torch
import torchvision.models as models
import ai3

# Prefer the GPU, but fall back to the CPU so this cell still runs without CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained VGG16 model in eval mode and place it on the selected device
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval().to(device)

# Generate input data and move it to the same device as the model
input_data = torch.randn(1, 3, 224, 224).to(device)

# Confirm where the model lives
print("Model device:", next(vgg16.parameters()).device)

# Confirm where the input lives
print("Input device:", input_data.device)

# Test the model to ensure it works; this is inference only, so skip autograd tracking
with torch.no_grad():
    output = vgg16(input_data)
print("Output shape:", output.shape)


Model device: cuda:0
Input device: cuda:0
Output shape: torch.Size([1, 1000])


In [None]:
pip install --upgrade aithree




In [None]:
import torch
import torchvision.models as models
import ai3

# Load the pretrained VGG16 model in eval mode.
# It is deliberately left on the CPU: with the model on CUDA, ai3.convert failed
# during shape propagation at the first conv layer (features.0).
# NOTE(review): this is consistent with a CPU sample input meeting CUDA weights - confirm.
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval()

# Define layer selector for AI3 conversion
def auto_selector(orig: torch.nn.Conv2d, input_shape):
    """Return 'custom' for conv layers with fewer than 64 output channels, else 'default'."""
    # NOTE(review): 'custom' presumably needs a user-supplied implementation built
    # into ai3 - verify against the ai3 documentation before relying on it.
    if orig.out_channels < 64:  # Example condition
        return 'custom'
    return 'default'

# Convert the model using AI3
model = ai3.convert(
    vgg16,
    {'conv2d': auto_selector},  # Replacement logic
    sample_input_shape=(1, 3, 224, 224)
)

# dir(ai3.Model) lists only __call__ and predict, so the converted object is not
# moved with .cuda() and is not queried with .parameters().
# NOTE(review): assumes ai3.convert returns an ai3.Model - confirm.

# Create the input tensor on the CPU
input_data = torch.randn(1, 3, 224, 224)

# Report what is being run
print("Input device:", input_data.device)
print("Converted model type:", type(model).__name__)

# Run the converted model
output = model(input_data)
print("Output shape:", output.shape)


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/fx/passes/shape_prop.py", line 154, in run_node
    result = super().run_node(n)
  File "/usr/local/lib/python3.10/dist-packages/torch/fx/interpreter.py", line 203, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/fx/interpreter.py", line 320, in call_module
    return submod(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 554, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 549, in _co

RuntimeError: ShapeProp error for: node=%features_0 : [num_users=1] = call_module[target=features.0](args = (%x,), kwargs = {}) with meta={'nn_module_stack': OrderedDict([('features', ('features', <class 'torch.nn.modules.container.Sequential'>)), ('features.0', ('features.0', <class 'torch.nn.modules.conv.Conv2d'>))])}

While executing %features_0 : [num_users=1] = call_module[target=features.0](args = (%x,), kwargs = {})
Original traceback:
None