In [1]:
# the custom model
import torch
import torch.nn as nn
import my_extension_cpp
from my_extension import (
    CustomLinear, 
    CustomReLU
)

In [2]:
# test the matrix_multiply function
a = torch.tensor(
    [[1., 2., 3.],
     [2., 3., 4.]]
).to('mps')
b = torch.tensor(
    [[4., 5.],
     [6., 7.],
     [8., 9.]]
).to('mps')

result = my_extension_cpp.matrix_multiply(a, b)
result_2 = a @ b

result == result_2

tensor([[True, True],
        [True, True]], device='mps:0')

In [3]:
result

tensor([[40., 46.],
        [58., 67.]], device='mps:0')

In [4]:
import torch
from torch.autograd import gradcheck
from my_extension import CustomLinearFunction  # Assuming this is your custom function

# Convert to float32 for single precision
input_features = torch.randn((10, 3), dtype=torch.float64, requires_grad=True)
weight = torch.randn((2, 3), dtype=torch.float64, requires_grad=True)

# Move your tensors to the appropriate device
# = input_features#.to('mps')
#weight = weight#.to('mps')

# Use a higher epsilon and atol because float32 is less precise than float64
test = gradcheck(CustomLinearFunction.apply, (input_features, weight), eps=1e-3, atol=1e-2, raise_exception=True)
print(test)


RuntimeError: input must be a MPS tensor

In [9]:
# try the Relus

sr = nn.functional.relu(a)

cr = my_extension_cpp.relu(a)

In [10]:
cr

tensor([[1., 2., 3.],
        [2., 3., 4.]], device='mps:0')

In [11]:
(cr == sr).sum()

tensor(6, device='mps:0')

In [12]:
sr.numel()

6

In [14]:
a = torch.tensor(
    [[1., 2., 3.],
     [2., 3., 4.],
     [2., 3., 4.],
     [2., 3., 4.],
     [-2., -3., -4.],
     [2., 3., 4.]]
).to('mps')


sr = nn.functional.relu(a)
test_length = 1000

wrong = []
right = []
for i in range(0, test_length):
    cr = my_extension_cpp.relu(a)
    if (cr == sr).sum() != sr.numel():
        wrong.append(1)
    else: 
        right.append(1)

print(f"{(len(wrong)/test_length)*100}% wrong, and {(len(right)/test_length)*100}% right")

0.0% wrong, and 100.0% right


In [20]:
cr = my_extension_cpp.relu(a)

In [24]:
cr

tensor([[1., 2., 3.],
        [2., 3., 4.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], device='mps:0')

tensor([[1., 2., 3.],
        [2., 3., 4.]], device='mps:0')

In [2]:
import torch

# Settings
num_features = 1
num_samples = 100  # Number of data points
noise_factor = 0.1  # Noise factor for output data

# Generate data for a single feature
inputs = torch.linspace(-1, 1, steps=num_samples).unsqueeze(1)  # Shape: [num_samples, 1]

# Add a little noise to inputs
inputs += torch.randn(inputs.shape) * noise_factor

# Normalize and center the input data
inputs_normalized = (inputs - inputs.mean()) / inputs.std()

# Create a simple linear relationship (y = mx + b) with some noise
m = torch.tensor([2.0])  # Slope
b = torch.tensor([1.0])  # Intercept

# Generate the target output with noise
targets = m * inputs_normalized + b
targets += torch.randn(targets.shape) * noise_factor  # Adding noise

target_mean = targets.mean()
shifted_targets = targets - target_mean

# Split into training and testing sets
train_inputs = inputs_normalized[:10]  # 80% for training
train_outputs = shifted_targets[:10]
test_inputs = inputs_normalized[90:]  # 20% for testing
test_outputs = shifted_targets[90:]

train_inputs = train_inputs.to('mps')
train_outputs = train_outputs.to('mps')


In [3]:
c = CustomReLU()

In [4]:
c.to('mps')

CustomReLU(MPS-based ReLU)

In [5]:
t = c(train_inputs)

In [6]:
train_inputs

tensor([[-1.8692],
        [-1.6171],
        [-1.3498],
        [-1.5872],
        [-1.7129],
        [-1.6019],
        [-1.6590],
        [-1.3591],
        [-1.0200],
        [-1.4112]], device='mps:0')

In [7]:
t

tensor([[-0.0187],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000]], device='mps:0')

In [8]:
from torch.nn import ReLU

In [9]:
r = ReLU()
r.to('mps')

ReLU()

In [11]:
tt = r(train_inputs)

In [12]:
tt

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='mps:0')

In [14]:
tt.dtype

torch.float32

In [26]:
# the custom model
import torch
import torch.nn as nn
from my_extension import CustomReLU

cr = CustomReLU()
cr.to('mps')

sr = nn.LeakyReLU()
sr.to('mps')

test_input = torch.tensor([[-1., 1., -1., 1., -1]], requires_grad=True).to('mps')

cr_output = cr(test_input)

sr_output = sr(test_input)

cr_output == sr_output

In [24]:
sr_output

tensor([[-0.0100,  1.0000, -0.0100,  1.0000, -0.0100]], device='mps:0',
       grad_fn=<LeakyReluBackward0>)

In [39]:
import torch
import torch.nn as nn
from my_extension import CustomReLU

# Initialize both ReLU implementations and move them to the appropriate device
cr = CustomReLU().to('mps')
sr = nn.LeakyReLU().to('mps')

# Prepare a test input tensor with requires_grad=True to track gradients
test_input = torch.tensor([[-8000., 1000.6556, -.0005643, 8., 1.000]], requires_grad=True).to('mps')
test_input.retain_grad()
# Forward pass through CustomReLU
cr_output = cr(test_input)
# Perform a backward pass through CustomReLU
cr_output.sum().backward()  # Use sum() to ensure scalar output for backward

# Save the gradient of the input tensor after CustomReLU backward pass
cr_grad = test_input.grad.clone()

# Zero out gradients in test_input for a fresh backward pass
test_input.grad.zero_()

# Forward pass through PyTorch's LeakyReLU
sr_output = sr(test_input)
# Perform a backward pass through PyTorch's LeakyReLU
sr_output.sum().backward()  # Use sum() to ensure scalar output for backward

# Save the gradient of the input tensor after LeakyReLU backward pass
sr_grad = test_input.grad.clone()

# Compare the gradients from both backward passes
are_gradients_equal = torch.equal(cr_grad, sr_grad)
print(f"Are gradients equal? {are_gradients_equal}")


Are gradients equal? True


In [41]:
cr_grad

tensor([[0.0100, 1.0000, 0.0100, 1.0000, 1.0000]], device='mps:0')