# Try a basic network on MNIST

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add a trailing channel axis. The batch size is inferred (-1) rather than
# hard-coded as 60000 / 10000, so the cell keeps working on a subset of the data.
train_images = train_images.reshape((-1, 28, 28, 1))  # [B, H, W, C]
test_images = test_images.reshape((-1, 28, 28, 1))    # [B, H, W, C]

# Normalize pixel values to the range [0, 1]
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255

# Convert to channels-first [B, C, H, W] format
train_images = train_images.transpose(0, 3, 1, 2)
test_images = test_images.transpose(0, 3, 1, 2)

# Convert labels to one-hot encoding (10 classes)
train_labels = tf.keras.utils.to_categorical(train_labels, 10)
test_labels = tf.keras.utils.to_categorical(test_labels, 10)

# Print shapes
print("Train images shape:", train_images.shape)  # Should be (60000, 1, 28, 28) for the full set
print("Train labels shape:", train_labels.shape)
print("Test images shape:", test_images.shape)    # Should be (10000, 1, 28, 28) for the full set
print("Test labels shape:", test_labels.shape)

In [None]:
# Show which container type holds the one-hot labels built in the previous cell.
print(type(train_labels))

In [None]:
import numpy as np
from core.Models import Model
from core.nn import Linear, Conv2d, MaxPool2d , batchnorm2d, Softmax, Relu,Flatten
from core.optim import sgd, adam
from core.loss import get_loss_fn

class ResNetWithResiduals(Model):
    """Small two-stage residual CNN ending in a 10-way dense head.

    Each stage runs two 3x3 conv -> batchnorm -> ReLU units, adds a 1x1
    convolution shortcut that matches the channel count, and then applies 2x2
    max pooling. The dense head expects 64 * 7 * 7 features, i.e. 28x28 inputs
    halved twice by the pooling layers.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 8 -> 16 channels, shortcut 8 -> 16.
        self.conv1 = Conv2d(
            input_channels=1,
            output_channels=8,
            kernel_size=3,
            stride=1,
            padding=1,
            initialize_type='xavier',
            bias=False,
        )
        self.bn1 = batchnorm2d(8)
        self.relu1 = Relu()
        self.conv2 = Conv2d(
            input_channels=8,
            output_channels=16,
            kernel_size=3,
            stride=1,
            padding=1,
            initialize_type='xavier',
        )
        self.bn2 = batchnorm2d(16)
        self.relu2 = Relu()
        # 1x1 projection so the shortcut has as many channels as the main path.
        self.res1 = Conv2d(
            input_channels=8,
            output_channels=16,
            kernel_size=1,
            stride=1,
            padding=0,
            initialize_type='xavier',
        )
        self.max1 = MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 -> 64 channels, shortcut 16 -> 64.
        self.conv3 = Conv2d(
            input_channels=16,
            output_channels=32,
            kernel_size=3,
            stride=1,
            padding=1,
            initialize_type='xavier',
        )
        self.bn3 = batchnorm2d(32)
        self.relu3 = Relu()
        self.conv4 = Conv2d(
            input_channels=32,
            output_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            initialize_type='xavier',
        )
        self.bn4 = batchnorm2d(64)
        self.relu4 = Relu()
        self.res2 = Conv2d(
            input_channels=16,
            output_channels=64,
            kernel_size=1,
            stride=1,
            padding=0,
            initialize_type='xavier',
        )
        self.max2 = MaxPool2d(kernel_size=2, stride=2)

        # Dense classification head.
        self.flatten = Flatten()
        self.linear1 = Linear(64 * 7 * 7, 100, initialize_type='xavier', activation='relu')
        self.linear2 = Linear(100, 10, initialize_type='xavier', activation='softmax')

    def forward(self, x):
        """Run both residual stages and the dense head on ``x``; return the head output."""
        # Stage 1: the shortcut branches off after the first conv unit.
        stem = self.relu1(self.bn1(self.conv1(x)))
        main_path = self.relu2(self.bn2(self.conv2(stem)))
        shortcut = self.res1(stem)
        stage1 = self.max1(main_path + shortcut)

        # Stage 2: the shortcut branches off the pooled stage-1 output.
        mid = self.relu3(self.bn3(self.conv3(stage1)))
        main_path = self.relu4(self.bn4(self.conv4(mid)))
        shortcut = self.res2(stage1)
        stage2 = self.max2(main_path + shortcut)

        features = self.flatten(stage2)
        hidden = self.linear1(features)
        return self.linear2(hidden)

# Build one network instance; the training, evaluation and graph cells below
# all operate on this object.
model_resnet_residuals = ResNetWithResiduals()


In [None]:
from core.Datasets import Dataset
# Wrap the training arrays in the project's Dataset iterator
# (batch_size=32, shuffle=True).
train_dataset = Dataset(train_images, train_labels, batch_size=32, shuffle=True)

In [None]:
# Feed one batch through the model as a smoke test.
train_dataset.reset()
# Use the built-in next() instead of calling the dunder method directly.
x, y = next(train_dataset)
out = model_resnet_residuals(x)

In [None]:
# Confirm the head produces one 10-element row per sample in the batch.
print("Output shape:", out.shape)  # Should be (batch_size, 10)

In [None]:
# Backpropagate straight from the model output (no loss involved) to check
# that a backward pass runs through every layer.
out.backward()

In [None]:
# Presumably renders the computation graph recorded for `out` — confirm
# against the Tensor.view_graph implementation.
out.view_graph()

In [None]:
# Training configuration read by train() below.
optimizer = adam(model_resnet_residuals.parameters(), learning_rate=0.001)
# NOTE(review): the loss object is requested as 'categorical_bce', yet train()
# calls its sparse_categorical_cross_entropy method with one-hot labels —
# confirm this pairing is intended.
loss_fn = get_loss_fn('categorical_bce')
num_epochs = 1
def train():
    """Train ``model_resnet_residuals`` for ``num_epochs`` passes over ``train_dataset``.

    Uses the notebook-level ``optimizer``, ``loss_fn``, ``train_dataset``,
    ``num_epochs`` and ``model_resnet_residuals``. Prints the mean per-batch
    loss after each epoch and returns nothing.
    """
    for epoch_idx in range(num_epochs):
        # Rewind the dataset iterator before every pass (it may reshuffle).
        train_dataset.reset()
        running_loss = 0.0
        batch_count = 0
        for batch_count, (inputs, targets) in enumerate(train_dataset, start=1):
            optimizer.zero_grad()
            predictions = model_resnet_residuals(inputs)
            batch_loss = loss_fn.sparse_categorical_cross_entropy(targets, predictions)
            running_loss += batch_loss.data
            batch_loss.backward()
            optimizer.step()

        mean_loss = running_loss / batch_count
        print(f"Epoch {epoch_idx + 1}/{num_epochs} - Loss: {mean_loss:.4f}")




In [None]:
# Run the training loop defined above.
train()

In [None]:
def test():
    """Score ``model_resnet_residuals`` on the whole test set and print the accuracy.

    Calls ``model_resnet_residuals.test()`` first, runs every test image
    through the model in a single forward pass, and compares the arg-max of
    the predictions with the arg-max of the one-hot ``test_labels``.
    """
    model_resnet_residuals.test()
    scores = model_resnet_residuals(test_images)
    predicted = np.argmax(scores.data, axis=1, keepdims=True)
    # test_labels are one-hot rows, so arg-max recovers the class index.
    expected = np.argmax(test_labels, axis=1, keepdims=True)
    accuracy = np.mean(predicted == expected)
    print(f"Accuracy on test set: {accuracy:.4f}")

# Evaluate the trained model.
test()

In [None]:

# Draw the model graph for a single test image (output name "model_graph").
model_resnet_residuals.view_graph(input_data=test_images[:1],filename="model_graph", view=True)

# Augmentation Module


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10

# Load CIFAR-10 (only the training split is kept; the test split is discarded)
(x_train, y_train), _ = cifar10.load_data()

# Select a sample image (CIFAR-10 images are 32x32 RGB)
sample_img = x_train[0]

# Import Compose and the preprocessing transforms from the project package.
# NOTE(review): Normalize, Resize and ToTensor are imported but not used in
# this cell.
from Preprocessing.transforms import (
    Compose, GaussianBlur, GaussianNoise, Normalize, 
    RandomAffine, RandomCrop, RandomErasing, RandomHorizontalFlip, 
    RandomRotation, RandomVerticalFlip, Resize, ToTensor
)

# Map a display name to each transform instance.
# Transforms that take a probability are given p=1.0 so they always fire.
aug_transforms = {
    "RandomHorizontalFlip": RandomHorizontalFlip(p=1.0),
    "RandomVerticalFlip":   RandomVerticalFlip(p=1.0),
    "RandomCrop":           RandomCrop((28, 28)),  # Crop to 28x28 (from 32x32)
    "RandomRotation":       RandomRotation(degrees=30),
    "RandomAffine":         RandomAffine(degrees=20, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
    "GaussianBlur":         GaussianBlur(sigma=(0.5, 1.5)),
    "GaussianNoise":        GaussianNoise(std=(0.01, 0.05)),
    "RandomErasing":        RandomErasing(p=1.0, scale=(0.02, 0.15), ratio=(0.3, 3.3), value=0)
}

# One subplot row per transform: the untouched sample on the left, the
# augmented result on the right.
n_transforms = len(aug_transforms)
fig, axes = plt.subplots(nrows=n_transforms, ncols=2, figsize=(8, n_transforms * 3))

for idx, (name, transform) in enumerate(aug_transforms.items()):
    original_ax, augmented_ax = axes[idx]

    # Wrap the single transform in a Compose pipeline.
    pipeline = Compose([transform])

    # Left column: the original image.
    original_ax.imshow(sample_img)
    original_ax.set_title("Original")
    original_ax.axis("off")

    # Right column: the pipeline output, unwrapped when it exposes `.data`.
    aug_img = pipeline(sample_img)
    aug_img = getattr(aug_img, "data", aug_img)
    augmented_ax.imshow(np.clip(aug_img, 0, 255).astype(np.uint8))
    augmented_ax.set_title(name)
    augmented_ax.axis("off")

plt.tight_layout()
plt.show()


# Testing pretrained models

In [None]:
#https://deeplearning.cms.waikato.ac.nz/user-guide/class-maps/IMAGENET/

from pretrained.resnet18 import resnet18
from core.Models import Model
# Build ResNet-18 with pretrained weights and call its test() mode switch
# before any inference.
model = resnet18(pretrained=True)
model.test()

In [None]:
from PIL import Image
import numpy as np
from core.tensor import Tensor
# Class index that the commented-out prediction code below is meant to be
# compared against (see the class-map link in the previous cell).
label = 153
imagepath = r"C:\Users\ahmed\Downloads\n02085936_Maltese_dog (1).JPEG"

# Force 3-channel RGB: a grayscale, palette, CMYK or RGBA file would otherwise
# yield an array without exactly three channels and break the (0, 3, 1, 2)
# transpose below. The context manager closes the file handle.
with Image.open(imagepath) as source_image:
    img = source_image.convert("RGB").resize((224, 224))
img = np.array(img)
img = np.expand_dims(img, axis=0)
img = np.transpose(img, (0, 3, 1, 2))  # convert to (batch_size, channels, height, width)
img = img / 255.0  # scale pixel values to [0, 1]
# img = Tensor(img)

# output = model(img)
# y_pred = output.data
# print(y_pred.shape)
# #print top 5 predictions
# print("Top 5 predictions:", np.argsort(y_pred[0])[::-1][:5])


In [None]:
from pretrained.vgg16 import VGG16
vgg = VGG16(pretrained=True)
# Switch to test mode before inference, as the resnet18 cell does; otherwise
# any train-time-only layer behaviour would stay active during prediction.
vgg.test()

In [None]:
from PIL import Image
import numpy as np
from core.tensor import Tensor
# Class index the prediction is meant to be compared against.
label = 153
imagepath = r"C:\Users\ahmed\Downloads\n02085936_Maltese_dog (1).JPEG"

# Force 3-channel RGB: a grayscale, palette, CMYK or RGBA file would otherwise
# yield an array without exactly three channels and break the (0, 3, 1, 2)
# transpose below. The context manager closes the file handle.
with Image.open(imagepath) as source_image:
    img = source_image.convert("RGB").resize((224, 224))
img = np.array(img)
img = np.expand_dims(img, axis=0)
img = np.transpose(img, (0, 3, 1, 2))  # convert to (batch_size, channels, height, width)
img = img / 255.0  # scale pixel values to [0, 1]
img = Tensor(img)

output = vgg(img)
y_pred = output.data
# Index of the highest-scoring class for the single image in the batch.
pred_label = np.argmax(y_pred[0], axis=0, keepdims=False)
# Class indices sorted by descending score, truncated to the best five.
print("Top 5 predictions:", np.argsort(y_pred[0])[::-1][:5])


# new layers

In [None]:
from core.Models import Model
from core.nn import PositionalEmbedding , PatchEmbedding , LayerNorm,MultiHeadAttention

class model(Model):
    """Chain patch embedding, positional embedding and layer norm (width 768)."""

    def __init__(self):
        super().__init__()
        self.patch_embedding = PatchEmbedding()
        # The positional table is sized from the number of patches reported by
        # the patch embedding layer.
        n_patches = self.patch_embedding.n_patches
        self.positional_embedding = PositionalEmbedding(n_patches, 768)
        self.layer_norm = LayerNorm(768)
        # self.attention = MultiHeadAttention(768, 8,masked=True)

    def forward(self, x):
        """Return ``layer_norm(positional_embedding(patch_embedding(x)))``."""
        embedded = self.positional_embedding(self.patch_embedding(x))
        # x = self.attention(x)
        return self.layer_norm(embedded)

In [None]:
from core.tensor import Tensor
import numpy as np
# Random image-shaped batch: (batch, channels, height, width).
x = Tensor(np.random.rand(1, 3, 224, 224))
# Keep the instance under its own name. `model` is the class defined in the
# previous cell; rebinding it to an instance would make this cell fail when it
# is run a second time.
embedding_model = model()
output = embedding_model(x)
output.backward()
print("Output shape:", output.shape)
print(f"output gradient shape: {output.grad.shape}")

In [None]:
# Draw the graph behind `output` (output name "model_graph").
output.view_graph(filename="model_graph", view=True)

# Autograd

In [None]:
from core.tensor import Tensor
import numpy as np

In [None]:

# Negation check: y = -x, so dy/dx = -1.
# NOTE(review): x is created without requires_grad — presumably gradients are
# tracked by default; confirm.
x = Tensor(np.random.rand(1))
y = -x
y.backward()
print(y.grad) 
print(x.grad) 

In [None]:
# Addition check: c = a + b, so dc/da = dc/db = 1.
a = Tensor([2.0], requires_grad=True)
b = Tensor([3.0], requires_grad=True)
c = a + b
c.backward()

print("a.grad:", a.grad)  # should be 1.0
print("b.grad:", b.grad)  # should be 1.0


In [None]:
# Product check: c = a * b, so dc/da = b and dc/db = a.
a = Tensor([2.0], requires_grad=True)
b = Tensor([3.0], requires_grad=True)
c = a * b
c.backward()

print("a.grad:", a.grad)  # should be b.data = 3.0
print("b.grad:", b.grad)  # should be a.data = 2.0


In [None]:
# Quotient check: c = a / b, so dc/da = 1 / b and dc/db = -a / b**2.
a = Tensor([6.0], requires_grad=True)
b = Tensor([2.0], requires_grad=True)
c = a / b
c.backward()

print("a.grad:", a.grad)  # should be 1 / b = 0.5
print("b.grad:", b.grad)  # should be -a / b^2 = -6 / 4 = -1.5


In [None]:
# Subtraction check: c = a - b, so dc/da = 1 and dc/db = -1.
a = Tensor([5.0], requires_grad=True)
b = Tensor([3.0], requires_grad=True)
c = a - b
c.backward()

print("a.grad:", a.grad)  # should be 1
print("b.grad:", b.grad)  # should be -1


In [None]:
# Mean check: each of the four elements contributes 1/4 to the mean.
a = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
c = a.mean()
c.backward()

print("a.grad:")  # should be a tensor with 0.25 in all positions (1/4)
print(a.grad)


In [None]:
# Sum check: every element contributes with weight 1.
a = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
c = a.sum()
c.backward()

print("a.grad:")  # should be all ones
print(a.grad)


In [None]:
# Standard-deviation check on [1, 2, 3] (mean 2).
# Expected gradient depends on the convention Tensor.std uses — confirm:
#   population std (divide by n):   (x - mean) / (n * std)       ~ [-0.408, 0, 0.408]
#   sample std (divide by n - 1):   (x - mean) / ((n - 1) * std) = [-0.5, 0, 0.5]
a = Tensor([1.0, 2.0, 3.0], requires_grad=True)
c = a.std()
c.backward()

print("a.grad:")  # should compute ∂std/∂x for each element
print(a.grad)


In [None]:
from core.tensor import Tensor
# Scalar operands for the leaf / non-leaf gradient check in the next cells.
a = Tensor(1)
b = Tensor(2)
c = a + b 


In [None]:
# Single backward pass from c; the second call is left disabled.
c.backward()
# c.backward()


In [None]:
# NOTE(review): other cells in this notebook read .grad on non-leaf results
# (y.grad, output.grad.shape), so c.grad may not be None here — confirm which
# behaviour the Tensor class actually implements.
print("a.grad:", a.grad)  # should be 1
print("c.grad:", c.grad)  # should be None, as c is not a leaf node

In [None]:
# Draw the graph behind c = a + b.
c.view_graph()

# 

# test

In [None]:
import numpy as np
from core.tensor import Tensor
import time

# Problem size, named once so every shape below is derived from it.
batch, in_ch, height, width = 32, 64, 224, 224
n_filters, k_h, k_w = 128, 3, 3

x = Tensor(np.random.rand(batch, in_ch, height, width), requires_grad=True)
kernel = Tensor(np.random.rand(n_filters, in_ch, k_h, k_w))

# "Valid" convolution: no padding, stride 1.
h_out = height - k_h + 1
w_out = width - k_w + 1

# Start the clock only once the inputs exist, so random-data generation is
# not counted as convolution time. perf_counter is the monotonic,
# high-resolution clock intended for measuring intervals.
start_time = time.perf_counter()

batch_stride, channel_stride, height_stride, width_stride = x.data.strides

# Sliding-window view: one (k_h, k_w) patch per output position and channel.
shape = (batch, in_ch, h_out, w_out, k_h, k_w)
strides = (
    batch_stride,
    channel_stride,
    height_stride,
    width_stride,
    height_stride,
    width_stride,
)

patches = x.as_strided(shape, strides)
# shape: (N, C, H_out, W_out, KH, KW)

# NOTE: at this problem size the column matrix has 32*222*222 x 576 elements,
# roughly 7 GB if it is materialised as float64.
col_matrix = patches.transpose(0, 2, 3, 1, 4, 5).reshape(batch * h_out * w_out, in_ch * k_h * k_w)
# (N*H_out*W_out, C*KH*KW)

reshaped_kernel = kernel.reshape(n_filters, in_ch * k_h * k_w).T
# (C*KH*KW, num_filters)

out = col_matrix @ reshaped_kernel
# (N*H_out*W_out, num_filters)

output = out.reshape(batch, h_out, w_out, n_filters).transpose(0, 3, 1, 2)
# (N, num_filters, H_out, W_out)

end_time = time.perf_counter()
total_time = end_time - start_time

print("Time taken for convolution:", total_time, "seconds")
print("Output shape:", output.shape)  # (32, 128, 222, 222)

In [None]:
import cupy as cp
import time

# Problem size, named once so every shape below is derived from it.
batch, in_ch, height, width = 32, 64, 224, 224
n_filters, k_h, k_w = 128, 3, 3

# Create tensors on GPU
x = cp.random.rand(batch, in_ch, height, width)
kernel = cp.random.rand(n_filters, in_ch, k_h, k_w)

# Calculate output dimensions ("valid" convolution: no padding, stride 1)
h_out = height - k_h + 1
w_out = width - k_w + 1

# GPU work is queued asynchronously: wait for the random-number generation to
# finish, then start the clock so only the convolution is measured.
cp.cuda.Stream.null.synchronize()
start_time = time.perf_counter()

# Get strides from the CuPy array
batch_stride, channel_stride, height_stride, width_stride = x.strides

# Define shape and strides for as_strided
shape = (batch, in_ch, h_out, w_out, k_h, k_w)
strides = (
    batch_stride,
    channel_stride,
    height_stride,
    width_stride,
    height_stride,
    width_stride,
)

# Create patches using as_strided
patches = cp.lib.stride_tricks.as_strided(x, shape, strides)
# shape: (N, C, H_out, W_out, KH, KW)

# Reshape to column matrix
col_matrix = patches.transpose(0, 2, 3, 1, 4, 5).reshape(batch * h_out * w_out, in_ch * k_h * k_w)
# (N*H_out*W_out, C*KH*KW)

# Reshape kernel
reshaped_kernel = kernel.reshape(n_filters, in_ch * k_h * k_w).T
# (C*KH*KW, num_filters)

# Matrix multiplication
out = col_matrix @ reshaped_kernel
# (N*H_out*W_out, num_filters)

# Reshape output
output = out.reshape(batch, h_out, w_out, n_filters).transpose(0, 3, 1, 2)
# (N, num_filters, H_out, W_out)

# Synchronize GPU to ensure all operations are complete before stopping the clock
cp.cuda.Stream.null.synchronize()

total_time = time.perf_counter() - start_time

print("Output shape:", output.shape)  # (32, 128, 222, 222)
print(f"Total execution time: {total_time:.4f} seconds")

In [None]:
# NOTE(review): this cell needs `output`, `kernel` and `x` to come from the
# Tensor-based convolution cell. The CuPy cell rebinds all three to plain CuPy
# arrays, which presumably have no backward()/grad — run order matters.
output.backward()
print(f"out grad shape: {output.grad.shape}")
# NOTE(review): the "y grad" label below actually reports kernel.grad, and the
# Tensor-based cell creates `kernel` without requires_grad — confirm it
# receives a gradient.
print(f"y grad shape: {kernel.grad.shape}")
print(f"x grad shape: {x.grad.shape}")
output.view_graph(filename="conv_graph", view=True)

In [None]:
from core.tensor import Tensor
import numpy as np
from core.nn import Conv2d

# Forward a random image batch through the Conv2d layer.
x = Tensor(np.random.rand(1, 3, 224, 224), requires_grad=True)
# NOTE(review): `kernel` is created but never passed to `conv` in this cell.
kernel = Tensor(np.random.rand(64, 3, 3, 3), requires_grad=True)
conv = Conv2d(input_channels=3,output_channels= 64, kernel_size=3,bias=True,padding=1)
y = conv(x)

In [None]:
# With padding=1 and kernel_size=3 the spatial size is preserved (assuming the
# layer's default stride is 1 — confirm), so the expected shape is
# (1, 64, 224, 224) rather than the 222x222 of an unpadded convolution.
print("Output shape:", y.shape)  # expected (1, 64, 224, 224), see note above
y.backward()

In [None]:
# Draw the graph behind the Conv2d output (output name "conv_graph").
y.view_graph(filename="conv_graph", view=True)

In [None]:
import numpy as np
import time
from core.tensor import Tensor

def convolution_im2col(x, kernel, device='cpu'):
    """
    Convolve ``x`` with ``kernel`` by im2col followed by one matrix product.

    ``x`` is indexed as (batch, channels, height, width) and ``kernel`` as
    (filters, channels, kernel_height, kernel_width). No padding is applied
    and the window moves one element at a time, so the result has shape
    (batch, filters, height - kernel_height + 1, width - kernel_width + 1).

    ``device`` selects where the work runs: 'gpu' calls ``.cuda()`` on both
    operands, any other value calls ``.cpu()``.
    """
    # Place both operands on the requested device.
    use_gpu = device == 'gpu'
    x = x.cuda() if use_gpu else x.cpu()
    kernel = kernel.cuda() if use_gpu else kernel.cpu()

    n, c = x.shape[0], x.shape[1]
    n_filters, k_h, k_w = kernel.shape[0], kernel.shape[2], kernel.shape[3]
    h_out = x.shape[2] - k_h + 1
    w_out = x.shape[3] - k_w + 1

    # Sliding-window view: the last two axes walk inside a patch with the same
    # byte strides that move the window across the image.
    s_n, s_c, s_h, s_w = x.data.strides
    patches = x.as_strided(
        (n, c, h_out, w_out, k_h, k_w),
        (s_n, s_c, s_h, s_w, s_h, s_w),
    )

    # im2col: one row per output position, one column per (channel, ky, kx).
    columns = patches.transpose(0, 2, 3, 1, 4, 5).reshape(
        n * h_out * w_out,
        c * k_h * k_w,
    )

    # Kernel flattened to (C*KH*KW, num_filters) so the product is the convolution.
    weights = kernel.reshape(n_filters, -1).T
    flat = columns @ weights

    # Back to (N, num_filters, H_out, W_out).
    return flat.reshape(n, h_out, w_out, n_filters).transpose(0, 3, 1, 2)

def _sync_gpu():
    """Wait for queued GPU work to finish; do nothing when CuPy is not importable.

    Presumably the project's GPU tensors are CuPy-backed (the failure hints
    printed below mention CuPy) — confirm. Without this wait, a wall-clock
    timer may stop before asynchronously launched kernels have finished.
    """
    try:
        import cupy as cp
    except ImportError:
        return
    cp.cuda.Stream.null.synchronize()


def _timed_runs(x, kernel, device, num_runs):
    """Run the convolution ``num_runs`` times; return (seconds per run, last output)."""
    times = []
    output = None
    for i in range(num_runs):
        start_time = time.perf_counter()
        output = convolution_im2col(x, kernel, device=device)
        if device == 'gpu':
            _sync_gpu()
        elapsed = time.perf_counter() - start_time
        times.append(elapsed)
        print(f"Run {i+1}: {elapsed:.4f} seconds")
    return times, output


def _report_gpu_failure(error):
    """Print why the GPU half of the benchmark was skipped."""
    print(f"GPU benchmark failed: {error}")
    print("This might be because:")
    print("1. CuPy is not installed")
    print("2. No CUDA-compatible GPU is available")
    print("3. GPU memory is insufficient")


def benchmark_convolution():
    """
    Benchmark convolution on CPU vs GPU

    Times ``convolution_im2col`` on a (1, 3, 224, 224) float32 input with a
    (64, 3, 3, 3) kernel, five runs per device, and prints per-run times, the
    mean and standard deviation, the CPU/GPU speedup, the largest element-wise
    difference between the two results, and a rough memory estimate.

    Any GPU failure (tensor creation, warm-up or timed runs) is reported and
    the CPU benchmark still runs. Returns nothing.
    """
    print("=== Convolution Benchmark: CPU vs GPU ===\n")

    # Create input data
    print("Creating input tensors...")
    x_data = np.random.rand(1, 3, 224, 224).astype(np.float32)
    kernel_data = np.random.rand(64, 3, 3, 3).astype(np.float32)

    x_cpu = Tensor(x_data, requires_grad=True, device='cpu')
    kernel_cpu = Tensor(kernel_data, requires_grad=False, device='cpu')

    # Output shape of an unpadded, stride-1 convolution, derived from the data
    # so the printed expectation and the memory estimate cannot drift apart.
    expected_shape = (
        x_data.shape[0],
        kernel_data.shape[0],
        x_data.shape[2] - kernel_data.shape[2] + 1,
        x_data.shape[3] - kernel_data.shape[3] + 1,
    )

    print(f"Input shape: {x_cpu.shape}")
    print(f"Kernel shape: {kernel_cpu.shape}")
    print(f"Expected output shape: {expected_shape}\n")

    num_runs = 5

    # Warm up runs (important for GPU). The CPU warm-up runs unconditionally;
    # GPU tensor creation sits inside the try so a machine without a usable
    # GPU falls back to the CPU-only benchmark instead of crashing.
    print("Performing warm-up runs...")
    _ = convolution_im2col(x_cpu, kernel_cpu, device='cpu')
    x_gpu = kernel_gpu = None
    gpu_error = None
    try:
        x_gpu = Tensor(x_data, requires_grad=True, device='gpu')
        kernel_gpu = Tensor(kernel_data, requires_grad=False, device='gpu')
        _ = convolution_im2col(x_gpu, kernel_gpu, device='gpu')
        _sync_gpu()
        print("Warm-up completed.\n")
    except Exception as e:  # top-level boundary: report and continue on CPU
        gpu_error = e
        print(f"GPU warm-up failed: {e}")
        print("Continuing with CPU-only benchmark.\n")

    # CPU Benchmark
    print("=== CPU Benchmark ===")
    cpu_times, output_cpu = _timed_runs(x_cpu, kernel_cpu, 'cpu', num_runs)
    avg_cpu_time = np.mean(cpu_times)
    std_cpu_time = np.std(cpu_times)
    print(f"CPU Average: {avg_cpu_time:.4f} ± {std_cpu_time:.4f} seconds")
    print(f"CPU Output shape: {output_cpu.shape}\n")

    # GPU Benchmark
    print("=== GPU Benchmark ===")
    if gpu_error is not None:
        _report_gpu_failure(gpu_error)
    else:
        try:
            gpu_times, output_gpu = _timed_runs(x_gpu, kernel_gpu, 'gpu', num_runs)
            avg_gpu_time = np.mean(gpu_times)
            std_gpu_time = np.std(gpu_times)
            print(f"GPU Average: {avg_gpu_time:.4f} ± {std_gpu_time:.4f} seconds")
            print(f"GPU Output shape: {output_gpu.shape}\n")

            # Speedup calculation
            speedup = avg_cpu_time / avg_gpu_time
            print(f"=== Performance Comparison ===")
            print(f"CPU Time: {avg_cpu_time:.4f} ± {std_cpu_time:.4f} seconds")
            print(f"GPU Time: {avg_gpu_time:.4f} ± {std_gpu_time:.4f} seconds")
            print(f"Speedup: {speedup:.2f}x {'faster on GPU' if speedup > 1 else 'faster on CPU'}")

            # Verify results are similar (accounting for floating point precision)
            try:
                output_gpu_cpu = output_gpu.cpu()
                max_diff = np.max(np.abs(output_cpu.data - output_gpu_cpu.data))
                print(f"Maximum difference between CPU and GPU results: {max_diff:.2e}")
                if max_diff < 1e-5:
                    print("✓ Results are numerically equivalent")
                else:
                    print("⚠ Results differ significantly")
            except Exception as e:
                print(f"Could not compare results: {e}")

        except Exception as e:
            _report_gpu_failure(e)

    print("\n=== Memory Usage Info ===")
    input_size_mb = x_data.nbytes / (1024**2)
    kernel_size_mb = kernel_data.nbytes / (1024**2)
    # Element count of the expected output times the input's bytes per element
    # (4 for float32).
    output_size_mb = (int(np.prod(expected_shape)) * x_data.itemsize) / (1024**2)

    print(f"Input tensor: {input_size_mb:.2f} MB")
    print(f"Kernel tensor: {kernel_size_mb:.2f} MB")
    print(f"Output tensor: {output_size_mb:.2f} MB")
    print(f"Total memory usage: ~{input_size_mb + kernel_size_mb + output_size_mb:.2f} MB")

# Run the benchmark when the cell/script is executed directly.
if __name__ == "__main__":
    benchmark_convolution()

In [None]:
import numpy as np
from core.tensor import Tensor
from core.nn import batchnorm2d

# Create a random input tensor
# NOTE: 32 * 64 * 224 * 224 float64 values is roughly 0.8 GB for the input alone.
x = Tensor(np.random.rand(32, 64, 224, 224), requires_grad=True)
# Create a batch normalization layer for 64 channels
bn = batchnorm2d(64)

# Forward pass
y = bn(x)

print("Output shape:", y.shape)  # Should be (32, 64, 224, 224)

In [None]:
import numpy as np
from core.tensor import Tensor

# Power-rule check on a scalar tensor: y = x**3, so dy/dx = 3 * x**2.
x = Tensor(2)
y = x**3
y.backward()
print("x:", x.grad)  # Should be 12 (3 * 2**2)

In [None]:
# PyTorch
from torch import nn
from core.tensor import Tensor
from core.nn import ConvBatchNorm2D
import torch
import numpy as np
# Seed both libraries so the comparison is reproducible.
np.random.seed(42)
torch.manual_seed(42)

# Create test input for 2D CNN-like data
x_np = np.random.randn(4, 8, 16, 16).astype(np.float32)
x_torch = torch.tensor(x_np, dtype=torch.float32)


# Reference layer: track_running_stats=False makes PyTorch normalise with the
# statistics of the current batch; weight 1 and bias 0 make the affine step an
# identity.
bn = nn.BatchNorm2d(8, affine=True, track_running_stats=False)
bn.weight.data.fill_(1.0)
bn.bias.data.fill_(0.0)

out_torch = bn(x_torch)

# Project layer under test, fed the same input.
custom_bn = ConvBatchNorm2D(8)
out_custom = custom_bn(Tensor(x_np)).data

# Compare: largest absolute element-wise difference between the two outputs.
print("BatchNorm diff:", np.abs(out_custom - out_torch.detach().numpy()).max())


In [None]:
from core.tensor import Tensor
import numpy as np
from core.nn import Linear
# Create a random 3-D input tensor: (2, 10, 8)
x = Tensor(np.random.rand(2,10,8), requires_grad=True)
# Create a linear layer mapping 8 features to 16
linear = Linear(input_dim=8, output_dim=16)

# Forward pass
y = linear(x)

# NOTE(review): for a (2, 10, 8) input the result is presumably (2, 10, 16)
# if Linear acts on the last axis, not (2, 16) — confirm.
print("Output shape:", y.shape)  # Should be (2, 16)

In [None]:
import torch
import numpy as np
from core.tensor import Tensor
# Same 1x6 input for PyTorch (reference) and the project Tensor (under test).
x_np = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]], dtype=np.float32)
x_pt = torch.tensor(x_np, requires_grad=True)
x_coco  = Tensor(x_np, requires_grad=True)

# Split into 3 parts along axis=1
splits = torch.tensor_split(x_pt, 3, dim=1)  # 3 tensors: (1,2), (1,2), (1,2)
splits_coco = x_coco.split(indices_or_sections=3, axis=1)  # 3 tensors: (1,2), (1,2), (1,2)
# Sum every piece so each input element reaches the result with weight 1;
# the expected gradient is therefore all ones.
result = sum([s.sum() for s in splits])  # Just add all elements
result_coco = sum([s.sum() for s in splits_coco])  # Just add all elements

result.backward()
result_coco.backward()

# Check gradients

print("Torch Grad:", x_pt.grad.numpy())
print("Coco Grad:", x_coco.grad)
print("Gradients match:", np.allclose(x_pt.grad.numpy(), x_coco.grad.data))


In [None]:
import torch
import numpy as np
from core.tensor import Tensor

# Same 1x6 input for PyTorch (reference) and the project Tensor (under test).
x_np = np.array([[1., 2., 3., 4., 5., 6.]], dtype=np.float32)
x_pt = torch.tensor(x_np, requires_grad=True)
x_coco = Tensor(x_np, requires_grad=True)

# Split into 3 parts along axis=1
pt_splits = torch.tensor_split(x_pt, 3, dim=1)
coco_splits = x_coco.split(indices_or_sections=3, axis=1)

# Apply a different operation to each part so each slice gets a distinct
# gradient: 1 for the first, 2*x for the second, 2 for the third, i.e.
# [1, 1, 6, 8, 2, 2] for this input.
pt_result = pt_splits[0].sum() + (pt_splits[1] ** 2).sum() + (pt_splits[2] * 2).sum()
coco_result = coco_splits[0].sum() + (coco_splits[1] ** 2).sum() + (coco_splits[2] * 2).sum()

# Backward
pt_result.backward()
coco_result.backward()

# Compare gradients
print("Torch Grad:", x_pt.grad.numpy())
print("Coco Grad:", x_coco.grad)
print("Gradients match:", np.allclose(x_pt.grad.numpy(), x_coco.grad.data, atol=1e-5))


In [None]:
# Draw the graph behind the mixed split result (output name "split_graph").
coco_result.view_graph(filename="split_graph", view=True)

Match: True
