In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
from einops import rearrange, reduce, repeat
import torchvision
from data_factorcy.data_factorcy import loader_generate 
from utils.decomposition import kron

In [11]:
test_conv2d = nn.Conv2d(8, 64, 3)
test_conv2d.weight.data.shape

torch.Size([64, 8, 3, 3])

In [15]:
type(resnet50)
model = torchvision.models.resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], num_classes=10)
class KronConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias, a_shape, b_shape) -> None:
        super().__init__()
        weight_shape = [out_channels, in_channels, kernel_size, kernel_size]
        # assert each i, a[i] * b[i] == weight[i]
        assert np.array(a_shape[1:]) * np.array(b_shape[1:]) == np.array(weight_shape)
        self.a = nn.Parameter(torch.randn(a_shape))
        self.b = nn.Parameter(torch.randn(b_shape))
        self.stride = stride
        self.padding = padding
        self.bias = bias
        self.dilation = 1
        self.groups = 1
        
    def forward(self, x):
        weight = kron(self.a, self.b)
        return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
    
KronConv2d(3, 64, 7, 2, 3, False, [3, 8, 3, 7, 1], [3, 8, 1, 1, 7])
        

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [3]:
# calcu the params of the model 
def model_params(model):
    params = 0
    for p in model.parameters():
        if p.requires_grad:
            params += p.numel()
    return params
# print the params of the model, for example, 25555 is 25,555
model_params(resnet50)

25557032

In [4]:
# examine the model 
resnet50

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
from gkpd.convolution import kroneckerconv2d
resnet50.conv1
nn.Conv2d

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [None]:
# use minist to fine-tune resnet50
import torchvision.transforms as transforms


In [None]:
kroneckerconv2d(
    3, 64, (7, 7), (2, 2), (3, 3)
)

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Define data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to the input size expected by ResNet-50
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load MNIST dataset
mnist_train = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
mnist_test = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

# Create data loaders
train_loader = DataLoader(mnist_train, batch_size=32, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=32, shuffle=False)

# Load pre-trained ResNet-50 model
resnet50 = models.resnet50(pretrained=True)

# Modify the last fully connected layer to match the number of classes in MNIST (10)
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_ftrs, 10)  # Assuming 10 classes in MNIST

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet50.parameters(), lr=0.001, momentum=0.9)

# Fine-tuning the model
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet50.to(device)

for epoch in range(num_epochs):
    resnet50.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

# Evaluate the fine-tuned model on the test set
resnet50.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = resnet50(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")



RuntimeError: output with shape [1, 224, 224] doesn't match the broadcast shape [3, 224, 224]

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Define data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),  # Convert single-channel to three channels
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load MNIST dataset
mnist_train = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
mnist_test = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

# Create data loaders
train_loader = DataLoader(mnist_train, batch_size=32, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=32, shuffle=False)

# Load pre-trained ResNet-50 model
resnet50 = models.resnet50(pretrained=True)

# Modify the last fully connected layer to match the number of classes in MNIST (10)
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_ftrs, 10)  # Assuming 10 classes in MNIST

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet50.parameters(), lr=0.001, momentum=0.9)

# Fine-tuning the model
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet50.to(device)

for epoch in range(num_epochs):
    resnet50.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

# Evaluate the fine-tuned model on the test set
resnet50.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = resnet50(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/5, Loss: 0.11093835338565211
Epoch 2/5, Loss: 0.021187182665864626
Epoch 3/5, Loss: 0.011634554800235977
Epoch 4/5, Loss: 0.006403804848863122
Epoch 5/5, Loss: 0.004385608656009814
Test Accuracy: 99.60%


In [16]:
import torch
from gkpd.tensorops import kron
from torch.nn import functional as F


a = torch.randn(3, 2, 2)
b = torch.randn(3, 2, 2)
x = torch.randn(32, 11, 4)
c = kron(a, b)
r = 3
n = 32
out1 = x @ c
out1.shape 
c.shape 
from einops import rearrange
x = rearrange(x , 'n x1 (a1 b1) -> n x1 a1 b1', a1=2, b1=2)
print(x.shape)
b = rearrange(b, 'r b1 b2 ->  b1 (b2 r)')
out2 = F.linear(x, b)
out2 = rearrange(out2, 'n x1 a1 b2 r -> r (n x1 b2) a1')
out2 = torch.bmm(out2, a)
out2 = torch.sum(out2, dim=0).unsqueeze(0)
out2 = rearrange(out2, '(n x1 b2) a2 -> n x1 b2 a2', n=n, b2=2, a2=2)
print(out1, out2)
F.linear()

torch.Size([32, 11, 2, 2])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (704x2 and 6x2)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
linear = nn.Linear(3, 4)
a = torch.randn(3, 2, 4, 3)
linear(a)

tensor([[[[ 0.2785,  0.1856,  0.2213,  0.7426],
          [ 0.5151,  0.2695,  0.2214,  0.5761],
          [-0.0203, -0.3220, -0.1053,  0.6436],
          [-0.3288, -0.8772, -0.5157, -0.4683]],

         [[-0.1491, -0.0418,  0.1228,  0.2301],
          [-0.0409, -0.4519, -0.2037,  0.5865],
          [ 0.7476,  0.1456,  0.0517,  0.2111],
          [-0.2044,  0.0487,  0.1911, -0.0792]]],


        [[[ 0.3796, -0.1798, -0.1016,  0.4271],
          [ 0.3346,  0.0903,  0.1536,  1.1453],
          [ 0.3324,  0.3385,  0.2869, -0.0793],
          [ 0.4979,  0.3953,  0.3406,  0.9366]],

         [[ 1.3318,  0.7370,  0.4210,  1.2598],
          [ 0.6928,  0.0756,  0.0335,  0.6799],
          [ 0.7791,  0.4807,  0.3353,  0.8543],
          [-0.1847,  0.3920,  0.4750,  0.3823]]],


        [[[ 0.0122, -0.3247, -0.1635, -0.3492],
          [ 0.4908, -0.3324, -0.2683, -0.0190],
          [-0.0077, -0.5009, -0.2482,  0.6191],
          [ 0.6104,  0.7908,  0.6164,  0.8604]],

         [[ 0.5760, -0.337

In [None]:
class KronLinear(nn.Module):
    def __init__(self, input_dim, output_dim, rank=0, structured_sparse=False, bias=True) -> None:
        """Kronecker Linear Layer

        the weight matrix is a kron(a, b) matrix
        
        Args:
            rank (int): the rank of the Kronecker product
            a_shape (tuple): the shape of the **a** matrix 
            b_shape (tuple): the shape of the **b** matrix
            structured_sparse (bool, optional): _description_. Defaults to False.
            bias (bool, optional): _description_. Defaults to True.
        """
        super().__init__()
        input_dims = factorize(input_dim)
        output_dims = factorize(output_dim)

        if rank == 0:
            rank = min(*input_dims, *output_dims) // 2 + 1
        self.rank = rank
        
        a_shape = [input_dims[0], output_dims[1]]
        b_shape = [input_dims[1], output_dims[0]]
        

        self.structured_sparse = structured_sparse
        if structured_sparse:
            self.s = nn.Parameter(torch.randn( *a_shape), requires_grad=True)
        self.a = nn.Parameter(torch.randn(rank, *a_shape), requires_grad=True)
        self.b = nn.Parameter(torch.randn(rank, *b_shape), requires_grad=True)
        self.a_shape = a_shape
        self.b_shape = b_shape
        
        nn.init.xavier_uniform_(self.a)
        nn.init.xavier_uniform_(self.b)
        bias_shape = np.multiply(a_shape, b_shape)
        if bias:
            self.bias = nn.Parameter(torch.randn(*bias_shape[1:]), requires_grad=True)
        else:
            self.bias = None
        
    def forward(self, x):
        a = self.a
        if self.structured_sparse:
            a = self.s.unsqueeze(0) * self.a
        
        # a = self.s.unsqueeze(0) * self.a
        # w = kron(a, self.b)
        x_shape = x.shape 
        b = self.b
        r = self.a_shape[0]
        x = torch.reshape(x, (-1, x_shape[-1]))
        b = rearrange(b, 'r b1 b2 -> b1 (b2 r)')
        x = rearrange(x, 'n (a1 b1) -> n a1 b1', a1=self.a_shape[1], b1=self.b_shape[1])
        out = x @ b
        out = rearrange(out, 'n a1 (b2 r) -> r (n b2) a1', b2=self.b_shape[2], r=r)
        out = torch.bmm(out, a)
        out = torch.sum(out, dim=0).squeeze(0)
        out = rearrange(out, '(n b2) a2 -> n (a2 b2)', b2=self.b_shape[2])
        out = torch.reshape(out, x_shape[:-1] + (self.b_shape[2],))
        
        
        
        if self.bias is not None:
            out += self.bias.unsqueeze(0)
        return out

In [1]:
21871 / 41640

0.525240153698367

In [8]:
import numpy as np
w =np.array([-2, 1])
x1 = [6 ,5]
x2 = [6, 3]
x1, x2 = np.array(x1), np.array(x2)

w = x1 + w
w

array([-9, -6])

In [12]:
w = x1 @ w  + w
w

array([-1101, -1098])

In [4]:
w

array([-2240, -1132])

In [2]:
from torchvision.models import resnet50
resnet = resnet50(pretrained=True)
largest_name, largest_param = '', 0
# for each layer, print the number of parameters
for name, param in resnet.named_parameters():
    print(name, param.numel())
    if param.numel() > largest_param:
        largest_name, largest_param = name, param.numel()
    



conv1.weight 9408
bn1.weight 64
bn1.bias 64
layer1.0.conv1.weight 4096
layer1.0.bn1.weight 64
layer1.0.bn1.bias 64
layer1.0.conv2.weight 36864
layer1.0.bn2.weight 64
layer1.0.bn2.bias 64
layer1.0.conv3.weight 16384
layer1.0.bn3.weight 256
layer1.0.bn3.bias 256
layer1.0.downsample.0.weight 16384
layer1.0.downsample.1.weight 256
layer1.0.downsample.1.bias 256
layer1.1.conv1.weight 16384
layer1.1.bn1.weight 64
layer1.1.bn1.bias 64
layer1.1.conv2.weight 36864
layer1.1.bn2.weight 64
layer1.1.bn2.bias 64
layer1.1.conv3.weight 16384
layer1.1.bn3.weight 256
layer1.1.bn3.bias 256
layer1.2.conv1.weight 16384
layer1.2.bn1.weight 64
layer1.2.bn1.bias 64
layer1.2.conv2.weight 36864
layer1.2.bn2.weight 64
layer1.2.bn2.bias 64
layer1.2.conv3.weight 16384
layer1.2.bn3.weight 256
layer1.2.bn3.bias 256
layer2.0.conv1.weight 32768
layer2.0.bn1.weight 128
layer2.0.bn1.bias 128
layer2.0.conv2.weight 147456
layer2.0.bn2.weight 128
layer2.0.bn2.bias 128
layer2.0.conv3.weight 65536
layer2.0.bn3.weight 512
lay

In [4]:
print(largest_name, largest_param)
print(resnet.layer4[2].conv2.weight.shape)

layer4.0.conv2.weight 2359296
torch.Size([512, 512, 3, 3])


In [5]:
print(resnet.fc.weight.shape)

torch.Size([1000, 2048])
