In [None]:
import random
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from torchsummary import summary
from thop import profile

# Seed every random number source used in this file (Python, NumPy, torch CPU
# and torch CUDA) with the same value so runs are repeatable.
seed = 1787
for _seed_fn in (
    random.seed,
    np.random.seed,
    th.manual_seed,
    th.cuda.manual_seed,
    th.cuda.manual_seed_all,
):
    _seed_fn(seed)
del _seed_fn

# Ask cuDNN for deterministic kernels and switch off its autotuner.
th.backends.cudnn.deterministic = True
th.backends.cudnn.benchmark = False

# Run on the GPU when one is available, otherwise on the CPU.
device = th.device("cuda") if th.cuda.is_available() else th.device("cpu")

# --- Training schedule lengths ---
epochs = 1
custom_epochs = 1
new_epochs = 1

# --- Pruning configuration ---
# The string literal below is an older, disabled configuration; it is a no-op.
'''prune_percentage = [0.04, 0.12]
prune_limits = [1, 2]'''
prune_value = [1, 2, 4]
# 108 entries: 36 copies each of 8, 15 and 30, in that order.
prune_limits = [limit for limit in (8, 15, 30) for _ in range(36)]

# --- Optimisation / regularisation weights ---
optim_lr = 0.1
lamda = 0.01
alpha = 0.0001
beta = 0.0001

# --- Regularisation-driven pruning thresholds ---
regularization_prune_percentage = 0.02
decorrelation_lower_bound = 0.3
decorrelation_higher_bound = 0.4

def _cifar10_loader(train):
    """Return a shuffled DataLoader (batch size 100) over one CIFAR-10 split.

    The dataset is read from (and downloaded to) '../data'; images are only
    converted with ``ToTensor``. ``train`` selects the training or test split.
    """
    dataset = torchvision.datasets.CIFAR10(
        '../data',
        download=True,
        train=train,
        transform=transforms.Compose([transforms.ToTensor()]),
    )
    return th.utils.data.DataLoader(dataset, batch_size=100, shuffle=True)


# Both splits use identical settings; note that the test split is shuffled too.
trainloader = _cifar10_loader(train=True)
testloader = _cifar10_loader(train=False)

class Network():
    """Collection of training helpers: weight init, one-hot encoding,
    accuracy bookkeeping, result-folder creation and logging.

    The class sets no attributes itself. ``weight_init`` reads ``self.a_type``
    and ``best_tetr_acc`` reads and mutates ``self.train_accuracy`` /
    ``self.test_accuracy``; whoever uses those methods must provide them.
    """

    def weight_init(self, m):
        """Initialise the weights (and bias, if any) of a Linear/Conv2d module.

        'relu' / 'leaky_relu' use Kaiming-normal init; 'tanh' / 'sigmoid' use
        Xavier-uniform init with the matching gain. Other module types are
        left untouched.

        Raises:
            NotImplementedError: if ``self.a_type`` is not one of the four
                supported activations (a ``RuntimeError`` subclass, which is
                what the previous bare ``raise`` produced).
        """
        if not isinstance(m, (nn.Linear, nn.Conv2d)):
            return

        if self.a_type in ('relu', 'leaky_relu'):
            nn.init.kaiming_normal_(m.weight.data, nonlinearity=self.a_type)
        elif self.a_type in ('tanh', 'sigmoid'):
            g = nn.init.calculate_gain(self.a_type)
            nn.init.xavier_uniform_(m.weight.data, gain=g)
        else:
            raise NotImplementedError(
                "unsupported activation type: {!r}".format(self.a_type))

        # Layers built with bias=False (e.g. the convolutions in this file)
        # have no bias tensor to reset.
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)

    def one_hot(self, y, gpu):
        """Return a float one-hot matrix for the integer labels in ``y``.

        Args:
            y: numpy array or torch tensor of class labels; its first
                dimension is the number of rows of the result.
            gpu: when truthy the result is moved to CUDA.

        Returns:
            Tensor of shape ``(y.size(0), max(y) + 1)`` with a 1 in each
            row at the column(s) named by that row's label(s).
        """
        try:
            y = th.from_numpy(y)
        except TypeError:
            # Not a numpy array: use it as given (expected to be a tensor).
            pass

        num_classes = int(th.max(y)) + 1
        y_hot = th.zeros((y.size(0), num_classes))
        if gpu:
            y_hot = y_hot.cuda()

        # One scatter replaces the per-row Python loop. Reshaping to
        # (rows, -1) covers both 1-D label vectors and (rows, k) label arrays.
        index = y.reshape(y.size(0), -1).long().to(y_hot.device)
        y_hot.scatter_(1, index, 1.0)
        return y_hot

    def best_tetr_acc(self, prunes=0):
        """Collapse the accuracy history from index ``prunes`` onward to its best entry.

        The best test accuracy in ``self.test_accuracy[prunes:]`` is found;
        among the entries that reach it, the highest train accuracy is taken.
        Both histories are truncated at ``prunes`` and the best pair is
        appended.

        This merges two same-named definitions: one took ``prunes``, the other
        took no argument but still referenced ``prunes``. Calling without an
        argument now means "consider the whole history".

        Returns:
            ``(best_test_accuracy, best_train_accuracy)``.

        Raises:
            ValueError: if the selected slice of the history is empty.
        """
        te_acc = self.test_accuracy[prunes:]
        tr_acc = self.train_accuracy[prunes:]

        best_te_acc = max(te_acc)
        best_tr_acc = max(
            tr for tr, te in zip(tr_acc, te_acc) if te == best_te_acc)

        del self.test_accuracy[prunes:]
        del self.train_accuracy[prunes:]
        self.test_accuracy.append(best_te_acc)
        self.train_accuracy.append(best_tr_acc)
        return best_te_acc, best_tr_acc

    def create_folders(self, total_convs):
        """Create a timestamped results directory and return its path.

        The directory is ``<parent of this file's folder>/Results/
        <timestamp>_resnet_56/`` and a ``layer_file_info`` sub-folder is
        created inside it. ``total_convs`` is accepted but not used.
        """
        import os
        from time import localtime, strftime

        main_dir = strftime("/Results/%b%d_%H:%M:%S%p", localtime()) + "_resnet_56/"
        try:
            here = os.path.dirname(__file__)
        except NameError:
            # __file__ is not defined when the code runs in a notebook cell.
            here = os.getcwd()
        current_dir = os.path.abspath(here)
        par_dir = os.path.abspath(current_dir + "/../")
        parent_dir = par_dir + main_dir
        path2 = os.path.join(parent_dir, "layer_file_info")
        os.makedirs(path2)
        return parent_dir

    def get_writerow(self, k):
        """Return the source text ``wr.writerow([d[0],d[1],...,d[k-1]])``.

        For ``k <= 0`` the result is ``wr.writerow([])`` so the returned text
        is always a complete expression.
        """
        cells = ','.join('d[' + str(i) + ']' for i in range(k))
        return 'wr.writerow([' + cells + '])'

    def get_logger(self, file_path):
        """Return the 'gal' logger, writing to ``file_path`` and to the console.

        NOTE(review): every call attaches two more handlers to the same named
        logger, so calling this repeatedly duplicates each message.
        """
        import logging

        logger = logging.getLogger('gal')
        log_format = '%(asctime)s | %(message)s'
        formatter = logging.Formatter(log_format, datefmt='%m/%d %I:%M:%S %p')
        file_handler = logging.FileHandler(file_path)
        file_handler.setFormatter(formatter)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        logger.addHandler(stream_handler)
        logger.setLevel(logging.INFO)

        return logger

class PruningMethod:
    """Mixin that physically removes pruned channels from a model's layers.

    It calls ``self.named_modules()``, so it must be combined with an
    ``nn.Module``.
    """

    @staticmethod
    def _select(tensor, indices, dim=0):
        """Return the entries of ``tensor`` at ``indices`` along ``dim``, on the tensor's own device."""
        idx = th.as_tensor(indices, dtype=th.long, device=tensor.device)
        return tensor.index_select(dim, idx)

    def prune_filters(self, layer_indices):
        """Shrink every Conv2d / BatchNorm2d and the first Linear to the kept channels.

        Modules are visited in ``named_modules()`` order.

        * The k-th Conv2d keeps output filters ``layer_indices[k]`` and input
          channels ``layer_indices[k - 1]`` (all input channels for k == 0).
        * A BatchNorm2d keeps the same channels as the convolution visited
          just before it.
        * The first Linear keeps the input columns fed by the channels kept in
          the last convolution; processing stops there.

        Tensors stay on the device they were already on.

        Args:
            layer_indices: sequence indexed by convolution number; each entry
                is the sequence of filter indices to keep for that layer.
        """
        conv_layer = 0
        # Filter count of the latest convolution *before* it was pruned; needed
        # to work out how many Linear inputs each channel contributes.
        last_conv_width = None

        for layer_name, layer_module in self.named_modules():
            if isinstance(layer_module, th.nn.Conv2d):
                if conv_layer == 0:
                    in_channels = list(range(layer_module.weight.shape[1]))
                else:
                    in_channels = layer_indices[conv_layer - 1]

                out_channels = layer_indices[conv_layer]

                print('conv_layer:', conv_layer)
                print('in_channels:', in_channels)
                print('out_channels:', out_channels)

                last_conv_width = layer_module.weight.shape[0]

                # First drop output filters ...
                layer_module.weight = th.nn.Parameter(
                    self._select(layer_module.weight.data, out_channels, 0))

                print("1:", layer_module.weight)

                if layer_module.bias is not None:
                    layer_module.bias = th.nn.Parameter(
                        self._select(layer_module.bias.data, out_channels, 0))

                # ... then the input channels removed from the previous layer.
                layer_module.weight = th.nn.Parameter(
                    self._select(layer_module.weight.data, in_channels, 1))

                print("2:", layer_module.weight)

                layer_module.in_channels = len(in_channels)
                layer_module.out_channels = len(out_channels)

                conv_layer += 1

            if isinstance(layer_module, th.nn.BatchNorm2d):
                # conv_layer has already been advanced past the convolution
                # this norm follows, so its channels are at conv_layer - 1.
                out_channels = layer_indices[max(conv_layer - 1, 0)]
                if layer_module.weight is not None:
                    layer_module.weight = th.nn.Parameter(
                        self._select(layer_module.weight.data, out_channels, 0))
                if layer_module.bias is not None:
                    layer_module.bias = th.nn.Parameter(
                        self._select(layer_module.bias.data, out_channels, 0))
                if layer_module.running_mean is not None:
                    layer_module.running_mean = self._select(
                        layer_module.running_mean, out_channels, 0)
                if layer_module.running_var is not None:
                    layer_module.running_var = self._select(
                        layer_module.running_var, out_channels, 0)
                layer_module.num_features = len(out_channels)

            if isinstance(layer_module, nn.Linear):
                conv_layer -= 1
                in_channels = layer_indices[conv_layer]

                # Number of Linear inputs produced per conv channel. Derived
                # from the layer sizes when possible; 4 * 4 is the previous
                # fixed value and remains the fallback.
                size = 4 * 4
                if last_conv_width and layer_module.in_features % last_conv_width == 0:
                    size = layer_module.in_features // last_conv_width

                expanded_in_channels = [
                    i * size + j for i in in_channels for j in range(size)]
                layer_module.weight = th.nn.Parameter(
                    self._select(layer_module.weight.data, expanded_in_channels, 1))
                layer_module.in_features = len(expanded_in_channels)
                break

    # Disabled index-selection helpers, kept as a no-op string literal.
    '''def get_indices_topk(self,layer_bounds,layer_num,prune_limit,prune_value):

        i=layer_num
        indices=prune_value[i]

        p=len(layer_bounds)
        if (p-indices)<prune_limit:
            prune_value[i]=p-prune_limit
            indices=prune_value[i]

        k=sorted(range(len(layer_bounds)), key=lambda j: layer_bounds[j])[:indices]
        return k
      
    def get_indices_bottomk(self,layer_bounds,i,prune_limit):

        k=sorted(range(len(layer_bounds)), key=lambda j: layer_bounds[j])[-prune_limit:]
        return k'''

class LambdaLayer(nn.Module):
    """Module wrapper that applies a stored callable to its input."""

    def __init__(self, lambd):
        super().__init__()
        # Kept as a plain attribute; invoked on every forward pass.
        self.lambd = lambd

    def forward(self, x):
        """Return the result of calling the wrapped callable on ``x``."""
        fn = self.lambd
        return fn(x)

def conv3x3(in_planes, out_planes, stride=1):
    """Return a 3x3 ``nn.Conv2d`` with padding 1 and no bias term."""
    layer = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer

# NOTE: this re-declares LambdaLayer, which is already defined earlier in this
# file with an equivalent body; this later definition is the one bound to the
# name from here on.
class LambdaLayer(nn.Module):
    """Adapter that turns an arbitrary callable into an ``nn.Module``."""

    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, x):
        """Apply the stored callable to ``x`` and return its result."""
        result = self.lambd(x)
        return result

class ResBasicBlock(nn.Module):
    """Two-convolution residual block with a parameter-free shortcut.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut output is
    added and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super(ResBasicBlock, self).__init__()
        self.inplanes = inplanes
        self.planes = planes
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.stride = stride
        # Identity shortcut unless the main path changes resolution or width.
        self.shortcut = nn.Sequential()
        if stride != 1 or inplanes != planes:
            # Zero-pad the channel dimension: planes // 4 channels in front,
            # the remainder behind.
            pad_front = planes // 4
            pad_back = planes - inplanes - pad_front
            # Subsample with the block's own stride rather than a fixed 2, so
            # the shortcut matches the main path's spatial size when only the
            # channel count changes (stride == 1) as well. For stride == 2
            # this is the same slice as before.
            self.shortcut = LambdaLayer(
                lambda x: F.pad(x[:, :, ::stride, ::stride],
                                (0, 0, 0, 0, pad_front, pad_back), "constant", 0))

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(x)
        out = self.relu2(out)

        return out

class ResNet(nn.Module):
    """Three-stage residual network with a 16-channel 3x3 stem.

    Args:
        block: block class; called as ``block(inplanes, planes, stride)`` and
            expected to expose an ``expansion`` attribute.
        num_layers: total depth, must be of the form 6n + 2.
        covcfg: stored unchanged on the instance as ``self.covcfg``.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_layers, covcfg, num_classes=10):
        super().__init__()
        assert (num_layers - 2) % 6 == 0, 'depth should be 6n+2'
        blocks_per_stage = (num_layers - 2) // 6
        self.covcfg = covcfg
        self.num_layers = num_layers

        # Stem: 3 input channels -> 16 feature maps.
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)

        # Stages layer1..layer3 with widths 16/32/64; the last two use stride 2.
        stage_specs = ((16, 1), (32, 2), (64, 2))
        for stage_no, (width, stage_stride) in enumerate(stage_specs, start=1):
            stage = self._make_layer(stage_no, block, width,
                                     blocks=blocks_per_stage, stride=stage_stride)
            setattr(self, 'layer{}'.format(stage_no), stage)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        self.initialize()

    def initialize(self):
        """Kaiming-normal init for every Conv2d; BatchNorm2d weight=1, bias=0."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)

    def _make_layer(self, a, block, planes, blocks, stride):
        """Build one stage of ``blocks`` blocks; only the first one gets ``stride``.

        ``a`` is accepted but not used. ``self.inplanes`` is updated to the
        stage's output width.
        """
        head = block(self.inplanes, planes, stride)
        self.inplanes = planes * block.expansion
        tail = [block(self.inplanes, planes) for _ in range(1, blocks)]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Run stem, the three stages, global average pooling and the classifier."""
        out = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        pooled = self.avgpool(out)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

def resnet_110():
    """Build the 110-layer ResNet assembled from ``ResBasicBlock`` units.

    The configuration list passed along holds 109 values: 2, 5, 8, ... in
    steps of 3.
    """
    entry_count = 9 * 6 * 2 + 1
    cov_cfg = list(range(2, 3 * entry_count + 2, 3))
    return ResNet(ResBasicBlock, 110, cov_cfg)

# Build the network and move it to the GPU when one is available.
device = th.device("cuda") if th.cuda.is_available() else th.device("cpu")
model = resnet_110()
model = model.to(device)

# SGD with momentum and weight decay; the learning rate is multiplied by 0.1
# at epochs 20 and 30.
optimizer = th.optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)
scheduler = th.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[20, 30],
    gamma=0.1,
)
criterion = nn.CrossEntropyLoss()

# Every convolution in the model, in module-traversal order.
conv_layers = list(filter(lambda m: isinstance(m, nn.Conv2d), model.modules()))

'''for i, layer in enumerate(conv_layers):
    with th.no_grad():
        filters = layer.weight.data.clone()
        num_filters = filters.size(0)
        #print(i, layer, num_filters)

        # Calculate cosine similarity
        similarity_matrix = []
        for j in range(num_filters):
            for k in range(j + 1, num_filters):
                cosine_sim = F.cosine_similarity(filters[j].flatten(), filters[k].flatten(), dim=0)
                similarity_matrix.append((j, k, cosine_sim.item()))'''

In [3]:
# List the name of every module registered in the model, one per line.
for entry in model.named_modules():
    print(entry[0])


conv1
bn1
relu
layer1
layer1.0
layer1.0.conv1
layer1.0.bn1
layer1.0.relu1
layer1.0.conv2
layer1.0.bn2
layer1.0.relu2
layer1.0.shortcut
layer1.1
layer1.1.conv1
layer1.1.bn1
layer1.1.relu1
layer1.1.conv2
layer1.1.bn2
layer1.1.relu2
layer1.1.shortcut
layer1.2
layer1.2.conv1
layer1.2.bn1
layer1.2.relu1
layer1.2.conv2
layer1.2.bn2
layer1.2.relu2
layer1.2.shortcut
layer1.3
layer1.3.conv1
layer1.3.bn1
layer1.3.relu1
layer1.3.conv2
layer1.3.bn2
layer1.3.relu2
layer1.3.shortcut
layer1.4
layer1.4.conv1
layer1.4.bn1
layer1.4.relu1
layer1.4.conv2
layer1.4.bn2
layer1.4.relu2
layer1.4.shortcut
layer1.5
layer1.5.conv1
layer1.5.bn1
layer1.5.relu1
layer1.5.conv2
layer1.5.bn2
layer1.5.relu2
layer1.5.shortcut
layer1.6
layer1.6.conv1
layer1.6.bn1
layer1.6.relu1
layer1.6.conv2
layer1.6.bn2
layer1.6.relu2
layer1.6.shortcut
layer1.7
layer1.7.conv1
layer1.7.bn1
layer1.7.relu1
layer1.7.conv2
layer1.7.bn2
layer1.7.relu2
layer1.7.shortcut
layer1.8
layer1.8.conv1
layer1.8.bn1
layer1.8.relu1
layer1.8.conv2
layer1