<a href="https://colab.research.google.com/github/jkordonis/TropicalML/blob/main/ResNet9CIFAR100_Compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
from sklearn.cluster import KMeans
import numpy as np
import os
import torch
import time
import torchvision
import torch.nn as nn
import copy

import numpy as np
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import Dataset
from torch.utils.data import random_split
from torchvision.utils import make_grid
import torchvision.models as models
import matplotlib.pyplot as plt
from sklearn.metrics import *
from google.colab import files
import cvxpy as cp

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Global hyperparameters.
# batch_size is used by the train/test DataLoaders below.
# The remaining names mirror the parameters of fit_one_cycle; no call to it is
# visible in this notebook, so presumably these are the settings the loaded
# checkpoint was trained with -- TODO confirm.
batch_size = 400
epochs = 120
max_lr = 0.001
grad_clip = 0.01
weight_decay =0.001
opt_func = torch.optim.Adam


In [None]:
train_data = torchvision.datasets.CIFAR100('./', train=True, download=True)

# Per-channel mean / std of the training images, scaled to [0, 1].
# `train_data.data` already holds every image as one uint8 array of shape
# (N, 32, 32, 3), so reduce over it directly instead of converting 50,000
# PIL images back to arrays one by one.
train_pixels = train_data.data
mean = (np.mean(train_pixels, axis=(0, 1, 2)) / 255).tolist()
std = (np.std(train_pixels, axis=(0, 1, 2)) / 255).tolist()

Files already downloaded and verified


In [None]:
# Training pipeline: reflect-padded random crop + horizontal flip for
# augmentation, then tensor conversion and per-channel normalisation.
_train_steps = [
    tt.RandomCrop(32, padding=4, padding_mode='reflect'),
    tt.RandomHorizontalFlip(),
    tt.ToTensor(),
    tt.Normalize(mean, std, inplace=True),
]
transform_train = tt.Compose(_train_steps)

# Evaluation pipeline: no augmentation, same normalisation statistics.
_test_steps = [tt.ToTensor(), tt.Normalize(mean, std)]
transform_test = tt.Compose(_test_steps)

In [None]:
# Training split with augmentation; reshuffled by the loader.
trainset = torchvision.datasets.CIFAR100("./",  train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader( trainset, batch_size, shuffle=True, num_workers=2,pin_memory=True)

# Test split without augmentation; the loader uses twice the training batch
# size and no shuffle argument.
testset = torchvision.datasets.CIFAR100("./",  train=False,  download=True,   transform=transform_test)
testloader = torch.utils.data.DataLoader( testset, batch_size*2,pin_memory=True, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

def to_device(data, device):
    """Move a tensor, or a (nested) list/tuple of tensors, to `device`.

    Lists and tuples are processed recursively and always come back as lists;
    anything else must expose `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

class DeviceDataLoader():
    """Thin wrapper that moves every batch of a dataloader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Iterate over the wrapped loader, yielding device-resident batches."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Number of batches in the wrapped loader."""
        return len(self.dl)
# Device used for the model and all batches in the rest of the notebook.
device = get_default_device()
device

device(type='cuda')

In [None]:
def accuracy(outputs, labels):
    """Fraction of rows whose highest-scoring class equals the label.

    Returns a 0-dim tensor so that per-batch values can be stacked later.
    """
    preds = outputs.argmax(dim=1)
    n_correct = (preds == labels).sum().item()
    return torch.tensor(n_correct / len(preds))

class ImageClassificationBase(nn.Module):
    """Mixin with the loss / metric plumbing shared by the classifiers.

    Subclasses only need to implement `forward`.
    """

    def training_step(self, batch):
        """Cross-entropy loss of the model on one (images, labels) batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Detached loss and accuracy for one (images, labels) batch."""
        inputs, targets = batch
        logits = self(inputs)
        batch_loss = F.cross_entropy(logits, targets)
        batch_acc = accuracy(logits, targets)
        return {'val_loss': batch_loss.detach(), 'val_acc': batch_acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch dicts from `validation_step` into Python floats."""
        def _mean_of(key):
            return torch.stack([step[key] for step in outputs]).mean().item()

        return {'val_loss': _mean_of('val_loss'), 'val_acc': _mean_of('val_acc')}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch."""
        template = "Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        last_lr = result['lrs'][-1]
        print(template.format(
            epoch, last_lr, result['train_loss'], result['val_loss'], result['val_acc']))

def conv_block(in_channels, out_channels, pool=False):
    """Build a 3x3 Conv -> BatchNorm -> ReLU stack, optionally followed by 2x2 max-pooling."""
    stages = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]
    if pool:
        stages += [nn.MaxPool2d(2)]
    return nn.Sequential(*stages)

class ResNet9(ImageClassificationBase):
    """ResNet-9 style classifier: four conv blocks, two residual pairs and a linear head."""

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stage 1: 64 -> 128 channels, followed by a residual pair at 128.
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

        # Stage 2: 256 -> 512 channels, followed by a residual pair at 512.
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))

        # Head: global max-pool, flatten, dropout, linear classifier.
        head_layers = [
            nn.AdaptiveMaxPool2d((1,1)),
            nn.Flatten(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, xb):
        """Return class logits for a batch of images."""
        feats = self.conv2(self.conv1(xb))
        feats = self.res1(feats) + feats      # first skip connection
        feats = self.conv4(self.conv3(feats))
        feats = self.res2(feats) + feats      # second skip connection
        return self.classifier(feats)

# Instantiate the network for 3 input channels and 100 classes, on `device`.
model = to_device(ResNet9(3, 100), device)
#model

In [None]:
 # Total number of parameters of the full model.
 sum(p.numel() for p in model.parameters())


6621540

In [None]:
@torch.no_grad()
def evaluate(model, test_loader):
    """Switch `model` to eval mode and aggregate its validation metrics over `test_loader`."""
    model.eval()
    step_results = []
    for batch in test_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def get_lr(optimizer):
    """Learning rate of the optimizer's first parameter group (None when there are no groups)."""
    first_group_lr = (group['lr'] for group in optimizer.param_groups)
    return next(first_group_lr, None)

def fit_one_cycle(epochs, max_lr, model, train_loader, test_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train `model` with a one-cycle learning-rate schedule.

    Args:
        epochs: number of passes over `train_loader`.
        max_lr: peak learning rate of the one-cycle schedule (also the
            optimizer's initial learning rate).
        model: module exposing `training_step` and `epoch_end`
            (see ImageClassificationBase).
        train_loader: iterable of training batches; must support `len()`.
        test_loader: iterable of validation batches, passed to `evaluate`.
        weight_decay: forwarded to the optimizer constructor.
        grad_clip: if truthy, gradients are clipped element-wise to
            [-grad_clip, grad_clip] before each optimizer step.
        opt_func: optimizer class, called as
            `opt_func(params, max_lr, weight_decay=...)`.

    Returns:
        A list with one dict per epoch containing 'val_loss', 'val_acc',
        'train_loss' and 'lrs' (the learning rate recorded at every step).
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
                                                steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:

            loss = model.training_step(batch)
            # Store a detached copy: the value is only needed for logging, and
            # keeping the attached tensor would retain a reference to its
            # autograd history for the whole epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, test_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [None]:
model_path = '/content/drive/My Drive/ResNet9CIFAR100.pth'
# Load the trained weights onto the CPU first (the model is moved to `device`
# in the next cell). `weights_only=True` restricts unpickling to tensors and
# plain containers, which is all a state_dict needs, and avoids executing
# arbitrary pickled code from the checkpoint file.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'), weights_only=True))


  model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))


<All keys matched successfully>

In [None]:
# Move the model (weights were loaded with map_location='cpu') to `device`.
model.to(device)
#evaluate(model, DeviceDataLoader(testloader, device))

In [None]:
def include_bn2d_into_conv2d(batchnorm_m,conv2d_c):
  """Fold an eval-mode BatchNorm2d into the Conv2d that precedes it.

  With scale = gamma / sqrt(running_var + eps) and
  shift = beta - running_mean * scale, the fused layer is
  W' = W * scale (per output channel) and b' = b * scale + shift.

  Args:
    batchnorm_m: BatchNorm2d whose running statistics are used.
    conv2d_c: Conv2d feeding `batchnorm_m`.

  Returns:
    (new_weight, new_bias) as NumPy arrays on the CPU, with the shapes of the
    convolution weight and of one value per output channel respectively.
  """
  bn_mean, bn_var, eps = batchnorm_m.running_mean, batchnorm_m.running_var, batchnorm_m.eps
  # affine=False BatchNorm has no weight/bias: treat as gamma=1, beta=0.
  bn_weight = batchnorm_m.weight if batchnorm_m.weight is not None else torch.ones_like(bn_mean)
  bn_bias = batchnorm_m.bias if batchnorm_m.bias is not None else torch.zeros_like(bn_mean)
  # bias=False convolution: treat the missing bias as zero.
  conv_bias = conv2d_c.bias if conv2d_c.bias is not None else torch.zeros_like(bn_mean)

  scale = bn_weight / torch.sqrt(bn_var + eps)
  shift = bn_bias - bn_mean * scale
  new_weight = (conv2d_c.weight * scale.view(-1, 1, 1, 1)).detach().clone().to('cpu').numpy()
  new_bias = ((conv_bias * scale + shift)).detach().clone().to('cpu').numpy()
  return new_weight,new_bias

In [None]:
# Switch to eval mode, then fold each BatchNorm of res2 into its convolution.
# res2[k][0] is the Conv2d and res2[k][1] the BatchNorm2d of the k-th conv_block.
# NOTE(review): the same three folding statements are repeated in a later cell.
model.eval()
W1,b1=include_bn2d_into_conv2d(model.res2[0][1],model.res2[0][0])
W2,b2=include_bn2d_into_conv2d(model.res2[1][1],model.res2[1][0])
# Flatten each output filter of the first convolution into one row.
W1_mat=W1.reshape(W1.shape[0],-1)

# Division Part

In [None]:
# Number of terms in the division
# (read as a global by Division_function: rows of a_hat / length of b_hat)
m_q=2
#dimension of the space
# (512 channels x 3 x 3 patch = length of one flattened filter / sample row)
d=512*3*3

In [None]:
# BN-folded weights/biases of the two res2 convolutions.
# NOTE(review): repeats the computation of an earlier cell, except for the
# model.eval() call made there.
W1,b1=include_bn2d_into_conv2d(model.res2[0][1],model.res2[0][0])
W2,b2=include_bn2d_into_conv2d(model.res2[1][1],model.res2[1][0])
# One flattened filter of the first convolution per row.
W1_mat=W1.reshape(W1.shape[0],-1)

In [None]:
# Sanity check of the folded weight shapes.
W1_mat.shape,W2.shape

((512, 4608), (512, 512, 3, 3))

In [None]:
def Sample_Pair_of_outp_to_Divide(W1_mat,W2,b1):
  """Sample two random entries of the second layer and split their difference by sign.

  Two (output plane, kernel x, kernel y) positions of `W2` are drawn at
  random. The difference of the corresponding coefficient vectors over the
  hidden channels, `Coef`, weights the hidden units of the first layer; the
  weighted units are returned separately for non-negative and negative
  coefficients.

  Args:
    W1_mat: array (hidden, d), one flattened first-layer filter per row.
    W2: array (out_planes, hidden, kx, ky), second-layer weights.
    b1: array (hidden,), first-layer biases.

  Returns:
    (W_pos, W_neg, b_pos, b_neg): rows `Coef[c] * W1_mat[c]` and biases
    `Coef[c] * b1[c]` for the channels c with Coef >= 0 and Coef < 0.
  """
  n_planes, _, n_x, n_y = W2.shape
  # Draw order (x, y, plane) for each position is kept so that the random
  # stream is consumed exactly as before for 512x512x3x3 weights.
  idx_x1, idx_y1, idx_plane1=np.random.randint(0,n_x), np.random.randint(0,n_y), np.random.randint(0,n_planes)
  idx_x2, idx_y2, idx_plane2=np.random.randint(0,n_x), np.random.randint(0,n_y), np.random.randint(0,n_planes)

  Coef = W2[idx_plane1,:,idx_x1,idx_y1]-W2[idx_plane2,:,idx_x2,idx_y2]
  pos_mask = Coef>=0
  neg_mask = ~pos_mask
  W_pos = Coef[pos_mask,None]*W1_mat[pos_mask]
  W_neg = Coef[neg_mask,None]*W1_mat[neg_mask]
  # Each term's bias is its own hidden unit's bias scaled by the same
  # coefficient as its weight row. The sampled plane indices address output
  # planes of W2, not entries of b1.
  b_pos = Coef[pos_mask]*b1[pos_mask]
  b_neg = Coef[neg_mask]*b1[neg_mask]
  return W_pos,W_neg,b_pos,b_neg

# Draw one example pair (also defines W_pos/W_neg/b_pos/b_neg as globals).
W_pos,W_neg,b_pos,b_neg=Sample_Pair_of_outp_to_Divide(W1_mat,W2,b1)


In [None]:
# Collect the output of the intermediate layer

intermediate_output = None

def hook_fn(module, input, output):
    """Forward hook: remember the hooked module's most recent output."""
    global intermediate_output
    intermediate_output = output

# conv4[2] is the ReLU of the conv4 block (before its max-pooling layer).
# NOTE(review): res2 itself receives the pooled output of conv4 -- confirm
# that sampling patches before the pooling is intended.
hook_handle = model.conv4[2].register_forward_hook(hook_fn)
sampled_patches = []

cnt=0
try:
  # Only activations are needed, so skip building autograd graphs.
  with torch.no_grad():
    for i in trainloader:
      inputs = i[0].to(device)
      output = model(inputs)
      # Centre of a random 3x3 patch; 1..6 keeps the patch inside an 8x8 map.
      x_smpl,y_smpl=np.random.randint(1,7),np.random.randint(1,7)
      patch = intermediate_output[:,:,x_smpl-1:x_smpl+2,y_smpl-1:y_smpl+2]
      # Use the actual batch size instead of a hard-coded 400.
      X_to_add = patch.detach().reshape(patch.shape[0],512*9).to('cpu').numpy()
      sampled_patches.append(X_to_add)
      cnt+=1
      if cnt>=5:
        break
finally:
  # Always detach the hook so later forward passes are not intercepted.
  hook_handle.remove()

# Starting from a float64 empty array keeps the dtype of X_sample unchanged.
X_sample = np.concatenate([np.zeros([0,512*9])]+sampled_patches,axis=0)

X_sample = X_sample[:200]


In [None]:
# Define the Division Function
def Division_function(a_divident,b_divident,Iterations =5):
  """Approximate a max-of-affine function by `m_q` affine terms lying below it.

  The dividend is f(x) = max_k (a_divident[k] @ x + b_divident[k]). The result
  is m_q terms (a_hat[i], b_hat[i]); each a_hat[i] is a convex combination of
  the rows of `a_divident`, and each term is constrained to stay below f on
  every row of the global `X_sample`.

  Reads the globals `m_q`, `d` and `X_sample`.

  Args:
    a_divident: array (m_p, d) with the slopes of the dividend terms.
    b_divident: array (m_p,) with the offsets of the dividend terms.
    Iterations: number of alternating assignment / re-fit rounds.

  Returns:
    (a_hat, b_hat) with shapes (m_q, d) and (m_q,).
  """
  #Initialization Process: Start with almost the same number of samples per term
  a_hat, b_hat=np.random.randn(m_q,d), np.zeros(m_q)
  for i in range(20):
    I_i = np.argmax((a_hat@X_sample.T+np.array([b_hat]).T), axis=0)
    # Lower the offset of terms that win many samples, then re-centre.
    b_hat=b_hat-np.array([(I_i==j).sum() for j in range(m_q)])*0.005
    b_hat=b_hat-b_hat.mean()

  #Compute the f values
  f_x_i=np.max((a_divident@X_sample.T+np.array([b_divident]).T), axis=0)
  m_p=a_divident.shape[0]
  # Define cp variables
  x_b, lambda_var =   cp.Variable(1), cp.Variable(m_p)
  for cnt in range(Iterations):
    I_i=np.argmax((a_hat@X_sample.T+np.array([b_hat]).T), axis=0)         # Find the sets I_i
    N=np.array([(I_i==j).sum() for j in range(m_q)])                      # Compute the number of elements in each set I_i
    s=np.array([np.sum(X_sample[I_i==i], axis=0) for i in range(m_q)])    # Compute the summation of x_j's
    for i in range(a_hat.shape[0]):
      # In this problem we substituted a_hat_i by a linear combination of a_i's
      prob = cp.Problem(cp.Maximize(lambda_var@a_divident@s[i]+N[i]*x_b),
                      [(X_sample@(lambda_var@a_divident))+x_b<=f_x_i,
                      lambda_var>=0,
                      cp.sum(lambda_var)==1,
                      ])
      prob.solve(warm_start=True)
      a_hat[i]=(lambda_var@a_divident).value
      # x_b is a length-1 variable, so .value is a shape-(1,) array; extract
      # the scalar explicitly (implicit array-to-scalar conversion is
      # deprecated in NumPy).
      b_hat[i]=x_b.value.item()
  return a_hat,b_hat


In [None]:
# Iterative Pairwise Divisions
# For each of 100 randomly sampled pairs, divide the non-negative part and the
# negative part separately and keep both results. Every appended a_hat has
# m_q rows and every appended b_hat has m_q entries.
a_hat_pos_list,b_hat_pos_list,a_hat_neg_list,b_hat_neg_list=[],[],[],[]  # Initialize the lists
for number_of_terms in range(100):
  W_pos,W_neg,b_pos,b_neg=Sample_Pair_of_outp_to_Divide(W1_mat,W2,b1)
  # Quotient terms for the non-negative coefficients.
  a_hat,b_hat=Division_function(W_pos,b_pos)
  a_hat_pos_list.append(a_hat)
  b_hat_pos_list.append(b_hat)
  # Quotient terms for the negative coefficients.
  a_hat,b_hat=Division_function(W_neg,b_neg)
  a_hat_neg_list.append(a_hat)
  b_hat_neg_list.append(b_hat)







  b_hat[i]=x_b.value


In [None]:
# Stack the division results into one candidate matrix (one vector per row)
# and a matching bias vector, interleaving positive and negative parts:
# pos_0, neg_0, pos_1, neg_1, ...
vector_chunks, bias_chunks = [], []
for a_pos_hat, b_pos_hat, a_neg_hat, b_neg_hat in zip(a_hat_pos_list, b_hat_pos_list,
                                                        a_hat_neg_list, b_hat_neg_list):
  vector_chunks += [a_pos_hat, a_neg_hat]
  # The negative vectors must be paired with the negative biases (the
  # positive biases were previously appended twice).
  bias_chunks += [b_pos_hat, b_neg_hat]

# Concatenate once instead of growing the arrays with np.append in a loop.
Vectors_to_choose=np.concatenate([np.zeros([0,512*9])]+vector_chunks, axis=0)
Corresp_bias=np.concatenate([np.zeros([0])]+bias_chunks, axis=0)

DivisionResultsPath_Vectors_to_choose='/content/drive/My Drive/Vectors_to_choose.pt'
DivisionResultsPath_Corresp_bias = '/content/drive/My Drive/Corresp_bias.pt'

torch.save(Vectors_to_choose, DivisionResultsPath_Vectors_to_choose)
torch.save(Corresp_bias, DivisionResultsPath_Corresp_bias)


In [None]:
# Number of candidate vectors x flattened filter length.
Vectors_to_choose.shape

(400, 4608)

# Create a new NN

In [None]:
# Keep the first 2*K candidate vectors as the filters (and biases) of the new
# first convolution; each vector is reshaped back to a 512x3x3 kernel.
K = 25
W_mat_conv_1_new = torch.from_numpy(Vectors_to_choose[:2 * K].reshape((-1, 512, 3, 3)))
biass_conv_1_new = torch.from_numpy(Corresp_bias[:2 * K])

In [None]:
# Create the substitute for the residual block
# 1x1 convolution kernels that pick channels out of the 2*K-channel output:
# row i of A_mat1 selects channel 2*i, row i of A_mat2 selects channel 2*i+1.
A_mat1 = torch.zeros(K,2*K,1,1)
A_mat2 = torch.zeros(K,2*K,1,1)

for i in range(K):
  A_mat1[i,2*i,0,0]=1
  A_mat2[i,2*i+1,0,0]=1


class Subs_Res_Block(nn.Module):
    """Smaller replacement for the two-convolution residual branch.

    Structure: a 3x3 convolution to Dim2 channels, whose output is used twice
    (through a ReLU, and through an element-wise max of two 1x1 channel
    projections); both results are concatenated and fed to a second 3x3
    convolution followed by a ReLU.

    The constructor reads the module-level tensors W_mat_conv_1_new,
    biass_conv_1_new, A_mat1 and A_mat2, so their shapes must match
    Dim1/Dim2.
    """
    def __init__(self, Dim1, Dim2,Dim3):
        super().__init__()
        # First convolution, initialised from the division results.
        self.conv1 = nn.Conv2d(Dim1, Dim2,kernel_size=(3, 3),padding=(1,1))
        self.conv1.weight.data.copy_(W_mat_conv_1_new)
        self.conv1.bias.data.copy_(biass_conv_1_new)

        # Bias-free 1x1 convolutions initialised with the selector matrices.
        # NOTE(review): despite the "fixed" name, requires_grad is not turned
        # off here, so an optimizer built from .parameters() will update them
        # -- confirm this is intended.
        self.fixed_conv_part_1 = nn.Conv2d(Dim2, Dim2//2,kernel_size=(1, 1),bias=False )
        self.fixed_conv_part_1.weight.data.copy_(A_mat1)
        self.fixed_conv_part_2 = nn.Conv2d(Dim2, Dim2//2,kernel_size=(1, 1),bias=False)
        self.fixed_conv_part_2.weight.data.copy_(A_mat2)
        # Input channels: Dim2 (ReLU branch) + Dim2//2 (max branch).
        self.conv3 = nn.Conv2d(Dim2+Dim2//2, Dim3,kernel_size=(3, 3), padding=(1,1))


        self.relu2 = nn.ReLU()
        self.relu1 = nn.ReLU()

    def forward(self, xb):
        """Apply the block to a batch of feature maps with Dim1 channels."""
        out = self.conv1(xb)
        # Both projections are taken from the pre-ReLU output of conv1.
        one = self.fixed_conv_part_1(out)
        two = self.fixed_conv_part_2(out)
        out = self.relu1(out)
        # Channel-wise concatenation of the ReLU branch and the max branch.
        out = torch.concatenate((out,torch.max(one,two)),axis=1)
        out = self.conv3(out)
        out = self.relu2(out)
        return out
    def training_step(self, batch):
        """Mean-squared error between the block's output and the target activations."""
        Inps, Outps = batch
        Outp_preds = self(Inps)                  # Generate predictions
        loss = F.mse_loss(Outp_preds  , Outps)  # Calculate loss
        return loss

# Instance used in the next cell to compare parameter counts with model.res2.
model_small_new = Subs_Res_Block(512,2*K,512)

In [None]:
# Parameter count of the substitute block relative to the original res2 branch.
sum(p.numel() for p in model_small_new.parameters())/sum(p.numel() for p in model.res2.parameters())

0.12263939153654305

In [None]:
# Create Dataset for compressed Layer
# Latest captured outputs of the hooked layers, keyed by the hook name.
activation = {}
def getActivation(name):
  """Return a forward hook that stores the module's detached output under `name`."""
  def hook(model, input, output):
    activation[name] = output.detach()
  return hook

def set_hooks(INP_Layer,OUTP_Layer):
  """Register capture hooks on the two layers delimiting the compressed block.

  The output of `INP_Layer` is stored in `activation['INP_to_Compressed_Layer']`
  and the output of `OUTP_Layer` in `activation['OUTP_of_Compressed_Layer']`
  on every forward pass.

  Returns:
    The two hook handles (input hook, output hook); call `.remove()` on them
    to stop capturing. Previously the handles were discarded, which made the
    hooks impossible to detach.
  """
  # register forward hooks on the layers
  hook_INP_to_Compressed_Layer =INP_Layer.register_forward_hook(getActivation('INP_to_Compressed_Layer'))
  hook_OUTP_of_Compressed_Layer = OUTP_Layer.register_forward_hook(getActivation('OUTP_of_Compressed_Layer'))
  return hook_INP_to_Compressed_Layer, hook_OUTP_of_Compressed_Layer

class LayerComputeDataset(Dataset):
    """Dataset of (input, output) activations of a sub-block of `Res_model`.

    Each item runs one CIFAR training image through `Res_model` and returns
    the activations captured by the forward hooks stored in the global
    `activation` dict.

    Args:
        Res_model: network to run; hooks must already be registered on it.
        from_layer: key of `activation` holding the input of the block.
        to_layer: key of `activation` holding the output of the block.
        num_samples: number of training images to expose (capped at the
            size of the training set).
    """
    def __init__(self, Res_model,from_layer, to_layer, num_samples):
        self.from_layer= from_layer
        self.to_layer= to_layer
        self.Res_model=Res_model
        # Honour the requested size instead of silently ignoring it.
        self.num_samples = min(num_samples, train_data.data.shape[0])

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        x= train_data.data[idx]

        with torch.no_grad():
          # Use the evaluation transform (ToTensor + Normalize) so the model
          # sees inputs with the same statistics as the test loader's batches;
          # a bare ToTensor would feed it un-normalised images.
          self.Res_model(transform_test(x).unsqueeze(0).float().to(device))
        input_data = activation[self.from_layer]

        return input_data.squeeze(), activation[self.to_layer].squeeze()


In [None]:
# Capture the output of conv4 (the input of res2) and the output of res2.
set_hooks( model.conv4,model.res2)
DataSetForCompressLayer=LayerComputeDataset(model,'INP_to_Compressed_Layer','OUTP_of_Compressed_Layer',50000)
# num_workers=0: every item runs a forward pass of `model` in this process.
DataLoaderForCompressLayer = DataLoader(DataSetForCompressLayer, batch_size=32, shuffle=True,num_workers=0)

In [None]:
# Substitute block that will be trained to mimic model.res2.
Sub_Res_Model = Subs_Res_Block(Dim1=512, Dim2=2*K, Dim3=512)
Sub_Res_Model = to_device(Sub_Res_Model,device)

In [None]:
class Extended_ResNet9(ImageClassificationBase):
    """ResNet9 variant whose second residual branch is a compressed substitute.

    Args:
        in_channels: number of input image channels.
        num_classes: number of output classes.
        Type_of_Substitution: kind of substitute block; only
            "Subs_Res_Block_v1" is supported.
        Interm_Number: number of intermediate channels of the substitute
            block (its Dim2).

    Raises:
        ValueError: if `Type_of_Substitution` is not a supported value.
    """
    def __init__(self, in_channels, num_classes,Type_of_Substitution,Interm_Number):
        super().__init__()

        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        if Type_of_Substitution == "Subs_Res_Block_v1":
          self.res2_mod = Subs_Res_Block(512, Interm_Number, 512)
        else:
          # Fail with an explicit message instead of an accidental NameError.
          raise ValueError(
              "Unsupported Type_of_Substitution: {!r} (expected 'Subs_Res_Block_v1')".format(
                  Type_of_Substitution))

        self.classifier = nn.Sequential(nn.AdaptiveMaxPool2d((1,1)),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))
    def forward(self, xb):
        """Return class logits for a batch of images."""
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        # Skip connection around the substitute block.
        out = self.res2_mod(out) + out
        out = self.classifier(out)
        return out
# Full network with the compressed residual branch (2*K intermediate channels).
Model_Extended = Extended_ResNet9(3,100,"Subs_Res_Block_v1",Interm_Number=2*K)
Model_Extended.to(device)


In [None]:
def copy_from_model(model, Subst_LAYER,Model_Extended):
    """Fill `Model_Extended` from `model` and from the trained substitute block.

    Every state-dict entry whose key exists in both networks is taken from
    `model`; afterwards the `res2_mod` sub-module is loaded from `Subst_LAYER`.
    """
    source_state = model.state_dict()
    target_state = Model_Extended.state_dict()
    shared_entries = {name: tensor for name, tensor in source_state.items()
                      if name in target_state}
    target_state.update(shared_entries)
    Model_Extended.load_state_dict(target_state)
    Model_Extended.res2_mod.load_state_dict(Subst_LAYER.state_dict())

In [None]:
# Train the substitute block to reproduce the activations of model.res2 and,
# after every pass, plug it into Model_Extended to measure test accuracy.
lossses=np.zeros(85001)      # loss per global step, stored at indices 1, 2, ...
lossses_2=np.zeros(85001)    # loss per step of the current pass, indices 1..idx
indexx=0

optimizer = torch.optim.Adam(Sub_Res_Model.parameters(), lr=0.0001)

for iterations in range(25):
  idx=0
  T=time.time()
  for batch in DataLoaderForCompressLayer:
      idx+=1
      optimizer.zero_grad()
      indexx+=1
      loss = Sub_Res_Model.training_step(batch)
      loss_value = loss.item()
      lossses[indexx]=loss_value
      loss.backward()
      lossses_2[idx]=loss_value
      optimizer.step()
      # NOTE(review): this cuts short only the pass that contains global step
      # 5009; later passes run in full -- confirm the intent.
      if indexx ==5009:
        break
  # Entries of this pass live at indices 1..idx; slicing [:idx] would average
  # in the unused zero at index 0 and drop the last batch.
  print(lossses_2[1:idx+1].mean())
  copy_from_model(model, Sub_Res_Model,Model_Extended)
  print('valid_acc',evaluate(Model_Extended, DeviceDataLoader(testloader, device))['val_acc'])
  print(time.time()-T)





L1 structured pruning comparison

In [None]:
# NOTE(review): N is first assigned in the next cell, so on a fresh
# top-to-bottom run this cell raises NameError (see the recorded output).
N/512

NameError: name 'N' is not defined

In [None]:
# Baseline: zero out the lowest-norm filters of both res2 convolutions in a
# copy of the model and evaluate it.
model_copy = copy.deepcopy(model)
# .detach() returns tensors that share storage with the parameters, so the
# in-place zeroing below already changes model_copy.
W1 = model_copy.res2[0][0].weight.detach()
W2 = model_copy.res2[1][0].weight.detach()
b1 = model_copy.res2[0][0].bias.detach()
b2 = model_copy.res2[1][0].bias.detach()
# Number of filters to zero per convolution: int(512*0.65) of the 512.
N=int(512*(1-0.35))
# NOTE(review): torch.norm without `p` is the Frobenius (L2) norm, while the
# section heading says L1 -- confirm which criterion is intended.
NORMS=torch.tensor([torch.norm(W1[i]) for i in range(512)])
values, indices = torch.topk(NORMS, N, largest=False)
W1[indices]=0
b1[indices]=0
NORMS=torch.tensor([torch.norm(W2[i]) for i in range(512)])
values, indices = torch.topk(NORMS, N, largest=False)
W2[indices]=0
b2[indices]=0
# NOTE(review): each convolution is followed by a BatchNorm2d (see
# conv_block), so a zeroed filter still yields a constant non-zero channel
# after normalisation -- confirm this matches the intended pruning baseline.

# Given the shared storage noted above, these copies write the tensors back
# onto themselves.
model_copy.res2[0][0].weight.data.copy_(W1)
model_copy.res2[0][0].bias.data.copy_(b1)
model_copy.res2[1][0].weight.data.copy_(W2)
model_copy.res2[1][0].bias.data.copy_(b2)

print('valid_acc',evaluate(model_copy, DeviceDataLoader(testloader, device))['val_acc'])



In [None]:
# Reference accuracy of the original, uncompressed model.
print('valid_acc',evaluate(model, DeviceDataLoader(testloader, device))['val_acc'])
