In [15]:
import torch
import torchvision
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split


from scipy import stats
    

import math
import random


def countDiffMasks(mask1,mask2):
    total_diff=0
    for i in range(len(mask1)):
        m_1=mask1[i].flatten()
        m_2=mask2[i].flatten()
        count_same=(m_1 == m_2).sum()
        count_different=m_1.flatten().shape[0]-count_same
        total_diff+=count_different
    return total_diff


def get_mask_compression(mask_whole_model):
    num_total=0
    num_non_zeros=0
    for mask_each_layer in mask_whole_model:
        num_total+=torch.numel(mask_each_layer)
        num_non_zeros+=torch.count_nonzero(mask_each_layer)
        
    return (num_total-num_non_zeros)/num_total

    
    

def prune_model_get_mask(model,prune_rate):
    '''
    works purely on the model to get
    mask
    '''
    mask_whole_model=[]
    for nm, params in model.named_parameters():
        if "weight" in nm and "bn" not in nm and "linear" not in nm:
            mask_layer=torch.ones(params.shape)
#             print(nm,params.shape)
            abs_var=torch.std(torch.abs(params.data))
#             print(abs_var)
#             print(params)
            threshold=abs_var*prune_rate
            num_components=params.shape[0]
            for index_component in range(num_components):
                values=params[index_component]            
                re_shaped_values=values.flatten()                
                mask_vals = (torch.abs(re_shaped_values)>threshold).float()
                mask_vals=mask_vals.reshape(values.shape)
#                 print(mask_vals.shape)
                mask_layer[index_component]=mask_vals
            mask_whole_model.append(mask_layer)
    return mask_whole_model
 
    
def get_thresholds_each_layer(model,prune_rate):
    thresholds_per_layer=[]
    for nm, params in model.named_parameters():
        if "weight" in nm and "bn" not in nm and "linear" not in nm:
            mask_layer=torch.ones(params.shape)
            abs_std=torch.std(torch.abs(params.data))
            threshold=abs_std*prune_rate
            thresholds_per_layer.append(threshold)
    return thresholds_per_layer
    
                
def apply_mask_model(model,list_mask_whole_model):
    mask_layer_count=0
    for nm, params in model.named_parameters():
        if "weight" in nm and "bn" not in nm and "linear" not in nm:
            mask_layer=list_mask_whole_model[mask_layer_count]
            with torch.no_grad():
                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#                 print("Devices are ",params.device,mask_layer.device)
                mask_layer=mask_layer.to(device)
    
                params.data=params.data*mask_layer            
            mask_layer_count+=1
    

def store_weights_in_dic(weight_description,model):
    for nm, params in model.named_parameters():
        if "weight" in nm and "bn" not in nm and "linear" not in nm:
            if nm not in weight_description:
                weight_description[nm]={}
            num_components=params.shape[0]
            for index_component in range(num_components):
                if index_component not in weight_description[nm]:
                    weight_description[nm][index_component]={}
                values=params[index_component]
                flat_values=values.flatten()
                for index_wt in range(flat_values.shape[0]):
                    if index_wt not in weight_description[nm][index_component]:
                        weight_description[nm][index_component][index_wt]=[]
                    weight_description[nm][index_component][index_wt].append(flat_values[index_wt].detach().item())
    return weight_description


def get_boolean_dict_weight_dict(weight_description,prune_rate,thresholds_per_layer):
    '''
    works on the dictionary of weights
    to create a dict of 1s and 0s to show
    how many times weight is more than threshold
    per layer
    '''
    boolean_weight_description={}
    count=0
    for layer in weight_description.keys():  
#         print("Count = ",count)
        threshold_this_layer=thresholds_per_layer[count]
#         print("Threshold for layer ",count,layer,"is ",threshold_this_layer)
        if layer not in boolean_weight_description:
            boolean_weight_description[layer]={}
        for index_component in weight_description[layer].keys():
            if index_component not in boolean_weight_description[layer]:
                boolean_weight_description[layer][index_component]={}
            for index_wt in weight_description[layer][index_component].keys():
                if index_wt not in boolean_weight_description[layer][index_component]:
                    boolean_weight_description[layer][index_component][index_wt]=[]
                all_wts=weight_description[layer][index_component][index_wt]
                all_wts_boolean=[]
                for wt in all_wts:
                    if abs(wt)>threshold_this_layer:
                        all_wts_boolean.append(1)
                    else:
                        all_wts_boolean.append(0)
                boolean_weight_description[layer][index_component][index_wt]=all_wts_boolean                    
        count+=1
        
    return boolean_weight_description
    
# create mask from boolean weight dictionary
def create_mask_from_boolean_wt(model,boolean_wt_dict):
    mask_whole_model=[]
    for nm, params in model.named_parameters():
        if "weight" in nm and "bn" not in nm and "linear" not in nm:
            mask_layer=torch.ones(params.shape)
#             print(nm,params.shape)
            abs_var=torch.var(torch.abs(params.data))
#             print(abs_var)
#             print(params)
#             threshold=abs_var*prune_rate
            num_components=params.shape[0]
            for index_component in range(num_components):
                values=params[index_component]            
                re_shaped_values=values.flatten() 
                mask_vals=[]
                for val_index in range(re_shaped_values.shape[0]):
                    boolean_vals=boolean_wt_dict[nm][index_component][val_index]
                    m = stats.mode(boolean_vals)
#                     print("Verdict for this weight is ",m[0][0])
                    mask_vals.append(m[0][0])
#                 mask_vals = (torch.abs(re_shaped_values)>threshold).float()                
                mask_vals=np.asarray(mask_vals)
                mask_vals=mask_vals.reshape(values.shape)
#                 print(mask_vals.shape)
                mask_layer[index_component]=torch.from_numpy(mask_vals)
            mask_whole_model.append(mask_layer)
    return mask_whole_model
         


In [2]:
# this part copied from shrinkbench

def nonzero(tensor):
    """Returns absolute number of values different from 0

    Arguments:
        tensor {numpy.ndarray} -- Array to compute over

    Returns:
        int -- Number of nonzero elements
    """
    return np.sum(tensor != 0.0)


def model_size(model, as_bits=False):
    """Returns absolute and nonzero model size

    Arguments:
        model {torch.nn.Module} -- Network to compute model size over

    Keyword Arguments:
        as_bits {bool} -- Whether to account for the size of dtype

    Returns:
        int -- Total number of weight & bias params
        int -- Out total_params exactly how many are nonzero
    """

    total_params = 0
    nonzero_params = 0
    for tensor in model.parameters():
        t = np.prod(tensor.shape)
        nz = nonzero(tensor.detach().cpu().numpy())
        if as_bits:
            bits = dtype2bits[tensor.dtype]
            t *= bits
            nz *= bits
        total_params += t
        nonzero_params += nz
    return int(total_params), int(nonzero_params)


In [3]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))        

def correct(output, target, topk=(1,)):
    """Computes how many correct outputs with respect to targets

    Does NOT compute accuracy but just a raw amount of correct
    outputs given target labels. This is done for each value in
    topk. A value is considered correct if target is in the topk
    highest values of output.
    The values returned are upperbounded by the given batch size

    [description]

    Arguments:
        output {torch.Tensor} -- Output prediction of the model
        target {torch.Tensor} -- Target labels from data

    Keyword Arguments:
        topk {iterable} -- [Iterable of values of k to consider as correct] (default: {(1,)})

    Returns:
        List(int) -- Number of correct values for each topk
    """

    with torch.no_grad():
        maxk = max(topk)
        # Only need to do topk for highest k, reuse for the rest
        _, pred = output.topk(k=maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(torch.tensor(correct_k.item()))
        return res


# below copied from shrinkbench, to be used later
# def accuracy(model, dataloader, topk=(1,)):
#     """Compute accuracy of a model over a dataloader for various topk

#     Arguments:
#         model {torch.nn.Module} -- Network to evaluate
#         dataloader {torch.utils.data.DataLoader} -- Data to iterate over

#     Keyword Arguments:
#         topk {iterable} -- [Iterable of values of k to consider as correct] (default: {(1,)})

#     Returns:
#         List(float) -- List of accuracies for each topk
#     """

#     # Use same device as model
#     device = next(model.parameters()).device

#     accs = np.zeros(len(topk))
#     with torch.no_grad():

#         for i, (input, target) in enumerate(dataloader):
#             input = input.to(device)
#             target = target.to(device)
#             output = model(input)

#             accs += np.array(correct(output, target, topk))

#     # Normalize over data length
#     accs /= len(dataloader.dataset)

#     return accs





# LENET 300-100 for MNIST and comparison
class FashionMnistNet(nn.Module):
    """Feedfoward neural network with 1 hidden layer"""
    def __init__(self):
        super(FashionMnistNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)        
        self.fc4 = nn.Linear(64, 10)
        self.fc4.is_classifier = True
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))        
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        top_1, top_5 = correct(out, labels,topk=(1,5))
#         print("Batch is ",batch[1].shape)
        
        top_1=top_1/batch[1].shape[0]
        top_5=top_5/batch[1].shape[0]

#         print("corr",top_1,top_5)
#         return {'val_loss': loss, 'val_acc': acc}
        return {'val_loss': loss, 'val_acc': acc, 'top_1': top_1, 'top_5': top_5}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        
        batch_top_1s = [x['top_1'] for x in outputs]
#         print(batch_top_1s)
        epoch_top_1 = torch.stack(batch_top_1s).mean()      # Combine top_1
        
        batch_top_5s = [x['top_5'] for x in outputs]
        epoch_top_5 = torch.stack(batch_top_5s).mean()      # Combine top_5
        
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item(),
               'val_top_1': epoch_top_1.item(), 'val_top_5': epoch_top_5.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}, val_top_1: {:.4f}, val_top_5: {:.4f}".format(
                                epoch, result['val_loss'], result['val_acc'], 
                                result['val_top_1'], result['val_top_5']))
        

In [4]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)



class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)
    
    
def evaluate(model, val_loader):
    """Evaluate the model's performance on the validation set"""
    outputs = [model.validation_step(batch) for batch in val_loader]
#     print("outputs are ",outputs)
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD,
        weight_description=None,mask_whole_model=None):
    """Train the model using gradient descent"""
    print("At train")
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if mask_whole_model:
#                 print("Applying mask")
                apply_mask_model(model,mask_whole_model)
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
        print("wt desc = ",weight_description)
        if weight_description!=None:
            print("going for weight")
            weight_description=store_weights_in_dic(weight_description,model)
    return history, weight_description


def predict_image(img, model):
    xb = to_device(img.unsqueeze(0), device)
    yb = model(xb)
    _, preds  = torch.max(yb, dim=1)
    return preds[0].item()

In [5]:
# print(dataset.data.shape)

# val_size = 10000
# train_size = len(dataset) - val_size

# train_ds, val_ds = random_split(dataset, [train_size, val_size])
# print(len(train_ds), len(val_ds))


# batch_size=128

# train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
# val_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)


# shape=dataset[0][0].shape
# input_size=1
# for s in shape:
#     input_size*=s
# print(input_size)

# for images, labels in train_loader:
#     print('images.shape:', images.shape)
#     inputs = images.reshape(-1, input_size)
#     print('inputs.shape:', inputs.shape)
#     break
    
# input_size = inputs.shape[-1]
# print(input_size)
# hidden_size = 32




In [6]:
print("Program with weighht pruning, dynamic, many ranges")

print("Torch cuda ",torch.cuda.is_available())


device = get_default_device()
print("device ",device)



dataset = FashionMNIST(root='data/', download=True, transform=ToTensor())


# Define test dataset
test_dataset = FashionMNIST(root='data/', train=False,transform=ToTensor())

val_size = 10000
train_size = len(dataset) - val_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])

batch_size=128

train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=256)

shape=dataset[0][0].shape
input_size=1
for s in shape:
    input_size*=s

    
    


num_classes = 10


Program with weighht pruning, dynamic, many ranges
Torch cuda  False
device  cpu


In [7]:
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)
test_loader = DeviceDataLoader(test_loader, device)

targets=train_ds.dataset.targets
training_data=torch.tensor(train_ds.dataset.data)

training_data = training_data.to(device=device)


  


In [8]:
model=FashionMnistNet()

history = [evaluate(model, val_loader)]
print("initial result",history)
# weight_description={}
epochs=20
lr=0.01
# history2,weight_description = fit(epochs, lr, model, train_loader, val_loader,weight_description=weight_description)



initial result [{'val_loss': 2.3055808544158936, 'val_acc': 0.03623046725988388, 'val_top_1': 0.03623046725988388, 'val_top_5': 0.5213867425918579}]


In [59]:
model_state_path="model_state/mod.pt"
model.load_state_dict(torch.load(model_state_path))

result = evaluate(model, test_loader)
print("Test result is ",result)


total_size,nz_size=model_size(model)
compression=(total_size-nz_size)/total_size
print("Compression=",compression)



prune_rate=3.403 # compression of 0.9627


metrics={}
metrics["prune_rate"]=[]
metrics["compression"]=[]
metrics["epochs"]=[]
metrics["top_5"]=[]
metrics["top_1"]=[]



prune_rate_range=[0.1,0.3,0.4,0.8,1.1,1.3,1.5,1.7,
                  1.8,2.1,2.4,2.9,3.1,3.15,3.12,3.35,3.4,3.5,3.6,3.7,3.8,3.9,4,6]

# prune_rate_range=[0.1,1.5,3.9,4,6]

for prune_rate in prune_rate_range:
    mask_whole_model=prune_model_get_mask(model,prune_rate)

    print("Mask compression = ",prune_rate,get_mask_compression(mask_whole_model))


    apply_mask_model(model,mask_whole_model)
    total_size,nz_size=model_size(model)
    compression=(total_size-nz_size)/total_size
    res = evaluate(model, test_loader)

    print("After pruning, Compression=",compression,"Result after pruning is ",res)


    epochs=35
    history_prune,_ = fit(epochs, lr, model, train_loader, val_loader,
                          mask_whole_model=mask_whole_model)


    total_size,nz_size=model_size(model)
    compression=(total_size-nz_size)/total_size
    res = evaluate(model, test_loader)

    print("Compression=",compression,"Result after pruning is ",res)
    metrics["prune_rate"].append(prune_rate)
    metrics["compression"].append(compression)
    metrics["epochs"].append(epochs)
    metrics["top_5"].append(res['val_top_5'])
    metrics["top_1"].append(res['val_top_1'])




Test result is  {'val_loss': 0.3928965628147125, 'val_acc': 0.860644519329071, 'val_top_1': 0.860644519329071, 'val_top_5': 0.996777355670929}
Compression= 0.0
Mask compression =  0.1 tensor(0.0350)
After pruning, Compression= 0.03497664379103814 Result after pruning is  {'val_loss': 0.3937625288963318, 'val_acc': 0.8609374761581421, 'val_top_1': 0.8609374761581421, 'val_top_5': 0.9966796636581421}
At train
Epoch [0], val_loss: 0.3293, val_acc: 0.8868, val_top_1: 0.8868, val_top_5: 0.9972
wt desc =  None
Epoch [1], val_loss: 0.3307, val_acc: 0.8829, val_top_1: 0.8829, val_top_5: 0.9973
wt desc =  None
Epoch [2], val_loss: 0.3344, val_acc: 0.8820, val_top_1: 0.8820, val_top_5: 0.9969
wt desc =  None
Epoch [3], val_loss: 0.3383, val_acc: 0.8801, val_top_1: 0.8801, val_top_5: 0.9972
wt desc =  None
Epoch [4], val_loss: 0.3387, val_acc: 0.8787, val_top_1: 0.8787, val_top_5: 0.9968
wt desc =  None
Epoch [5], val_loss: 0.3363, val_acc: 0.8799, val_top_1: 0.8799, val_top_5: 0.9971
wt desc =  

Epoch [0], val_loss: 0.2991, val_acc: 0.8955, val_top_1: 0.8955, val_top_5: 0.9974
wt desc =  None
Epoch [1], val_loss: 0.3001, val_acc: 0.8977, val_top_1: 0.8977, val_top_5: 0.9979
wt desc =  None
Epoch [2], val_loss: 0.3145, val_acc: 0.8910, val_top_1: 0.8910, val_top_5: 0.9981
wt desc =  None
Epoch [3], val_loss: 0.3125, val_acc: 0.8962, val_top_1: 0.8962, val_top_5: 0.9978
wt desc =  None
Epoch [4], val_loss: 0.3091, val_acc: 0.8924, val_top_1: 0.8924, val_top_5: 0.9980
wt desc =  None
Epoch [5], val_loss: 0.3085, val_acc: 0.8900, val_top_1: 0.8900, val_top_5: 0.9980
wt desc =  None
Epoch [6], val_loss: 0.2949, val_acc: 0.9000, val_top_1: 0.9000, val_top_5: 0.9977
wt desc =  None
Epoch [7], val_loss: 0.3138, val_acc: 0.8902, val_top_1: 0.8902, val_top_5: 0.9980
wt desc =  None
Epoch [8], val_loss: 0.3013, val_acc: 0.8980, val_top_1: 0.8980, val_top_5: 0.9979
wt desc =  None
Epoch [9], val_loss: 0.3026, val_acc: 0.8987, val_top_1: 0.8987, val_top_5: 0.9979
wt desc =  None
Epoch [10]

Epoch [4], val_loss: 0.3360, val_acc: 0.8921, val_top_1: 0.8921, val_top_5: 0.9978
wt desc =  None
Epoch [5], val_loss: 0.3217, val_acc: 0.8983, val_top_1: 0.8983, val_top_5: 0.9971
wt desc =  None
Epoch [6], val_loss: 0.3331, val_acc: 0.8955, val_top_1: 0.8955, val_top_5: 0.9977
wt desc =  None
Epoch [7], val_loss: 0.3183, val_acc: 0.8986, val_top_1: 0.8986, val_top_5: 0.9973
wt desc =  None
Epoch [8], val_loss: 0.3674, val_acc: 0.8827, val_top_1: 0.8827, val_top_5: 0.9972
wt desc =  None
Epoch [9], val_loss: 0.3248, val_acc: 0.8973, val_top_1: 0.8973, val_top_5: 0.9974
wt desc =  None
Epoch [10], val_loss: 0.3357, val_acc: 0.8984, val_top_1: 0.8984, val_top_5: 0.9969
wt desc =  None
Epoch [11], val_loss: 0.3571, val_acc: 0.8895, val_top_1: 0.8895, val_top_5: 0.9975
wt desc =  None
Epoch [12], val_loss: 0.3509, val_acc: 0.8935, val_top_1: 0.8935, val_top_5: 0.9970
wt desc =  None
Epoch [13], val_loss: 0.3306, val_acc: 0.8965, val_top_1: 0.8965, val_top_5: 0.9970
wt desc =  None
Epoch 

Epoch [8], val_loss: 0.3766, val_acc: 0.8968, val_top_1: 0.8968, val_top_5: 0.9963
wt desc =  None
Epoch [9], val_loss: 0.3787, val_acc: 0.8970, val_top_1: 0.8970, val_top_5: 0.9956
wt desc =  None
Epoch [10], val_loss: 0.3904, val_acc: 0.8938, val_top_1: 0.8938, val_top_5: 0.9962
wt desc =  None
Epoch [11], val_loss: 0.3748, val_acc: 0.8979, val_top_1: 0.8979, val_top_5: 0.9962
wt desc =  None
Epoch [12], val_loss: 0.3993, val_acc: 0.8955, val_top_1: 0.8955, val_top_5: 0.9957
wt desc =  None
Epoch [13], val_loss: 0.3780, val_acc: 0.8976, val_top_1: 0.8976, val_top_5: 0.9959
wt desc =  None
Epoch [14], val_loss: 0.3966, val_acc: 0.8968, val_top_1: 0.8968, val_top_5: 0.9956
wt desc =  None
Epoch [15], val_loss: 0.4181, val_acc: 0.8887, val_top_1: 0.8887, val_top_5: 0.9952
wt desc =  None
Epoch [16], val_loss: 0.4012, val_acc: 0.8963, val_top_1: 0.8963, val_top_5: 0.9952
wt desc =  None
Epoch [17], val_loss: 0.4050, val_acc: 0.8944, val_top_1: 0.8944, val_top_5: 0.9957
wt desc =  None
Ep

Epoch [12], val_loss: 0.4582, val_acc: 0.8919, val_top_1: 0.8919, val_top_5: 0.9954
wt desc =  None
Epoch [13], val_loss: 0.4480, val_acc: 0.8937, val_top_1: 0.8937, val_top_5: 0.9951
wt desc =  None
Epoch [14], val_loss: 0.4857, val_acc: 0.8917, val_top_1: 0.8917, val_top_5: 0.9940
wt desc =  None
Epoch [15], val_loss: 0.4658, val_acc: 0.8934, val_top_1: 0.8934, val_top_5: 0.9944
wt desc =  None
Epoch [16], val_loss: 0.4841, val_acc: 0.8905, val_top_1: 0.8905, val_top_5: 0.9942
wt desc =  None
Epoch [17], val_loss: 0.4583, val_acc: 0.8931, val_top_1: 0.8931, val_top_5: 0.9951
wt desc =  None
Epoch [18], val_loss: 0.4717, val_acc: 0.8944, val_top_1: 0.8944, val_top_5: 0.9947
wt desc =  None
Epoch [19], val_loss: 0.4936, val_acc: 0.8839, val_top_1: 0.8839, val_top_5: 0.9947
wt desc =  None
Epoch [20], val_loss: 0.4660, val_acc: 0.8945, val_top_1: 0.8945, val_top_5: 0.9950
wt desc =  None
Epoch [21], val_loss: 0.4815, val_acc: 0.8933, val_top_1: 0.8933, val_top_5: 0.9948
wt desc =  None


Epoch [16], val_loss: 0.4466, val_acc: 0.8917, val_top_1: 0.8917, val_top_5: 0.9931
wt desc =  None
Epoch [17], val_loss: 0.4435, val_acc: 0.8926, val_top_1: 0.8926, val_top_5: 0.9931
wt desc =  None
Epoch [18], val_loss: 0.4579, val_acc: 0.8896, val_top_1: 0.8896, val_top_5: 0.9937
wt desc =  None
Epoch [19], val_loss: 0.4506, val_acc: 0.8920, val_top_1: 0.8920, val_top_5: 0.9927
wt desc =  None
Epoch [20], val_loss: 0.4592, val_acc: 0.8907, val_top_1: 0.8907, val_top_5: 0.9937
wt desc =  None
Epoch [21], val_loss: 0.4572, val_acc: 0.8927, val_top_1: 0.8927, val_top_5: 0.9925
wt desc =  None
Epoch [22], val_loss: 0.4589, val_acc: 0.8918, val_top_1: 0.8918, val_top_5: 0.9934
wt desc =  None
Epoch [23], val_loss: 0.4608, val_acc: 0.8905, val_top_1: 0.8905, val_top_5: 0.9935
wt desc =  None
Epoch [24], val_loss: 0.4626, val_acc: 0.8930, val_top_1: 0.8930, val_top_5: 0.9926
wt desc =  None
Epoch [25], val_loss: 0.4720, val_acc: 0.8922, val_top_1: 0.8922, val_top_5: 0.9930
wt desc =  None


Epoch [20], val_loss: 0.4238, val_acc: 0.8897, val_top_1: 0.8897, val_top_5: 0.9949
wt desc =  None
Epoch [21], val_loss: 0.4229, val_acc: 0.8889, val_top_1: 0.8889, val_top_5: 0.9955
wt desc =  None
Epoch [22], val_loss: 0.4298, val_acc: 0.8873, val_top_1: 0.8873, val_top_5: 0.9956
wt desc =  None
Epoch [23], val_loss: 0.4297, val_acc: 0.8881, val_top_1: 0.8881, val_top_5: 0.9957
wt desc =  None
Epoch [24], val_loss: 0.4284, val_acc: 0.8896, val_top_1: 0.8896, val_top_5: 0.9952
wt desc =  None
Epoch [25], val_loss: 0.4330, val_acc: 0.8887, val_top_1: 0.8887, val_top_5: 0.9956
wt desc =  None
Epoch [26], val_loss: 0.4312, val_acc: 0.8887, val_top_1: 0.8887, val_top_5: 0.9955
wt desc =  None
Epoch [27], val_loss: 0.4361, val_acc: 0.8877, val_top_1: 0.8877, val_top_5: 0.9949
wt desc =  None
Epoch [28], val_loss: 0.4383, val_acc: 0.8878, val_top_1: 0.8878, val_top_5: 0.9952
wt desc =  None
Epoch [29], val_loss: 0.4464, val_acc: 0.8885, val_top_1: 0.8885, val_top_5: 0.9947
wt desc =  None


Epoch [1], val_loss: 0.5599, val_acc: 0.8802, val_top_1: 0.8802, val_top_5: 0.9934
wt desc =  None
Epoch [2], val_loss: 0.5581, val_acc: 0.8832, val_top_1: 0.8832, val_top_5: 0.9928
wt desc =  None
Epoch [3], val_loss: 0.5444, val_acc: 0.8827, val_top_1: 0.8827, val_top_5: 0.9943
wt desc =  None
Epoch [4], val_loss: 0.5510, val_acc: 0.8820, val_top_1: 0.8820, val_top_5: 0.9947
wt desc =  None
Epoch [5], val_loss: 0.5453, val_acc: 0.8827, val_top_1: 0.8827, val_top_5: 0.9949
wt desc =  None
Epoch [6], val_loss: 0.5450, val_acc: 0.8840, val_top_1: 0.8840, val_top_5: 0.9938
wt desc =  None
Epoch [7], val_loss: 0.5505, val_acc: 0.8854, val_top_1: 0.8854, val_top_5: 0.9943
wt desc =  None
Epoch [8], val_loss: 0.5466, val_acc: 0.8861, val_top_1: 0.8861, val_top_5: 0.9936
wt desc =  None
Epoch [9], val_loss: 0.5476, val_acc: 0.8868, val_top_1: 0.8868, val_top_5: 0.9946
wt desc =  None
Epoch [10], val_loss: 0.5405, val_acc: 0.8867, val_top_1: 0.8867, val_top_5: 0.9940
wt desc =  None
Epoch [11

Epoch [5], val_loss: 0.6028, val_acc: 0.8781, val_top_1: 0.8781, val_top_5: 0.9948
wt desc =  None
Epoch [6], val_loss: 0.5881, val_acc: 0.8806, val_top_1: 0.8806, val_top_5: 0.9949
wt desc =  None
Epoch [7], val_loss: 0.6357, val_acc: 0.8727, val_top_1: 0.8727, val_top_5: 0.9956
wt desc =  None
Epoch [8], val_loss: 0.5998, val_acc: 0.8804, val_top_1: 0.8804, val_top_5: 0.9928
wt desc =  None
Epoch [9], val_loss: 0.6098, val_acc: 0.8796, val_top_1: 0.8796, val_top_5: 0.9949
wt desc =  None
Epoch [10], val_loss: 0.5983, val_acc: 0.8797, val_top_1: 0.8797, val_top_5: 0.9951
wt desc =  None
Epoch [11], val_loss: 0.6127, val_acc: 0.8780, val_top_1: 0.8780, val_top_5: 0.9948
wt desc =  None
Epoch [12], val_loss: 0.5978, val_acc: 0.8820, val_top_1: 0.8820, val_top_5: 0.9947
wt desc =  None
Epoch [13], val_loss: 0.6388, val_acc: 0.8814, val_top_1: 0.8814, val_top_5: 0.9912
wt desc =  None
Epoch [14], val_loss: 0.6121, val_acc: 0.8810, val_top_1: 0.8810, val_top_5: 0.9943
wt desc =  None
Epoch

Epoch [9], val_loss: 0.6061, val_acc: 0.8813, val_top_1: 0.8813, val_top_5: 0.9953
wt desc =  None
Epoch [10], val_loss: 0.6098, val_acc: 0.8819, val_top_1: 0.8819, val_top_5: 0.9950
wt desc =  None
Epoch [11], val_loss: 0.6136, val_acc: 0.8817, val_top_1: 0.8817, val_top_5: 0.9951
wt desc =  None
Epoch [12], val_loss: 0.6138, val_acc: 0.8836, val_top_1: 0.8836, val_top_5: 0.9945
wt desc =  None
Epoch [13], val_loss: 0.6162, val_acc: 0.8801, val_top_1: 0.8801, val_top_5: 0.9954
wt desc =  None
Epoch [14], val_loss: 0.6203, val_acc: 0.8826, val_top_1: 0.8826, val_top_5: 0.9950
wt desc =  None
Epoch [15], val_loss: 0.6156, val_acc: 0.8825, val_top_1: 0.8825, val_top_5: 0.9949
wt desc =  None
Epoch [16], val_loss: 0.6241, val_acc: 0.8817, val_top_1: 0.8817, val_top_5: 0.9953
wt desc =  None
Epoch [17], val_loss: 0.6408, val_acc: 0.8797, val_top_1: 0.8797, val_top_5: 0.9941
wt desc =  None
Epoch [18], val_loss: 0.6163, val_acc: 0.8793, val_top_1: 0.8793, val_top_5: 0.9951
wt desc =  None
E

Epoch [13], val_loss: 0.6244, val_acc: 0.8741, val_top_1: 0.8741, val_top_5: 0.9912
wt desc =  None
Epoch [14], val_loss: 0.6113, val_acc: 0.8805, val_top_1: 0.8805, val_top_5: 0.9930
wt desc =  None
Epoch [15], val_loss: 0.6114, val_acc: 0.8809, val_top_1: 0.8809, val_top_5: 0.9926
wt desc =  None
Epoch [16], val_loss: 0.6179, val_acc: 0.8821, val_top_1: 0.8821, val_top_5: 0.9910
wt desc =  None
Epoch [17], val_loss: 0.6079, val_acc: 0.8830, val_top_1: 0.8830, val_top_5: 0.9926
wt desc =  None
Epoch [18], val_loss: 0.6216, val_acc: 0.8763, val_top_1: 0.8763, val_top_5: 0.9934
wt desc =  None
Epoch [19], val_loss: 0.6196, val_acc: 0.8774, val_top_1: 0.8774, val_top_5: 0.9938
wt desc =  None
Epoch [20], val_loss: 0.6430, val_acc: 0.8735, val_top_1: 0.8735, val_top_5: 0.9937
wt desc =  None
Epoch [21], val_loss: 0.6378, val_acc: 0.8742, val_top_1: 0.8742, val_top_5: 0.9927
wt desc =  None
Epoch [22], val_loss: 0.6136, val_acc: 0.8805, val_top_1: 0.8805, val_top_5: 0.9913
wt desc =  None


Epoch [17], val_loss: 1.0729, val_acc: 0.5916, val_top_1: 0.5916, val_top_5: 0.9712
wt desc =  None
Epoch [18], val_loss: 1.0657, val_acc: 0.5489, val_top_1: 0.5489, val_top_5: 0.9706
wt desc =  None
Epoch [19], val_loss: 1.0604, val_acc: 0.5480, val_top_1: 0.5480, val_top_5: 0.9711
wt desc =  None
Epoch [20], val_loss: 1.0558, val_acc: 0.5521, val_top_1: 0.5521, val_top_5: 0.9701
wt desc =  None
Epoch [21], val_loss: 1.0507, val_acc: 0.5496, val_top_1: 0.5496, val_top_5: 0.9722
wt desc =  None
Epoch [22], val_loss: 1.0447, val_acc: 0.5517, val_top_1: 0.5517, val_top_5: 0.9718
wt desc =  None
Epoch [23], val_loss: 1.0418, val_acc: 0.5507, val_top_1: 0.5507, val_top_5: 0.9733
wt desc =  None
Epoch [24], val_loss: 1.0386, val_acc: 0.5497, val_top_1: 0.5497, val_top_5: 0.9744
wt desc =  None
Epoch [25], val_loss: 1.0325, val_acc: 0.5521, val_top_1: 0.5521, val_top_5: 0.9741
wt desc =  None
Epoch [26], val_loss: 1.0302, val_acc: 0.5490, val_top_1: 0.5490, val_top_5: 0.9750
wt desc =  None


In [60]:
import pandas as pd
dataframe_results=pd.DataFrame(metrics)
print(dataframe_results.head())

   prune_rate  compression  epochs     top_5     top_1
0         0.1     0.034977      35  0.996289  0.870215
1         0.3     0.124080      35  0.997168  0.878223
2         0.4     0.196468      35  0.996875  0.877930
3         0.8     0.382292      35  0.996094  0.881836
4         1.1     0.565377      35  0.996094  0.882617


In [61]:
dataframe_results.to_csv("results_sheet/song_hn.csv",
                         index=False)


Compression= 0.9588732997750884 Result after pruning is  {'val_loss': 0.47730427980422974, 'val_acc': 0.841601550579071, 'val_top_1': 0.841601550579071, 'val_top_5': 0.9935547113418579}
