In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import time
import os
import matplotlib.pyplot as plt
from torchvision import transforms
import copy

In [2]:
class MNISTDataset(Dataset):
    """MNIST digits loaded from a CSV file and standardized.

    The file is read with ``pandas.read_csv`` using its defaults. Column 0 is
    taken as the label and every remaining column as one pixel of a flattened
    28x28 image. Pixels are standardized with the mean and standard deviation
    computed over the whole file.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts (path or file-like).
        transform: Optional callable applied to each image while it has shape
            ``(1, 28, 28)``; the leading axis is squeezed away afterwards.

    Attributes:
        labels: Array of labels, one per row of the CSV.
        pixels: ``float32`` array of shape ``(N, 28, 28)``, standardized.
        pixels_mean / pixels_std: Statistics used for the standardization.
    """

    def __init__(self, csv_file, transform=None):
        data = pd.read_csv(csv_file)
        self.labels = data.iloc[:, 0].values
        raw_pixels = data.iloc[:, 1:].values.astype('float32').reshape(-1, 28, 28)

        # Standardize with dataset-wide statistics.
        self.pixels_mean = raw_pixels.mean()
        self.pixels_std = raw_pixels.std()
        # A constant-valued file has std == 0; dividing by it would fill the
        # dataset with NaN/inf, so fall back to centering only.
        scale = self.pixels_std if self.pixels_std > 0 else 1.0
        self.pixels = (raw_pixels - self.pixels_mean) / scale

        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows in the CSV)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx`` as tensors.

        The image is always a ``torch.Tensor``: ``(28, 28)`` without a
        transform, or whatever the transform returns with its leading axis of
        size 1 removed.
        """
        # Convert unconditionally: previously the no-transform path returned a
        # NumPy array and crashed on ``.squeeze(0)`` (axis 0 has size 28).
        image = torch.tensor(self.pixels[idx]).unsqueeze(0)

        if self.transform:
            image = self.transform(image)

        return image.squeeze(0), torch.tensor(self.labels[idx])
    

class FFNN(nn.Module):
    """Feed-forward classifier: a stack of ``nn.Linear`` layers with ReLU between them.

    The stack is ``input_size -> hidden_size`` followed by
    ``num_hidden_layers - 1`` layers of ``hidden_size -> hidden_size`` and a
    final ``hidden_size -> num_classes`` projection. The last layer's output
    is returned without an activation.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of every hidden layer.
        num_classes: Size of the output vector.
        num_hidden_layers: Number of hidden layers; values below 1 still
            produce one hidden layer.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_hidden_layers):
        super().__init__()
        # Constructor arguments are kept as attributes.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.num_hidden_layers = num_hidden_layers

        # Consecutive pairs of this list are the (in, out) sizes of each layer.
        widths = (
            [input_size, hidden_size]
            + [hidden_size] * (num_hidden_layers - 1)
            + [num_classes]
        )
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` per sample and run it through the layer stack."""
        activations = x.view(x.size(0), -1)
        *hidden_layers, output_layer = self.layers
        for linear in hidden_layers:
            activations = self.relu(linear(activations))
        return output_layer(activations)

In [3]:
def create_dataloader(dataset_path, batch_size, is_train=True):
    """Wrap an MNIST CSV file in a ``DataLoader`` of center-cropped images.

    Args:
        dataset_path: CSV path handed to ``MNISTDataset``.
        batch_size: Number of samples per batch.
        is_train: When true the loader shuffles; otherwise order is kept.

    Returns:
        A ``DataLoader`` over the dataset, with every image passed through
        ``CenterCrop(20)``.
    """
    center_crop = transforms.Compose([
        transforms.CenterCrop(20)  # Crop to 20x20 as specified
    ])
    return DataLoader(
        MNISTDataset(dataset_path, transform=center_crop),
        batch_size=batch_size,
        shuffle=is_train,
    )

def print_size_of_model(model, label=""):
    """Return the on-disk size, in bytes, of the model's sparse-encoded state dict.

    Every tensor in ``model.state_dict()`` is converted with ``to_sparse()``
    and the resulting dict is serialized with ``torch.save``; the size of
    that file is the return value. Despite the name, nothing is printed.

    The file is written inside a private temporary directory that is removed
    on exit (also when saving fails), so a ``temp.pt`` the user may have in
    the working directory is never overwritten or deleted.

    Args:
        model: Module whose state dict is measured. The module itself is not
            modified.
        label: Unused; retained so existing callers keep working.

    Returns:
        File size in bytes as an ``int``.
    """
    import tempfile  # local import: keeps this block self-contained

    # Re-encode in place on the dict returned by state_dict() (a fresh mapping
    # per call), so the serialized container type stays the same as before.
    sparse_state = model.state_dict()
    for name, tensor in list(sparse_state.items()):
        sparse_state[name] = tensor.to_sparse()

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same base name as before so the archive layout, and therefore the
        # measured size, is unchanged.
        scratch_path = os.path.join(scratch_dir, "temp.pt")
        torch.save(sparse_state, scratch_path)
        return os.path.getsize(scratch_path)

def count_parameters(model):
    """Return the total number of elements across parameters that require gradients."""
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


In [4]:
def train_model(model, train_loader, val_loader, epochs, learning_rate, device):
    """Train ``model`` with Adam and cross-entropy, reporting accuracy after each epoch.

    Args:
        model: Module to train; it is moved to ``device`` and updated in place.
        train_loader: Iterable of ``(data, target)`` batches used for training.
        val_loader: Iterable of ``(data, target)`` batches used to compute the
            accuracy printed after every epoch. May be empty, in which case
            the accuracy is reported as ``nan``.
        epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate given to ``optim.Adam``.
        device: Device every batch (and the model) is moved to.

    Returns:
        The same ``model`` object, left in eval mode when ``epochs > 0``.
    """
    print(f"Training normal precision model for {epochs} epochs")
    # Batches are moved to `device` below, so the model has to live there too.
    # Done before the optimizer is built so it tracks the moved parameters.
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                predicted = model(data).argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        # An empty validation set has no defined accuracy; avoid dividing by zero.
        accuracy = 100 * correct / total if total else float("nan")
        print(f'Epoch {epoch+1}, Accuracy: {accuracy:.2f}%')

    return model

In [5]:
def measure_inference_time(model, test_loader, batch_size=-1, num_runs=5):
    """Time forward passes of ``model`` on the first batch of ``test_loader``.

    Each run pulls the first batch from a fresh iterator over ``test_loader``,
    optionally truncates it, moves it to the device of the model's first
    parameter and times a single ``model(data)`` call under ``torch.no_grad``.

    Args:
        model: Module to time; switched to eval mode.
        test_loader: Iterable yielding ``(data, target)`` batches.
        batch_size: If positive, only the first ``batch_size`` samples of the
            batch are used (``1`` times single-sample inference). The default
            ``-1`` uses the whole batch.
        num_runs: Number of timed forward passes.

    Returns:
        ``(mean, std)`` of the measured durations in seconds.
    """
    model.eval()
    # Inputs must sit on the same device as the weights; a module without
    # parameters has no device to infer, so its input is left untouched.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None
    on_cuda = device is not None and device.type == "cuda"
    times = []

    with torch.no_grad():
        for _run in range(num_runs):
            data, _ = next(iter(test_loader))
            if batch_size is not None and batch_size > 0:
                data = data[:batch_size]
            if device is not None:
                data = data.to(device)

            # CUDA kernels launch asynchronously: synchronize on both sides of
            # the call so the interval covers the actual computation.
            if on_cuda:
                torch.cuda.synchronize(device)
            start_time = time.perf_counter()
            _ = model(data)
            if on_cuda:
                torch.cuda.synchronize(device)
            end_time = time.perf_counter()
            times.append(end_time - start_time)

    mean_time = np.mean(times)
    std_time = np.std(times)
    return mean_time, std_time

In [6]:
def evaluate_model(model, test_loader):
    """Measure accuracy, serialized size and inference latency of ``model``.

    Args:
        model: Module to evaluate; switched to eval mode.
        test_loader: Iterable of ``(data, target)`` batches.

    Returns:
        ``(accuracy, size, inference_time, inference_std)`` where ``accuracy``
        is a percentage over all samples in ``test_loader``, ``size`` is the
        value returned by ``print_size_of_model`` and the last two are the
        pair returned by ``measure_inference_time``.
    """
    model.eval()
    # Batches come from the loader on its own device; move them to wherever
    # the model's weights are so a model already placed on an accelerator can
    # be evaluated. A parameterless module gives no device to infer.
    first_param = next(model.parameters(), None)
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            if first_param is not None:
                data = data.to(first_param.device)
                target = target.to(first_param.device)
            predicted = model(data).argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100 * correct / total

    size = print_size_of_model(model, "sparse")
    inference_time, inference_std = measure_inference_time(model, test_loader)

    return accuracy, size, inference_time, inference_std

def sparse_evaluate(model, dataloader, device):
    """Evaluate a pruned ``FFNN`` through a copy whose pruning has been made permanent.

    A fresh ``FFNN`` with the same constructor arguments is built, given
    identity pruning on every leaf non-ReLU module's ``weight`` so that its
    state-dict layout can accept ``model.state_dict()``, loaded with those
    values, and then stripped of the pruning re-parametrization with
    ``prune.remove``. The copy is passed to ``evaluate_model``.

    Args:
        model: Source network; moved to ``device``. Presumably every leaf
            weight of it is already pruned, otherwise the state-dict keys
            would not line up -- verify against the caller.
        dataloader: Batches forwarded to ``evaluate_model``.
        device: Device the source ``model`` is moved to. The copy is not
            moved anywhere by this function.

    Returns:
        Whatever ``evaluate_model`` returns for the copy.
    """
    model.to(device)
    # (we assume the same model architecture as the MNIST or SST-2 architecture we specify above)
    pruned_copy = FFNN(
        model.input_size,
        model.hidden_size,
        model.num_classes,
        model.num_hidden_layers,
    )

    # Leaf modules other than the activation; pruning swaps parameters inside
    # a module but not the module objects, so one list serves both passes.
    leaf_weights = [
        (module, "weight")
        for _, module in pruned_copy.named_modules()
        if len(list(module.children())) == 0 and not isinstance(module, nn.ReLU)
    ]

    for module, _ in leaf_weights:
        prune.identity(module, "weight")

    # Copy the parameters
    pruned_copy.load_state_dict(model.state_dict())

    for module, param_name in leaf_weights:
        prune.remove(module, param_name)

    return evaluate_model(pruned_copy, dataloader)

In [7]:
def calculate_sparsity(model, print_results=False):
    """Report how sparse ``model`` is, based on its pruning masks.

    Sparsity is the percentage of elements equal to zero. Three figures are
    produced:

    - per buffer: for every entry of ``model.named_buffers()``;
    - across pruned parameters: zeros in all ``weight_mask`` / ``bias_mask``
      buffers divided by the total size of those masks;
    - for the model overall: the same mask zeros divided by the number of
      elements in all parameters that require gradients.

    Only mask buffers count toward the two aggregate figures. Other buffers
    are still listed individually but are not parameters, so their zeros no
    longer inflate the model-level number. Every ratio with an empty
    denominator is reported as ``0`` instead of raising ``ZeroDivisionError``.

    Args:
        model: Module to inspect.
        print_results: When true, also print the three figures.

    Returns:
        Dict with keys ``"sparsity_per_parameter"`` (name -> percent),
        ``"sparsity_pruned_parameters"`` and ``"sparsity_model"``.
    """
    def percent(part, whole):
        # Empty denominators mean "nothing to be sparse": report 0.
        return part / whole * 100 if whole > 0 else 0

    sparsity_per_parameter = {}
    total_zero_count_pruned = 0
    total_element_count_pruned = 0
    total_element_count_model = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Iterate over all buffers in the model
    for name, buffer in model.named_buffers():
        zero_count = (buffer == 0).sum().item()
        total_elements = buffer.numel()

        sparsity_per_parameter[name] = percent(zero_count, total_elements)

        # Pruning masks are recognised by "weight_mask" / "bias_mask" in the name.
        if "weight_mask" in name or "bias_mask" in name:
            total_zero_count_pruned += zero_count
            total_element_count_pruned += total_elements

    sparsity_pruned_parameters = percent(total_zero_count_pruned, total_element_count_pruned)
    sparsity_model = percent(total_zero_count_pruned, total_element_count_model)

    if print_results:
        print("Sparsity per parameter:")
        for name, sparsity in sparsity_per_parameter.items():
            print(f"  {name}: {sparsity:.2f}%")

        print(f"Sparsity across all pruned parameters: {sparsity_pruned_parameters:.2f}%")
        print(f"Sparsity for the model overall: {sparsity_model:.2f}%")

    return {
        "sparsity_per_parameter": sparsity_per_parameter,
        "sparsity_pruned_parameters": sparsity_pruned_parameters,
        "sparsity_model": sparsity_model
    }


In [8]:
import pandas as pd

def main():
    """Train an ``FFNN`` on MNIST, then prune it iteratively and log the effect.

    Steps:
      1. Build the model and save its untrained weights.
      2. Train on ``data/mnist_train.csv``, validating on ``data/mnist_test.csv``.
      3. Record accuracy, serialized size, inference time and sparsity of the
         trained model as iteration 0.
      4. Ten times: apply global L1 unstructured pruning with ``amount=0.33``
         to all leaf-layer weights (no retraining in between) and record the
         same metrics via ``sparse_evaluate``.
      5. Write all rows to ``pruning_results.csv`` and print them.

    Returns:
        ``pandas.DataFrame`` with one row per iteration (0 = unpruned).
    """
    # Hyperparameters
    input_size = 20 * 20  # 20x20 pixels
    hidden_size = 1024
    num_classes = 10
    num_hidden_layers = 2
    batch_size = 64
    learning_rate = 0.001
    epochs = 2
    prune_iterations = 10
    prune_amount = 0.33  # passed as `amount` to prune.global_unstructured each round
    initial_weights_path = "data/lab4/initial_weights.pth"

    result_columns = [
        'iteration',
        'accuracy',
        'size_mb',
        'inference_time_avg',
        'inference_time_std',
        'sparsity_pruned_parameters',
        'sparsity_model'
    ]

    def result_row(iteration, accuracy, size, inference_time, inference_std, sparsity):
        # One record per iteration; size is converted from bytes to MB.
        return {
            'iteration': iteration,
            'accuracy': accuracy,
            'size_mb': size/1e6,
            'inference_time_avg': inference_time,
            'inference_time_std': inference_std,
            'sparsity_pruned_parameters': sparsity['sparsity_pruned_parameters'],
            'sparsity_model': sparsity['sparsity_model']
        }

    # Create model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = FFNN(input_size, hidden_size, num_classes, num_hidden_layers)
    prune_params = [
        (module, "weight")
        for _, module in model.named_modules()
        if len(list(module.children())) == 0 and not isinstance(module, nn.ReLU)
    ]

    # torch.save does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(initial_weights_path), exist_ok=True)
    torch.save(model.state_dict(), initial_weights_path)

    train_loader = create_dataloader('data/mnist_train.csv', batch_size, True)
    test_loader = create_dataloader('data/mnist_test.csv', batch_size, False)

    model = model.to(device)
    model = train_model(model, train_loader, test_loader, epochs, learning_rate, device)

    # Rows are collected in a list and turned into a DataFrame once at the end
    # rather than enlarging a DataFrame row by row.
    records = []

    # Store initial results (iteration 0)
    accuracy, size, inference_time, inference_std = evaluate_model(model, test_loader)
    sparsity_results = calculate_sparsity(model)
    records.append(result_row(0, accuracy, size, inference_time, inference_std, sparsity_results))

    print(f"Iteration 0 - Accuracy: {accuracy:.2f}%, Model Size: {size/1e6:.2f} MB, "
          f"Mean Inference Time: {inference_time:.4f}s ± {inference_std:.4f}s")

    for i in range(1, prune_iterations + 1):
        print(f"Pruning iteration {i}")
        prune.global_unstructured(prune_params, pruning_method=prune.L1Unstructured, amount=prune_amount)

        accuracy, size, inference_time, inference_std = sparse_evaluate(model, test_loader, device)

        print(f"Accuracy: {accuracy:.2f}%, Model Size: {size/1e6:.2f} MB, "
              f"Mean Inference Time: {inference_time:.4f}s ± {inference_std:.4f}s")
        # print_results=True emits the per-parameter, pruned-parameter and
        # whole-model sparsity lines, so they follow the accuracy line.
        sparsity_results = calculate_sparsity(model, print_results=True)

        records.append(result_row(i, accuracy, size, inference_time, inference_std, sparsity_results))

    results_df = pd.DataFrame(records, columns=result_columns)

    # Save results to CSV
    results_df.to_csv('pruning_results.csv', index=False)

    # Display the DataFrame
    print("\nFinal Results DataFrame:")
    print(results_df)

    return results_df

In [9]:
df = main()
# main() has already written its results to 'pruning_results.csv'; this writes
# the returned DataFrame to that same path again.
df.to_csv('pruning_results.csv', index=False)

Training normal precision model for 2 epochs
Epoch 1, Accuracy: 96.95%
Epoch 2, Accuracy: 97.42%
Iteration 0 - Accuracy: 97.42%, Model Size: 29.40 MB, Mean Inference Time: 0.0013s ± 0.0006s
Pruning iteration 1
Accuracy: 97.40%, Model Size: 19.71 MB, Mean Inference Time: 0.0015s ± 0.0005s
Sparsity per parameter:
  layers.0.weight_mask: 26.41%
  layers.1.weight_mask: 35.57%
  layers.2.weight_mask: 33.42%
Sparsity across all pruned parameters: 33.00%
Sparsity for the model overall: 32.95%
Pruning iteration 2
Accuracy: 97.18%, Model Size: 13.21 MB, Mean Inference Time: 0.0008s ± 0.0004s
Sparsity per parameter:
  layers.0.weight_mask: 45.33%
  layers.1.weight_mask: 58.89%
  layers.2.weight_mask: 59.51%
Sparsity across all pruned parameters: 55.11%
Sparsity for the model overall: 55.03%
Pruning iteration 3
Accuracy: 96.19%, Model Size: 8.86 MB, Mean Inference Time: 0.0009s ± 0.0005s
Sparsity per parameter:
  layers.0.weight_mask: 59.63%
  layers.1.weight_mask: 73.87%
  layers.2.weight_mask: 