In [2]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
from torch import nn
from torch.nn.utils import prune
import numpy as np
import sys
import matplotlib.pyplot as plt
import math
import random
import sklearn.metrics as perf
import os
import cv2
import time
import torch_pruning as tp
import csv

from models.models import MTLClassifier, AgeRegressor, GenderClassifier, EthnicityClassifier
from utils.data import FacesDataset, data_transform
from utils.training import train_mtl_model, train_age_model, train_gender_model, train_ethnicity_model
from utils.evaluation import run_evaluation, show_example_predictions
from utils.pruning import prune_model, prune_other_tasks, get_f1_and_lat

### Load and Prepare Data

In [3]:
### Load in the data
# Folder holding the face images; the path is relative to the notebook's
# working directory.
folder = 'UTKFace'
# Preprocessing pipeline provided by utils.data (its contents are defined there,
# not in this notebook).
transform = data_transform()
# Full dataset; it is split into train/validation subsets in the next cell.
dataset = FacesDataset(folder=folder, transform=transform)

In [4]:
### Train/validation split (80/20) and the matching data loaders
train_len = int(0.8 * len(dataset))
val_len = len(dataset) - train_len

# A dedicated, fixed-seed generator keeps the split identical across runs.
train_dataset, val_dataset = random_split(
    dataset,
    [train_len, val_len],
    generator=torch.Generator().manual_seed(8),
)

# Only the training loader shuffles; validation keeps a fixed sample order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

### MTL Model Variants

#### Define pruned training function

In [7]:
def _pruning_task_spec(task_key):
    """Return the build/train/prune recipe for one task, or None if unsupported.

    Args:
        task_key: Upper-cased task name ('MTL', 'AGE', 'GENDER' or 'ETHNICITY').

    Returns:
        A dict with the keys
            'tasks'        - task names passed on to the evaluation helper,
            'model_cls'    - class instantiated (without arguments) for the model,
            'train_fn'     - training function from utils.training,
            'train_kwargs' - loss criteria and loss coefficients for 'train_fn',
            'prune'        - callable ``(model, prune_pct) -> pruned model``,
        or None when ``task_key`` is not one of the supported tasks.
    """
    if task_key == 'MTL':
        return {
            'tasks': ['age', 'gender', 'ethnicity'],
            'model_cls': MTLClassifier,
            'train_fn': train_mtl_model,
            'train_kwargs': {
                'age_criterion': nn.MSELoss(),
                'gender_criterion': nn.CrossEntropyLoss(),
                'ethnicity_criterion': nn.CrossEntropyLoss(),
                # Per-task loss weights used for the joint objective.
                'age_coeff': 0.004,
                'gender_coeff': 2,
                'ethnicity_coeff': 1,
            },
            'prune': lambda model, pct: prune_model(model, PRUNING_PERCENT=pct),
        }
    if task_key == 'GENDER':
        return {
            'tasks': ['gender'],
            'model_cls': GenderClassifier,
            'train_fn': train_gender_model,
            'train_kwargs': {
                'gender_criterion': nn.CrossEntropyLoss(),
                'gender_coeff': 1.0,
            },
            'prune': lambda model, pct: prune_other_tasks(
                model, task1='age', task2='ethnicity', PRUNING_PERCENT=pct),
        }
    if task_key == 'ETHNICITY':
        return {
            'tasks': ['ethnicity'],
            'model_cls': EthnicityClassifier,
            'train_fn': train_ethnicity_model,
            'train_kwargs': {
                'ethnicity_criterion': nn.CrossEntropyLoss(),
                'ethnicity_coeff': 1.0,
            },
            'prune': lambda model, pct: prune_other_tasks(
                model, task1='age', task2='gender', PRUNING_PERCENT=pct),
        }
    if task_key == 'AGE':
        return {
            'tasks': ['age'],
            'model_cls': AgeRegressor,
            'train_fn': train_age_model,
            'train_kwargs': {
                'age_criterion': nn.MSELoss(),
                'age_coeff': 1.0,
            },
            'prune': lambda model, pct: prune_other_tasks(
                model, task1='gender', task2='ethnicity', PRUNING_PERCENT=pct),
        }
    return None


def pruned_mtl_training(task, prune_pct, num_epochs,
                        train_loader=train_loader, val_loader=val_loader,
                        val_dataset=None):
    """Train a model, prune it, fine-tune it, and record its score and latency.

    Steps: (1) train a fresh model for ``num_epochs``, (2) reload the saved
    checkpoint and prune it by ``prune_pct``, (3) fine-tune the pruned model for
    another ``num_epochs``, (4) evaluate the fine-tuned checkpoint with
    ``get_f1_and_lat``, (5) append one row to
    ``models/model_variants/model_score_lookup.tsv`` and save the pruned model
    as ``models/model_variants/<task>_p-<pct>.pth``.

    The model is moved to the GPU with ``.cuda()``, so a CUDA device is required.

    Args:
        task: 'MTL', 'AGE', 'GENDER' or 'ETHNICITY' (case-insensitive).
        prune_pct: Pruning amount as a fraction (e.g. 0.1 for 10 %).
        num_epochs: Epochs for the initial training and again for fine-tuning.
        train_loader: Training DataLoader (defaults to the notebook-level one).
        val_loader: Validation DataLoader (defaults to the notebook-level one).
        val_dataset: Dataset handed to ``get_f1_and_lat`` as ``eval_dataset``.

    Returns:
        ``(scores, mean_lat, std_lat)`` exactly as produced by ``get_f1_and_lat``.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    task_key = task.upper()
    spec = _pruning_task_spec(task_key)
    if spec is None:
        # Fail fast instead of printing and then crashing on unbound locals.
        raise ValueError(
            f"Invalid task was specified: {task!r} "
            "(expected 'MTL', 'AGE', 'GENDER' or 'ETHNICITY').")

    tasks = spec['tasks']
    train_fn = spec['train_fn']
    train_kwargs = spec['train_kwargs']

    task_tag = task.lower()
    # round() guards against float error such as 0.57 * 100 == 56.99999999999999.
    pct_tag = int(round(prune_pct * 100))
    # Save names are relative to 'models/'; the training/evaluation helpers are
    # given the relative name while torch.load/torch.save below add the prefix.
    save_no_prune = f'model_variants/{task_tag}_p-0_lat-init_f1-init.pth'
    save_initial = f'model_variants/{task_tag}_p-{pct_tag}_lat-init_f1-init.pth'

    ### Set up model and optimizer
    model = spec['model_cls']()
    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters())

    # Train initial model
    print('---------------- Train Initial Model ----------------')
    train_fn(num_epochs=num_epochs, model=model, optimizer=optimizer,
             train_loader=train_loader, val_loader=val_loader,
             save=True, save_name=save_no_prune, **train_kwargs)

    # Do pruning on the checkpoint written by the training step
    model = torch.load(f'models/{save_no_prune}')
    pruned_model = spec['prune'](model, prune_pct)

    # The reloaded/pruned model owns new parameter tensors, so the optimizer
    # must be rebuilt; the earlier one still points at the pre-reload weights.
    optimizer = torch.optim.Adam(pruned_model.parameters())

    # Fine-tune model
    print('-------------- Fine-tuning Pruned Model -------------')
    train_fn(num_epochs=num_epochs, model=pruned_model, optimizer=optimizer,
             train_loader=train_loader, val_loader=val_loader,
             save=True, save_name=save_initial, **train_kwargs)

    # Test latency and accuracy of the fine-tuned checkpoint.
    # NOTE(review): mtl_model=True is also passed for the single-task variants,
    # as in the original code - confirm this is what get_f1_and_lat expects.
    scores, (mean_lat, std_lat) = get_f1_and_lat(model_path=save_initial,
                                                 eval_dataset=val_dataset,
                                                 eval_dataloader=val_loader,
                                                 tasks=tasks,
                                                 mtl_model=True)

    # One lookup row per variant; metrics of tasks the model does not cover are 0.0.
    # Index [1] of the age entry is written to the 'age_r2' column and index [0]
    # of the classification entries to the '*_f1' columns.
    ager2 = scores['age'][1] if 'age' in tasks else 0.0
    genderf1 = scores['gender'][0] if 'gender' in tasks else 0.0
    ethnicityf1 = scores['ethnicity'][0] if 'ethnicity' in tasks else 0.0
    row = [task_key, prune_pct, mean_lat, ager2, genderf1, ethnicityf1]
    with open('models/model_variants/model_score_lookup.tsv', 'a', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(row)

    # Persist the pruned, fine-tuned model (not the pre-pruning object).
    new_name = f'model_variants/{task_tag}_p-{pct_tag}.pth'
    torch.save(pruned_model, f"models/{new_name}")

    return scores, mean_lat, std_lat
        


#### 0% to 90% Pruning

In [8]:
task = 'MTL'
num_epochs = 1

# Start a fresh lookup table with a header row; pruned_mtl_training appends one
# row per variant. newline='' lets the csv module control line endings (this
# avoids '\r\r\n' on Windows) and matches how the rows are appended.
with open('models/model_variants/model_score_lookup.tsv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['Task', 'prune_pct', 'mean_latency', 'age_r2', 'gender_f1', 'ethnicity_f1'])

# NOTE(review): range(2) only covers the 0 % and 10 % levels; use range(10) to
# run the full 0 %-90 % sweep named in the section title.
for i in range(2):
    print(f'--------------------------------- Prune {i*10} % ---------------------------------')
    prune_pct = i / 10
    scores, mean_lat, std_lat = pruned_mtl_training(task, prune_pct, num_epochs, train_loader, val_loader, val_dataset)
    print()

--------------------------------- Prune 0 % ---------------------------------
---------------- Train Initial Model ----------------
Epoch 0, val loss: inf -> 0.11199, train loss: 0.16045
Epoch 0, age val loss: 0.02628, gender val loss: 0.03684, ethnicity val loss: 0.04888
-------------- Fine-tuning Pruned Model -------------
Epoch 0, val loss: inf -> 0.10169, train loss: 0.11915
Epoch 0, age val loss: 0.02409, gender val loss: 0.03013, ethnicity val loss: 0.04747

--------------------------------- Prune 10 % ---------------------------------
---------------- Train Initial Model ----------------
Epoch 0, val loss: inf -> 0.11635, train loss: 0.16354
Epoch 0, age val loss: 0.03150, gender val loss: 0.03175, ethnicity val loss: 0.05310
-------------- Fine-tuning Pruned Model -------------
Epoch 0, val loss: inf -> 0.18380, train loss: 0.19348
Epoch 0, age val loss: 0.09112, gender val loss: 0.04174, ethnicity val loss: 0.05093



In [26]:
# Baseline latency: a freshly constructed, unpruned MTLClassifier on the CPU.
# (For GPU timing, move the model and each image with .cuda() and synchronize
# before reading the clock.)
test_model = MTLClassifier()
test_model.eval()

# Inference only: no_grad() keeps autograd bookkeeping out of the measurement.
with torch.no_grad():
    # perf_counter() is the monotonic, high-resolution clock meant for timing.
    start = time.perf_counter()
    for i, sample in enumerate(val_dataset):
        # Stop before the forward pass so exactly 100 samples are timed.
        if i >= 100:
            break
        image = sample[0].unsqueeze(0)  # add the batch dimension
        test_model(image)
    time_taken = time.perf_counter() - start
print(time_taken)

2.532001256942749


In [27]:
# Pruned latency: the same measurement on a freshly constructed MTLClassifier
# after prune_model is called with 0.90, again on the CPU.
test_model = MTLClassifier()
test_model.eval()
pruned_model = prune_model(test_model, 0.90)

# Inference only: no_grad() keeps autograd bookkeeping out of the measurement.
with torch.no_grad():
    # perf_counter() is the monotonic, high-resolution clock meant for timing.
    start = time.perf_counter()
    for i, sample in enumerate(val_dataset):
        # Stop before the forward pass so exactly 100 samples are timed.
        if i >= 100:
            break
        image = sample[0].unsqueeze(0)  # add the batch dimension
        pruned_model(image)
    time_taken = time.perf_counter() - start
print(time_taken)

0.26000332832336426
