#### train_dcnn_analysis
A notebook to analytically evaluate the training process for the D-CNN.

In [None]:
import os

import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

from tqdm.autonotebook import tqdm
from torch import optim
from torch.utils.data import DataLoader

from datasets import PatchNpyDataset, PatchCompare
from d_net import D_CNN

import train_dcnn
import test_dcnn

In [None]:
# TODO pass required training parameters to the train_dcnn.train / train_dcnn.validate automatically

# --- Data ---
# Root path where patch data/etc. is found.
DATA_ROOT = ''
# Number of samples per DataLoader batch.
BATCH_SIZE = 50

# --- Experiment size ---
# Sample size of randomly-initialized models to compare.
MODEL_SAMPLES = 30
# Number of epochs to train each model.
EPOCHS = 15

# --- Logging ---
# Whether to print detailed output during sampling/training.
VERBOSE = False

In [None]:
# Training-time augmentation: convert each patch to a PIL image, apply random
# horizontal and vertical flips, then convert the result back to a tensor.
train_tf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
])

# Validation and testing patches go through an empty transform pipeline.
val_tf = transforms.Compose([])

# Directory that holds one sub-directory of .npy patches per split.
_npy_root = os.path.join(DATA_ROOT, 'patch-set', 'npy')


def _load_split(split, transform):
    """Build the patch dataset, comparison samples, and shuffled loader for one split."""
    patches = PatchNpyDataset(root=os.path.join(_npy_root, split), transform=transform)
    comparisons = PatchCompare(patches)
    loader = DataLoader(comparisons, batch_size=BATCH_SIZE, shuffle=True)
    return patches, comparisons, loader


# Load the material patch datasets.
# - The testing set is only used to track its loss over time,
#   it is not used for parameter tuning / etc.
# - The *_losses / *_accuracies lists start out empty; the sampling loop
#   further down rebinds these names to NumPy arrays.
train_set, train_samples, train_loader = _load_split('train', train_tf)
train_losses, train_accuracies = [], []
print(f'Training set   : {len(train_samples)} samples')

val_set, val_samples, val_loader = _load_split('val', val_tf)
val_losses, val_accuracies = [], []
print(f'Validation set : {len(val_samples)} samples')

test_set, test_samples, test_loader = _load_split('test', val_tf)
test_losses, test_accuracies = [], []
print(f'Testing set    : {len(test_samples)} samples')

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device_str = 'cuda'
else:
    device_str = 'cpu'

# torch.device handle built from the string chosen above.
device = torch.device(device_str)

In [None]:
def plot_sample_losses(tr_losses, vl_losses, ts_losses, sample_idx=0):
    """Plots the training, validation, and testing losses per epoch
    for one model sample using Matplotlib.

    The figure is saved as 'dcnn-epoch-losses.eps' and
    'dcnn-epoch-losses.png' in the working directory and then shown.

    Parameters:
        tr_losses: np.array
            Total training loss, indexed as [sample, epoch].
        vl_losses: np.array
            Total validation loss, indexed as [sample, epoch].
        ts_losses: np.array
            Total testing loss, indexed as [sample, epoch].
        sample_idx: int (optional)
            The index of the desired sample. The default is 0.
    """
    fig, ax = plt.subplots()
    ax.set_title('D-CNN losses per epoch')

    curves = (
        (tr_losses, 'Training loss', 'blue'),
        (vl_losses, 'Validation loss', 'orange'),
        (ts_losses, 'Testing loss', 'green'),
    )
    for losses, label, color in curves:
        curve = np.asarray(losses)[sample_idx, :]
        # The epoch axis comes from the data itself rather than the notebook-level
        # EPOCHS constant, so arrays from a run of any length can be plotted.
        ax.plot(np.arange(curve.shape[0]), curve, '.-', label=label, color=color)

    # Anchor both axes at zero; the upper limits stay automatic.
    ax.set_xlim(0)
    ax.set_ylim(0)

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Total loss')
    ax.legend()

    plt.savefig('dcnn-epoch-losses.eps', dpi=192, format='eps')
    plt.savefig('dcnn-epoch-losses.png', dpi=300, format='png')
    plt.show()

In [None]:
def plot_sample_accuracies(tr_accuracies, vl_accuracies, ts_accuracies, sample_idx=0):
    """Plots the training, validation, and testing accuracies per epoch
    for one model sample using Matplotlib.

    The figure is saved as 'dcnn-epoch-accuracies.eps' and
    'dcnn-epoch-accuracies.png' in the working directory and then shown.
    The y-axis is fixed to the range 0-100, so values are expected in percent.

    Parameters:
        tr_accuracies: np.array
            Training accuracies, indexed as [sample, epoch].
        vl_accuracies: np.array
            Validation accuracies, indexed as [sample, epoch].
        ts_accuracies: np.array
            Testing accuracies, indexed as [sample, epoch].
        sample_idx: int (optional)
            The index of the desired sample. The default is 0.
    """
    fig, ax = plt.subplots()
    ax.set_title('D-CNN accuracies per epoch')

    curves = (
        (tr_accuracies, 'Training accuracy', 'blue'),
        (vl_accuracies, 'Validation accuracy', 'orange'),
        (ts_accuracies, 'Testing accuracy', 'green'),
    )
    for accuracies, label, color in curves:
        curve = np.asarray(accuracies)[sample_idx, :]
        # The epoch axis comes from the data itself rather than the notebook-level
        # EPOCHS constant, so arrays from a run of any length can be plotted.
        ax.plot(np.arange(curve.shape[0]), curve, '.-', label=label, color=color)

    ax.set_xlim(left=0)
    ax.set_ylim(bottom=0, top=100)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()

    plt.savefig('dcnn-epoch-accuracies.eps', dpi=192, format='eps')
    plt.savefig('dcnn-epoch-accuracies.png', dpi=300, format='png')
    plt.show()

In [None]:
def plot_distribution_losses(tr_losses, vl_losses, ts_losses):
    """Plots the distributions of training and validation losses
    per epoch for all model samples using Matplotlib.

    For each epoch the median over all samples is drawn as a line and the
    25th-75th percentile range as a shaded band. The figure is saved as
    'dcnn-epoch-losses-dist.eps' and 'dcnn-epoch-losses-dist.png' in the
    working directory and then shown.

    Parameters:
        tr_losses: np.array
            Total training loss, indexed as [sample, epoch].
        vl_losses: np.array
            Total validation loss, indexed as [sample, epoch].
        ts_losses: np.array
            Total testing loss, indexed as [sample, epoch]. Accepted so the
            signature matches the other plotting helpers, but not drawn:
            the testing curves are disabled in this plot.
    """
    fig, ax = plt.subplots()
    ax.set_title('D-CNN losses per epoch')

    series = (
        (tr_losses, 'Training loss', 'blue'),
        (vl_losses, 'Validation loss', 'orange'),
    )
    for losses, label, color in series:
        # atleast_2d lets a single 1-D run be plotted as a one-sample distribution.
        values = np.atleast_2d(losses)
        # The epoch axis comes from the data itself rather than the notebook-level
        # EPOCHS constant, so arrays from a run of any length can be plotted.
        x = np.arange(values.shape[1])
        p25, p75 = np.percentile(values, [25, 75], axis=0)

        # Median line with the interquartile range shaded behind it.
        ax.plot(x, np.median(values, axis=0), '.-', label=label, color=color)
        ax.fill_between(x, p25, p75, facecolor=color, alpha=0.2)

    # Anchor both axes at zero; the upper limits stay automatic.
    ax.set_xlim(0)
    ax.set_ylim(0)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Total loss')
    ax.legend()

    plt.savefig('dcnn-epoch-losses-dist.eps', dpi=192, format='eps')
    plt.savefig('dcnn-epoch-losses-dist.png', dpi=192, format='png')
    plt.show()

In [None]:
def plot_distribution_accuracies(tr_accuracies, vl_accuracies, ts_accuracies):
    """Plots the distributions of training and validation accuracies
    per epoch for all model samples using Matplotlib.

    For each epoch the median over all samples is drawn as a line and the
    25th-75th percentile range as a shaded band. The figure is saved as
    'dcnn-epoch-accuracies-dist.eps' and 'dcnn-epoch-accuracies-dist.png'
    in the working directory and then shown. The y-axis is fixed to the
    range 0-100, so values are expected in percent.

    Parameters:
        tr_accuracies: np.array
            Training accuracies, indexed as [sample, epoch].
        vl_accuracies: np.array
            Validation accuracies, indexed as [sample, epoch].
        ts_accuracies: np.array
            Testing accuracies, indexed as [sample, epoch]. Accepted so the
            signature matches the other plotting helpers, but not drawn:
            the testing curves are disabled in this plot.
    """
    fig, ax = plt.subplots()
    ax.set_title('D-CNN accuracies per epoch')

    series = (
        (tr_accuracies, 'Training accuracy', 'blue'),
        (vl_accuracies, 'Validation accuracy', 'orange'),
    )
    for accuracies, label, color in series:
        # atleast_2d lets a single 1-D run be plotted as a one-sample distribution.
        values = np.atleast_2d(accuracies)
        # The epoch axis comes from the data itself rather than the notebook-level
        # EPOCHS constant, so arrays from a run of any length can be plotted.
        x = np.arange(values.shape[1])
        p25, p75 = np.percentile(values, [25, 75], axis=0)

        # Median line with the interquartile range shaded behind it.
        ax.plot(x, np.median(values, axis=0), '.-', label=label, color=color)
        ax.fill_between(x, p25, p75, facecolor=color, alpha=0.2)

    ax.set_xlim(left=0)
    ax.set_ylim(bottom=0, top=100)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy (%)')
    ax.legend()

    plt.savefig('dcnn-epoch-accuracies-dist.eps', dpi=192, format='eps')
    plt.savefig('dcnn-epoch-accuracies-dist.png', dpi=192, format='png')
    plt.show()

In [None]:
def test(model, device, test_loader, num_classes, verbose=True):
    """Runs the testing set on the D-CNN.

    Each batch is indexed as (images, image labels, comparison labels); only
    the images and comparison labels are needed here. The model output and
    the comparison labels are split along dimension 1 into one slot per
    image. Slot 0 is skipped for both the loss and the accuracy count.
    TODO Check why indexes start at 1 (presumably slot 0 is the reference
    image itself - confirm against PatchCompare).

    Parameters:
        model: D_CNN
            The D-CNN neural network instance being tested.
        device: string
            The device (cpu or cuda) that the model is being run on.
        test_loader: DataLoader
            The testing data.
        num_classes: int
            The number of material classes. Currently unused; kept so the
            call signature stays unchanged.
        verbose: bool (optional)
            Whether to print the accuracy and total loss. The default is True.

    Returns:
        total_loss: float
            The total cross-entropy loss accrued while evaluating the D-CNN
            on the testing set (summed over slots and batches).
        accuracy: float
            The ratio of correct similarity predictions out of all
            predictions, or 0.0 if the loader produced no predictions.
    """
    model.eval()
    correct = 0
    incorrect = 0
    total_loss = 0.0

    # Evaluation never back-propagates, so skip autograd bookkeeping entirely.
    with torch.no_grad():
        for batch in tqdm(test_loader, unit=' testing batches'):
            images = batch[0].to(device)      # Images
            cmp_labels = batch[2].to(device)  # Comparison labels
            results = model(images)

            # Split into one tensor per image slot. Reshaping with an explicit
            # batch size (instead of a bare squeeze()) keeps the batch axis
            # even when the final batch holds a single item.
            batch_size = results.shape[0]
            slot_scores = [
                res.reshape(batch_size, -1)
                for res in torch.split(results, 1, dim=1)
            ]
            slot_labels = [
                lbl.reshape(-1).long()
                for lbl in torch.split(cmp_labels, 1, dim=1)
            ]

            # Slot 0 is excluded from both the loss and the accuracy count.
            scored = list(zip(slot_scores[1:], slot_labels[1:]))

            total_loss += float(
                sum(F.cross_entropy(scores, labels) for scores, labels in scored)
            )

            # Count the correct/incorrect similarity decisions for this batch.
            for scores, labels in scored:
                hits = int((scores.argmax(dim=1) == labels).sum().item())
                correct += hits
                incorrect += labels.numel() - hits

    total = correct + incorrect
    # Guard against an empty loader instead of dividing by zero.
    accuracy = correct / total if total else 0.0

    if verbose:
        print(f'Testing set : {correct:5}/{correct+incorrect:5}, {accuracy*100:.2f}%')
        print(f'Testing loss: {total_loss}')

    return total_loss, accuracy

In [None]:
# Train MODEL_SAMPLES freshly-created D-CNN models for EPOCHS epochs each,
# recording per-epoch loss/accuracy for every model and keeping the weights
# and D matrix of the epoch with the lowest validation loss seen so far.
print(f'Labels   : {train_set.get_labels()}')
print(f'Device   : {device_str}')
print(f'Verbose? : {VERBOSE}')

# Per-model, per-epoch metric tables, indexed as [sample, epoch].
# These rebind the names that were initialised as empty lists when the datasets were loaded.
train_losses = np.zeros((MODEL_SAMPLES, EPOCHS))
val_losses   = np.zeros((MODEL_SAMPLES, EPOCHS))
test_losses  = np.zeros((MODEL_SAMPLES, EPOCHS))

train_accuracies = np.zeros((MODEL_SAMPLES, EPOCHS))
val_accuracies   = np.zeros((MODEL_SAMPLES, EPOCHS))
test_accuracies  = np.zeros((MODEL_SAMPLES, EPOCHS))

min_loss = float('inf') # Lowest loss of the D-CNN on the validation set of all epochs (and model samples) so far

for s in range(MODEL_SAMPLES):
    print(f'\n---------------- SAMPLE {s+1}/{MODEL_SAMPLES} ---------------- ')
    
    d_cnn     = D_CNN().to(device) # Create a new model sample
    optimizer = optim.Adam(d_cnn.parameters(), lr=1e-3) # Fresh Adam optimizer for this sample's parameters
    
    for e in range(EPOCHS):
        print(f'~~~~ EPOCH {e+1}/{EPOCHS} ~~~~')

        # One training pass; train_dcnn.train returns a (loss, accuracy) pair.
        train_loss, acc = train_dcnn.train(
            d_cnn, device, train_loader, optimizer, 
            verbose=VERBOSE
        )
        
        train_losses[s, e] = train_loss
        train_accuracies[s, e] = acc * 100.0 # Stored as a percentage (presumably acc is a 0-1 ratio - confirm in train_dcnn)

        # Validation pass; also returns the D matrix and the model state that are saved below.
        D, val_loss, acc, model_state = train_dcnn.validate(
            d_cnn, device, val_loader, len(val_samples.classes), 
            verbose=VERBOSE
        )
        
        val_losses[s, e] = val_loss
        val_accuracies[s, e] = acc * 100.0

        if s == 0:
            # Only run the testing set of the first sample, to show how extremely close
            # testing and validation loss are.
            # NOTE: rows 1..MODEL_SAMPLES-1 of test_losses / test_accuracies therefore stay at zero.
            test_loss, acc = test(
                d_cnn, device, test_loader, len(test_samples.classes), 
                verbose=VERBOSE
            )
            
            test_losses[s, e] = test_loss
            test_accuracies[s, e] = acc * 100.0

        # If this is the lowest-loss (and therefore generally most accurate)
        # run so far out of ALL model samples, save the D matrix and model weights to disk
        # (dcnn.pt and d.npy are overwritten each time a new minimum is found)
        if val_loss < min_loss:
            min_loss = val_loss
            print('New minimum validation loss:', min_loss)
            print('Saving model to dcnn.pt...')
            torch.save(model_state, 'dcnn.pt')
            print('Saving D matrix to d.npy...\n')
            np.save('d.npy', D)
    
    # Free CUDA memory
    del d_cnn
    del optimizer

In [None]:
# Once training has finished, draw the per-epoch loss and accuracy curves
# of the first model sample (index 0) with Matplotlib.
plot_sample_losses(
    tr_losses=train_losses,
    vl_losses=val_losses,
    ts_losses=test_losses,
    sample_idx=0,
)
plot_sample_accuracies(
    tr_accuracies=train_accuracies,
    vl_accuracies=val_accuracies,
    ts_accuracies=test_accuracies,
    sample_idx=0,
)

In [None]:
# Draw the loss and accuracy distributions over all model samples.
plot_distribution_losses(
    tr_losses=train_losses,
    vl_losses=val_losses,
    ts_losses=test_losses,
)
plot_distribution_accuracies(
    tr_accuracies=train_accuracies,
    vl_accuracies=val_accuracies,
    ts_accuracies=test_accuracies,
)

# Dump the raw loss tables for inspection.
print('Train losses:\n', train_losses)
print('\nVal losses:\n', val_losses)

In [None]:
# Persist every recorded metric table as a .npy file in the working directory.
for _npy_path, _metrics in (
    ('dcnn_train_losses.npy', train_losses),
    ('dcnn_val_losses.npy', val_losses),
    ('dcnn_test_losses.npy', test_losses),
    ('dcnn_train_accuracies.npy', train_accuracies),
    ('dcnn_val_accuracies.npy', val_accuracies),
    ('dcnn_test_accuracies.npy', test_accuracies),
):
    np.save(_npy_path, _metrics)