# Note on training:

Rather than each sample being a single image with its correct label as the target, each sample consists of three images, and the target indicates which of the second and third images is more similar to the first image.

Create a custom dataset with PyTorch using the 3 images as the data.

In [4]:
# installs
# !pip install wandb

# Library imports
import pandas as pd
import numpy as np
import wandb
import os
import torch

#local imports
import utilities.CelebAUtils as utils
import utilities.CelebAData as data
import utilities.CelebASettings as settings

##### Change these accordingly

In [5]:
# Logging in W&B?
# NOTE: this flag is named `logging`; it would shadow the standard-library
# module of the same name if that module were ever imported in this notebook.
logging = True
wandb_project_name = 'qualitative-analysis'
wandb_entity_name = 'witw'


# Parent and Data Directory
working_directory = '/home/CELEBA'
# Relative path -- presumably resolved against working_directory once the
# chdir below has run; verify against data.make_data.
data_dir = 'data/celeba'

# Change into the configured parent directory instead of repeating the
# hard-coded path, so that editing working_directory above is sufficient.
os.chdir(working_directory)
os.listdir()

['code',
 'docker',
 '.ipynb_checkpoints',
 'wandb',
 'vae.pt',
 'data',
 'models',
 'README.md']

In [6]:
def metacriteria_attractive(A_attr: dict, B_attr: dict, C_attr: dict):
    """Score whether B or C agrees with A on the 'Attractive' attribute.

    Args:
        A_attr: attribute mapping of the reference item; must contain the
            key 'Attractive'.
        B_attr: attribute mapping of the first candidate.
        C_attr: attribute mapping of the second candidate.

    Returns:
        1 if only B shares A's 'Attractive' value, 0 if only C does, and
        0.5 when both or neither do (a tie).
    """
    reference = A_attr['Attractive']
    b_agrees = reference == B_attr['Attractive']
    c_agrees = reference == C_attr['Attractive']

    # Both candidates agree with A, or neither does: no preference.
    if b_agrees == c_agrees:
        return 0.5

    # Exactly one candidate agrees with A.
    return 1 if b_agrees else 0

# Settings

In [7]:
# Imported Settings - Change these in /utilities/settings
gpu_num = settings.pt_gpu_num
img_shape = settings.img_shape
img_channels_shape = settings.img_channels_shape
num_channels = settings.num_channels
num_res = settings.num_res
num_kernels = settings.num_kernels
num_hidden = settings.num_hidden
encoder_path = settings.encoder
min_size = settings.min_size
max_size = settings.max_size
lr = settings.pt_lr
run_name = settings.run_name

# data settings:
n_test = 5000  # number of test samples requested from data.make_data
batch_size = 64  # input batch size for training
batch_size_test = 1000  # input batch size for testing


# model settings
epochs = 40  # number of epochs passed to utils.run_model
gamma = 0.7  # learning rate step gamma
seed = 42  # random seed
save_model = False  # save the trained model after each training size

# misc settings
no_cuda = False  # set to True to disable CUDA training
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
# Honour no_cuda / CUDA availability: previously use_cuda was computed but
# never consulted, so the configured GPU was requested unconditionally.
device = torch.device(gpu_num if use_cuda else "cpu")
# Pinned host memory only helps host-to-GPU copies, so enable it only with CUDA.
kwargs = {'num_workers': 10, 'pin_memory': use_cuda}

print("Device:", device)

Device: cuda:3


# TRAINING LOOP

In [8]:
# Run one sweep over training-set sizes per metacriteria function, recording
# the best metrics for every size in a DataFrame (and in W&B when enabled).
metacriteria_list = [metacriteria_attractive]
# 15 training-set sizes, log-spaced between min_size and max_size and
# truncated to integers. The builtin `int` replaces the `np.int` alias,
# which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
trainingSize = np.logspace(np.log10(min_size), np.log10(max_size), 15).astype(int)

for metacriteria in metacriteria_list:

    # make pandas data frame
    columnTitles = ['Datasize', 'Test Loss', 'AUROC Filtered', 'AUROC All', 'Accuracy Filtered', 'Accuracy All', 'Epoch']
    df = pd.DataFrame(columns=columnTitles)

    if logging:
        print("--Run is being logged in W&B--")
        # reinit=True lets each metacriteria start its own run in this process.
        run = wandb.init(project=wandb_project_name, entity=wandb_entity_name, config={
            "metacriteria": metacriteria.__name__, "pretrained": True}, reinit=True)
        wandb.run.name = run_name + "_pretrained " + metacriteria.__name__
    else:
        print("--Run is NOT being logged in W&B--")

    for n_train in trainingSize:
        # Apply the format with `%`; passing n_train as a second print()
        # argument printed a literal "%i" followed by the value.
        print("TRAINING SIZE: %i" % n_train)

        # get data
        train_loader, test_loader = data.make_data(batch_size, batch_size_test, kwargs, n_train, n_test, metacriteria, data_dir)

        # make model (a fresh model is built for every training size)
        model = utils.make_model(device, num_hidden, img_channels_shape, num_channels, num_res, num_kernels, pretrained=True, encoder_path=encoder_path)

        # train/test model and unpack the metrics returned by utils.run_model
        best_test, best_epoch, best_accuracy_all, best_accuracy_filtered, best_auroc_filtered, best_auroc_all = utils.run_model(model, device, train_loader, test_loader, epochs, lr, gamma)

        if logging:
            wandb.log({'Datasize': n_train,
                       'Test_Loss': best_test,
                       'AUROC_filtered': best_auroc_filtered,
                       'AUROC_all': best_auroc_all,
                       'Accuracy_filtered': best_accuracy_filtered,
                       'Accuracy_all': best_accuracy_all,
                       'Epoch': best_epoch})

        print("Best Test Loss: %0.3f. Best AUROC Filtered: %0.3f. Best AUROC All: %0.3f. Best Accuracy Filtered: %0.3f. Best Accuracy All: %0.3f. Best Epoch: %i\n" %
              (best_test, best_auroc_filtered, best_auroc_all, best_accuracy_filtered, best_accuracy_all, best_epoch))

        # Append this size's results as a new row (same order as columnTitles).
        df.loc[len(df.index)] = [n_train, best_test, best_auroc_filtered, best_auroc_all, best_accuracy_filtered, best_accuracy_all, best_epoch]
        display(df)

        if save_model:
            # NOTE(review): the same file name is reused for every training
            # size, so each save overwrites the previous checkpoint.
            torch.save(model.state_dict(), "mnist_flat.pt")

    if logging:
        run.finish()
        



--Run is being logged in W&B--


[34m[1mwandb[0m: Currently logged in as: [33mwitw[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.31 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


TRAINING SIZE: %i 1000
Train Loss: 0.694. Test Loss: 0.693. AUROC_Filtered: 0.494. AUROC_All: 0.502. Accuracy Filtered: 0.499. Accuracy All: 0.499. Epoch: 1


KeyboardInterrupt: 