# Note on training:

Rather than data being the image and target being the correct value, the data will be three images and the target will be which image is more similar to the first image.

Create a custom dataset with PyTorch using the 3 images as the data.

In [1]:
# installs
!pip install wandb

# Library imports
import numpy as np
import pandas as pd
import argparse
import wandb
import os
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, utils
from torch.optim.lr_scheduler import StepLR
from torchviz import make_dot

import matplotlib.pyplot as plt
from PIL import Image
import math
%matplotlib inline

from skimage import io, transform
from sklearn.metrics import roc_auc_score




# Meta-criteria

In [2]:
def metacriteria_attractive(A_attr: dict, B_attr: dict, C_attr: dict):
  A_attractive = A_attr['Attractive']
  B_attractive = B_attr['Attractive']
  C_attractive = C_attr['Attractive']

  if A_attractive == B_attractive:
    if A_attractive == C_attractive:
      return 0.5
    return 1
  if A_attractive == C_attractive:
    return 0
  return 0.5

def metacriteria_similar_hair(a, b, c):
  return 0

def metacriteria_similar_facial_hair(a, b, c):
  if(a == b and a != c):
    return 1
  if(a == b and a == c):
    return 0.5
  return 0

# Settings

In [67]:
# data settings:

n_test = 5000
# metacriteria = metacriteria_sameNumber
batch_size=64 #input batch size for training (default: 64)
batch_size_test=1000 


# model settings
epochs=40 #number of epochs to train (default: 14)
lr=3 #learning rate (default: 1.0)
gamma=0.7 #Learning rate step gamma (default: 0.7)
seed=42 #random seed (default: 42)
save_model=False #save the trained model (default: False)

# misc settings
no_cuda=False #disables CUDA training (default: True)
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device(3)
kwargs = {'num_workers': 10, 'pin_memory': True}

print("Device:", device)

Device: cuda:3


### Data Directories

In [93]:
# Danny's Working Directory
data_dir = 'data/celeba'
encoder_path = 'models/celeba_encoder_64.pt'

# Danny's CelebA Paths
os.chdir('/home')
os.listdir()

['.git',
 'code',
 'docker',
 '.ipynb_checkpoints',
 'wandb',
 'vae.pt',
 '.gitignore',
 'data',
 'models',
 'README.md']

#CelebA Dataset

### Get Dataset

### Transforms

In [94]:
img_shape = (64, 64)
img_channels_shape = (1,64,64)

# transforms
tfms_train = transforms.Compose([
    transforms.Grayscale(),
    transforms.RandomHorizontalFlip(),
    transforms.Resize(img_shape),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

tfms_val = tfms_test = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize(img_shape),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

###Dataset Class

In [95]:
class CelebA(Dataset):
  def __init__(self, root_dir: str, partition: str, transform=None):
    # the image data
    self.img_dir = f"{root_dir}/img_align_celeba"

    # the partition data
    if partition=='train':
      self.partition_ix = 0
    elif partition=='val':
      self.partition_ix = 1
    elif partition=='test':
      self.partition_ix = 2
    else:
      raise ValueError("partition must be one of 'train', 'val', or 'test'")

    df_partitions = pd.read_csv(f"{root_dir}/list_eval_partition.csv")
    self.df_partitions = df_partitions[df_partitions['partition']==self.partition_ix]

    # the attribute data
    df_attributes = pd.read_csv(f"{root_dir}/list_attr_celeba.csv")

    # filter the attributes
    self.df_attributes = self.df_partitions.merge(df_attributes, how='left', on='image_id').drop(columns=['partition'])

    # transforms
    self.transform = transform

  def __len__(self):
    return self.df_attributes.shape[0]

  def __getitem__(self, ix):
    # image
    filepath = f"{self.img_dir}/{self.df_attributes.image_id[ix]}"
    image = Image.open(filepath)
    if self.transform:
      image = self.transform(image)

    # attributes
    attributes =self.df_attributes.iloc[ix, 1:].to_dict()

    return {'image': image, 'attributes': attributes}


#Triplet dataset class
Where the inputs are:
*   List of triple indices to use for training
*   Original training set
*   Function for evaluating meta-criteria

And the output is:
*   ((A: image,B: image,C: image), target: bool)



### Defining the class

In [96]:
class ImageTripletDataset(Dataset):
    """Dataset of triplets of images"""

    def __init__(self, ImageDataset, num_triplets, criteria):
        """
        Args:
            ImageDataset (torch.utils.data.Dataset):  A pytorch dataset that serves individual images
            num_triplets (int): Number of triplets
        """
        self.imagedataset=ImageDataset
        self.num_triplets = num_triplets
        self.criteria = criteria

        # generate indices list
        self.indices = np.random.randint(0, len(self.imagedataset), (self.num_triplets, 3))
        # TODO: make sure there are no duplicates?

    def __len__(self):
        return self.num_triplets

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        Aix, Bix, Cix = self.indices[idx]

        A = self.imagedataset[Aix]
        B = self.imagedataset[Bix]
        C = self.imagedataset[Cix]

        A_img, A_attr = A['image'], A['attributes']
        B_img, B_attr = B['image'], B['attributes']
        C_img, C_attr = C['image'], C['attributes']

        sample = {'A': A_img, 'B': B_img, 'C':C_img, 
                  'target': self.criteria(A_attr,B_attr,C_attr),
                  'image_indices': (Aix, Bix, Cix), 'image_digits': (A_attr, B_attr, C_attr)}

        return sample

### Creating datasets/dataloaders for training and testing

# VAE Model

### Encoder Helper Functions

In [97]:
class ResNetBlock(nn.Module):
    r"""Resnet style block for predicting color shift masks on input images. 
    
    Args:
        num_in (int) - number of input channels (and output channels)
        num_features (int) - number of intermediate channels in resnet block
    """

    def __init__(self, num_in, num_mid, kernel_size=5):

        super(ResNetBlock, self).__init__()
        
        self.res = nn.Sequential(OrderedDict([
            # conv block 1
            ('conv0', nn.Conv2d(num_in, num_mid, kernel_size, stride=1,
                                padding=(kernel_size-1)//2, bias=False)),
            ('norm0', nn.BatchNorm2d(num_mid)),
            ('relu0', nn.ReLU(inplace=True)),
            # conv block 2
            ('conv1', nn.Conv2d(num_mid, num_in, kernel_size, stride=1,
                                padding=(kernel_size-1)//2, bias=False)),
            ('norm1', nn.BatchNorm2d(num_in)),

        ]))
        
        self.relu1 = nn.ReLU(inplace=True)

    def forward(self, x):
        # resnet style output: add input to features at relu
        return self.relu1(x + self.res(x))
    
class EncoderBlock(nn.Module):
    def __init__(self, n_in, n_mid, n_out, kernel_size=5):
        super(EncoderBlock, self).__init__()
        
        self.block = nn.Sequential(
            ResNetBlock(n_in, n_mid, kernel_size),
            nn.Conv2d(n_in, n_out, kernel_size=kernel_size, padding=(kernel_size-1)//2, stride=2),
            nn.BatchNorm2d(n_out),
            nn.ReLU(inplace=True)
        )
        
    def forward(self, x):
        return self.block(x)
        
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

### Encoder 
VAE Encoder copied from a separate VAE Training Notebook

In [98]:
class Encoder(nn.Module):
    def __init__(self, input_shape, n_mid=(96,64,32,32), n_res=64, kernel_size=5):
        super(Encoder, self).__init__()
        
        self.encoder = nn.Sequential(
            # start
            nn.Conv2d(input_shape[0], n_mid[0], kernel_size=7, padding=3, stride=2),
            #nn.MaxPool2d(kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(n_mid[0]),
            nn.ReLU(),
            # encoding blocks
            EncoderBlock(n_mid[0], n_res, n_mid[1], kernel_size),
            EncoderBlock(n_mid[1], n_res, n_mid[2], kernel_size),
            EncoderBlock(n_mid[2], n_res, n_mid[3], kernel_size),
            Flatten()
        )
        
    def forward(self, x):
        return self.encoder(x)

# Triplet model

### Triplet probability class

In [193]:
# From Van der Maaten: https://ieeexplore.ieee.org/abstract/document/6349720/?casa_token=_dWfybjO5O4AAAAA:T4Al2g3ZWaHcRwCzlp9QqRji1amJ-uCHwbEyAOHk3_AH9uIGjs4iBFaUf8XO-wqLpd7D2BH1eQ
# this has 0 free parameters
class TripletProbability(nn.Module):
  def __init__(self, alpha):
    super(TripletProbability, self).__init__()
    self.alpha=alpha

  def t_dist(self, d):
    return (1+d**2/self.alpha)**(-1*(self.alpha+1)/2)
  
  def forward(self, dAB, dAC):
    tAB = self.t_dist(dAB)
    tAC = self.t_dist(dAC)
    return tAB / (tAB + tAC)

### Pairwise-distance neural network module

In [203]:
class PairwiseDistance(nn.Module):
  def __init__(self, n_hid):
    super(PairwiseDistance, self).__init__()
    self.n_hid = n_hid

    self.f = nn.Sequential(
        nn.Linear(2*self.n_hid, self.n_hid),
        nn.ReLU(),
        nn.Linear(self.n_hid,1)
    )
  
  def forward(self, A, B):
    # A: [batch_size, n_hid]
    # B: [batch_size, n_hid]
    ### Size of A and B are 64 each
    print("A Size: ", A.shape)
    AB = torch.cat([A, B], dim=0) # I changed this from 1 to 0 in order to make the mat dims match
    print("AB Size: ", AB.shape)
    ### Size of AB is 128
    squeezed = self.f(AB).squeeze()
    print("Squeezed AB Size: ", squeezed.shape)
    return squeezed

### TripletNet 
A neural network for modeling whether or not a triplet of images, (A, B, C), 
satisfies the proposition "A is more similar to B than C" according to a meta-
criteria. 

In [212]:
class TripletNet(nn.Module):
    def __init__(self, n_hid=10, alpha=1):
        super(TripletNet, self).__init__()

        self.n_hid=n_hid
        self.alpha=alpha

        # feature encoder
        # resuse for each input image
        self.encoder = Encoder(img_channels_shape, kernel_size=5)
        
        # distance computer: takes two samples and computes a distance
        # reuse this for pairs (A, B) and (A, C)
        # can make this more complex or more simple in future
        self.pairwise_distance = PairwiseDistance(n_hid=self.n_hid)

        # triplet probability computer defined in the class above
        self.triplet_probability = TripletProbability(self.alpha)
        
    def forward(self, A, B, C):
      # first compute all of the encodings
        print("A shape pre encoder: " , A.shape)
        A, B, C = [self.encoder(x) for x in (A,B,C)]
        print("A shape post encoder: ", A.shape)
        
      # then get the pairwise distances
        ### Size of A B C are 64 each - correct
        dAB = self.pairwise_distance(A, B)
        dAC = self.pairwise_distance(A, C)
        ### Size of dAB and dAC are 128 each - incorrect
        
      # finally return the triplet probability
        return self.triplet_probability(dAB, dAC)

# Training and Testing

In [213]:
#@title Training and testing functions
def train(model, criteria, device, loader, optimizer):
  model.train()

  mean_batch_losses = []
  for batch_idx, batch_dict in enumerate(loader):
    A, B, C, target = [batch_dict[key].to(device) for key in ["A", "B", "C", "target"]]
    optimizer.zero_grad()
    output = model(A,B,C)
    ### output is computing 128 probabilities instead of 64
    loss = criteria(output.float(), target.float())
    loss.backward()
    optimizer.step()
    mean_batch_losses.append(loss.item())
        
  return np.mean(mean_batch_losses)

            
def test(model, criteria, device, loader):
    model.eval()

    mean_batch_losses = []
    outputs = []
    targets = []

    with torch.no_grad():
        for batch_idx, batch_dict in enumerate(loader):
          A, B, C, target = [batch_dict[key].to(device) for key in ["A", "B", "C", "target"]]
          output = model(A, B, C)
          loss = criteria(output.float(), target.float()) 

          # store results
          mean_batch_losses.append(loss.item())
          outputs.append(output)
          targets.append(target)

    outputs = torch.cat(outputs)
    targets = torch.cat(targets)


    
    return np.mean(mean_batch_losses), outputs, targets

# TRAINING LOOP

### Loop Helper Functions

In [214]:
def makeData(n_train, metacriteria):
  #training data
 # image_dataset_train = dataPath
    image_dataset_train = CelebA(data_dir, "train", transform = tfms_train)
    triplet_dataset_train = ImageTripletDataset(image_dataset_train, n_train, metacriteria)
    train_loader = torch.utils.data.DataLoader(
      triplet_dataset_train,
      batch_size=batch_size, shuffle=True, **kwargs)

    # testing data
    image_dataset_test = CelebA(data_dir, "test", transform = tfms_test)
    triplet_dataset_test = ImageTripletDataset(image_dataset_test, n_test, metacriteria)
    test_loader = torch.utils.data.DataLoader(
      triplet_dataset_test,
      batch_size=batch_size_test, shuffle=True, **kwargs)

    return train_loader,test_loader



def makeModel():
  model = TripletNet(n_hid=256).to(device)
  encoder_state_dict = torch.load(encoder_path)
  model.encoder.load_state_dict(encoder_state_dict)
  
  return model



def getLowestError(model, train_loader, test_loader):
  optimizer = optim.Adadelta(model.parameters(), lr=lr)
  scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
  criteria = nn.BCELoss()

  best_test = 10**10
  best_train = 10**10
  best_auroc_filtered = 0.5
  best_auroc_all = 0.5
  best_accuracy_all = 0.5
  best_accuracy_filtered = 0.5
  best_epoch = 0
  for epoch in range(1, epochs + 1):
    train_loss = train(model, criteria, device, train_loader, optimizer)
    test_loss, outputs, targets = test(model, criteria, device, test_loader)
    scheduler.step()

    auroc_filtered = getAUROC_filtered(outputs, targets)
    auroc_all = getAUROC_all(outputs, targets)
    accuracy_filtered = getAccuracy_filtered(outputs, targets)
    accuracy_all = getAccuracy_all(outputs,targets)
    
    print("Train Loss: %0.3f. Test Loss: %0.3f. AUROC_Filtered: %0.3f. AUROC_All: %0.3f. Accuracy Filtered: %0.3f. Accuracy All: %0.3f. Epoch: %i" % (train_loss, test_loss, auroc_filtered, auroc_all, accuracy_filtered, accuracy_all, epoch))
    if test_loss<best_test:
      best_test = test_loss
      best_train = train_loss.item()
      best_auroc_filtered = auroc_filtered
      best_auroc_all = auroc_all
      best_accuracy_filtered = accuracy_filtered
      best_accuracy_all = accuracy_all
      best_epoch = epoch
    if epoch > best_epoch + 5:
      break

  #return best_test, best_auroc, best_epoch
  return best_test, best_epoch, best_accuracy_all, best_accuracy_filtered, best_auroc_filtered, best_auroc_all

def getAUROC_filtered(outputs, targets):
  ix_keep = targets!=0.5
  filteredOutputs = outputs[ix_keep]
  filteredTargets = targets[ix_keep]
  return roc_auc_score(filteredTargets.cpu().numpy(), filteredOutputs.cpu().numpy())

def getAUROC_all(outputs, targets):
  predictions = outputs.cpu().numpy()
  targets = targets.cpu().numpy()
  binned_predictions = []
  binned_targets = []

  bins = [0, 1/2, 1]
  bin_indices = np.digitize(predictions, bins)
  for index in bin_indices:
    if index == 1:
      binned_predictions.append(0)
    elif index == 2:
      binned_predictions.append(1)

  bin_indices = np.digitize(targets, bins)
  for target in targets:
    if target == 0:
      binned_targets.append(0)
    elif target == 0.5:
      binned_targets.append(1)
    elif target == 1:
      binned_targets.append(1)
  
  return roc_auc_score(binned_targets, binned_predictions)

def getAccuracy_filtered(outputs, targets):
  ix_keep = targets!=0.5
  filteredOutputs = outputs[ix_keep]
  filteredTargets = targets[ix_keep]
  filteredOutputs = filteredOutputs.cpu().numpy()
  filteredTargets = filteredTargets.cpu().numpy()

  correct = 0;
  results = np.equal(filteredOutputs, filteredTargets)
  for result in results:
    if result:
      correct += 1
  
  return (correct / filteredTargets.size)


def getAccuracy_all(predictions, targets):
  predictions = predictions.cpu().numpy()
  targets = targets.cpu().numpy()
  new_predictions = []
  correct = 0

  bins = [0, 1/3, 2/3, 1]
  bin_indices = np.digitize(predictions, bins)
  for index in bin_indices:
    if index == 1:
      new_predictions.append(0)
    elif index == 2:
      new_predictions.append(0.5)
    elif index == 3:
      new_predictions.append(1)
  
  results = np.equal(new_predictions,targets)
  for result in results:
    if result:
      correct += 1
  
  return (correct / predictions.size)

### The Loop

In [215]:
metacriteria_list = [metacriteria_attractive]                    
trainingSize = np.logspace(2, 3.69897000434, 15).astype(np.int)


for metacriteria in metacriteria_list:

  run = wandb.init(project='qualitative-analysis', entity='witw', config = {
        "metacriteria": metacriteria.__name__, "pretrained": True}, reinit = True)
  wandb.run.name = "CELEBA6-pretrained" + metacriteria.__name__ 

  for n_train in trainingSize:
    
    print("TRAINING SIZE: %i", n_train)
    train_loader,test_loader = makeData(n_train, metacriteria)
    
    # make model
    model = makeModel()
    
    #test model
    best_test, best_epoch, best_accuracy_all, best_accuracy_filtered, best_auroc_filtered, best_auroc_all = getLowestError(model,train_loader,test_loader)
    wandb.log({'Datasize': n_train,
               'Test_Loss': best_test, 
               'AUROC_filtered': best_auroc_filtered, 
               'Accuracy_filtered': best_accuracy_filtered, 
               'Accuracy_all': best_accuracy_all, 
               'Epoch': best_epoch})
    
    print("Datasize: %i", n_train)    
    print("Best Test Loss: %0.3f. Best AUROC Filtered: %0.3f. Best AUROC All: %0.3f. Best Accuracy Filtered: %0.3f. Best Accuracy All: %0.3f. Best Epoch: %i\n" % (best_test, best_auroc_filtered, best_auroc_all, best_accuracy_filtered, best_accuracy_all, best_epoch))


    if save_model:
        torch.save(model.state_dict(), "mnist_flat.pt")

  run.finish()




[34m[1mwandb[0m: wandb version 0.10.30 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


TRAINING SIZE: %i 100
A shape pre encoder:  torch.Size([64, 1, 64, 64])
A shape post encoder:  torch.Size([64, 512])
A Size:  torch.Size([64, 512])
AB Size:  torch.Size([128, 512])
Squeezed AB Size:  torch.Size([128])
A Size:  torch.Size([64, 512])
AB Size:  torch.Size([128, 512])
Squeezed AB Size:  torch.Size([128])


ValueError: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([128])) is deprecated. Please ensure they have the same size.

In [None]:
df

# Visualize the embeddings

In [None]:
def get_encodings(model, device, loader):
    model.eval()

    outputs = []
    targets = []
    with torch.no_grad():
        for batch_idx, (img, target) in enumerate(loader):
          output = model(img.to(device))
          outputs.append(output)
          targets.append(target)

    outputs = torch.cat(outputs)
    targets = torch.cat(targets)

    
    return outputs, targets

In [None]:
encoder = model.encoder
image_loader_test = torch.utils.data.DataLoader(
    image_dataset_test,
    batch_size=batch_size_test, shuffle=True, **kwargs)

outputs, targets = get_encodings(encoder, device, image_loader_test)

NameError: ignored

In [None]:
test_embeddings = outputs.cpu().numpy()
targets = targets.cpu().numpy()

In [None]:
reducer = umap.UMAP()
embedding = reducer.fit_transform(test_embeddings)
embedding.shape

In [None]:
plt.figure(figsize=(15,15))
sns.scatterplot(x=embedding[:,0], y=embedding[:,1], hue=targets.astype(str), hue_order=[str(i) for i in range(10)])
plt.show()

In [None]:
pd.concat([pd.DataFrame(embedding), pd.DataFrame(targets)], axis = 1).to_csv(dataPath + "least_common_multiple_UMAP.csv")

In [None]:
def experiment_criteria(metacriteria, epochs=3, dataset_train=False, dataset_test=False, n_train=50000, n_test=5000, batch_size=64,batch_size_test=1000, UMAP=True):
  #settings
  lr=1
  gamma=0.7
  seed=42
  no_cuda=False
  use_cuda = not no_cuda and torch.cuda.is_available()
  torch.manual_seed(seed)
  device = torch.device("cuda" if use_cuda else "cpu")
  kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

  if not dataset_train and dataset_test:
    #download training data if not included
    dataset_train = datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
    # download testing data if not included
    dataset_test = datasets.MNIST('../data', train=False, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
  #train data
  triplet_dataset_train = ImageTripletDataset(dataset_train, n_train, metacriteria)
  train_loader = torch.utils.data.DataLoader(
    triplet_dataset_train,
    batch_size=batch_size, shuffle=True, **kwargs)
  
  #test data
  triplet_dataset_test = ImageTripletDataset(dataset_test, n_test, metacriteria)
  test_loader = torch.utils.data.DataLoader(
    triplet_dataset_test,
    batch_size=batch_size_test, shuffle=True, **kwargs)
  
  model = TripletNet().to(device)

  optimizer = optim.Adadelta(model.parameters(), lr=lr)
  scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
  criteria = nn.BCELoss()

  for epoch in range(1, epochs + 1):
    train_loss = train(model, criteria, device, train_loader, optimizer)
    test_loss, outputs, targets = test(model, criteria, device, test_loader)
    scheduler.step()

    roc_auc = roc_auc_score(targets.cpu().numpy(), outputs.cpu().numpy())

    print("Train loss: %0.3f. Test loss: %0.3f. AUROC: %0.3f" % (train_loss, test_loss, roc_auc))

  if UMAP:
    encoder = model.encoder
    image_loader_test = torch.utils.data.DataLoader(
      dataset_test,
      batch_size=batch_size_test, shuffle=True, **kwargs)

    outputs, targets = get_encodings(encoder, device, image_loader_test)
    
    test_embeddings = outputs.cpu().numpy()
    targets = targets.cpu().numpy() 

    reducer = umap.UMAP()
    embedding = reducer.fit_transform(test_embeddings)
    embedding.shape

    plt.figure(figsize=(15,15))
    sns.scatterplot(x=embedding[:,0], y=embedding[:,1], hue=targets.astype(str), hue_order=[str(i) for i in range(10)])
    plt.show()

In [None]:
experiment_criteria(metacriteria_least_common_multiple, epochs=6,dataset_train=image_dataset_train, dataset_test=image_dataset_test)