# Checking Quantitative Metrics


In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
%cd /content/gdrive/MyDrive/internship/style-gan/

/content/gdrive/MyDrive/internship/style-gan


In [None]:
!pip install click requests tqdm pyspng ninja imageio-ffmpeg==0.4.3
!pip install lpips
!pip install pytorch-ignite
!pip install pytorch-msssim



In [1]:
SAVING_DIR = 'stuff/results/metrics/'
PATH_DIR = "stuff/data/input/"

In [None]:
!ls $SAVING_DIR/images/no_square_regularizer_lambda_0-001/last_generated

aatik-tasneem-7omHUGhhmZ0-unsplash_latents_iters_001300_step_0100_w_mean.png
aiony-haust-3TLl_97HNJo-unsplash_latents_iters_001300_step_0100_w_mean.png
aleksandr-minakov-xDyqR14KyAs-unsplash_latents_iters_001300_step_0100_w_mean.png
alex-lee-l7oI8wCfQ8Y-unsplash_latents_iters_001300_step_0100_w_mean.png
amin-rk-QKxRH1sAfYY-unsplash_latents_iters_001300_step_0100_w_mean.png
andrew-heald-z2wyh1Maq8E-unsplash_latents_iters_001300_step_0100_w_mean.png
annie-spratt-ZyWN6N41JUc-unsplash_latents_iters_001300_step_0100_w_mean.png
austin-wade-X6Uj51n5CE8-unsplash_latents_iters_001300_step_0100_w_mean.png
awab-husameldin-678MnkzVdRU-unsplash_latents_iters_001300_step_0100_w_mean.png
ayo-ogunseinde-sibVwORYqs0-unsplash_latents_iters_001300_step_0100_w_mean.png
dorrell-tibbs-gisFZKWpKQ4-unsplash_latents_iters_001300_step_0100_w_mean.png
pexels-wallace-chuck-4580470_latents_iters_001300_step_0100_w_mean.png


## Reconstruction Metrics

### Peak signal-to-noise ratio (PSNR)

PSNR is most easily defined via the mean squared error (MSE). Given a noise-free $m\times n$ monochrome image $I$ and its noisy approximation $K$, MSE is defined as:

$$M S E=\frac{1}{m n} \sum_{i=0}^{m-1} \sum_{j=0}^{n-1}[I(i, j)-K(i, j)]^{2} $$


The PSNR is defined as:

$$
\begin{aligned}
P S N R &=10 \cdot \log _{10}\left(\frac{M A X_{I}^{2}}{M S E}\right) \\
&=20 \cdot \log _{10}\left(\frac{M A X_{I}}{\sqrt{M S E}}\right) \\
&=20 \cdot \log _{10}\left(M A X_{I}\right)-10 \cdot \log _{10}(M S E)
\end{aligned}
$$

Here, $MAX_I$ is the maximum possible pixel value of the image.


In [None]:
#@title build_tensor_results 
#@markdown Load the generated imgs with its respective reference img
def build_tensor_results(path_generated_imgs, path_references):
  input_data = load_data(path_references)

  generated_imgs = load_data(path_generated_imgs)

  full_batches = []

  for i in range(12):
    references = input_data[i]['img']
    synthetics = generated_imgs[i]['img']

    batch_data = np.stack([synthetics, references], axis = 0)

    full_batches.append(batch_data)

  full_batches = np.array(full_batches)
  print("full_batches numpy: ", full_batches.shape)

  # convert to pytorch tensor
  full_batches = torch.tensor(full_batches, device = DEVICE, dtype = torch.float32)
  full_batches = full_batches.permute(0, 1, 4, 2, 3)
  print("full_batches tensor: ", full_batches.size())

  return full_batches

In [None]:
path_generated_imgs = os.path.join(SAVING_DIR,"images/no_square_regularizer_lambda_0-001/last_generated/")

full_batches = build_tensor_results(path_generated_imgs)
full_batches.size()

full_batches numpy:  (12, 2, 1024, 1024, 3)
full_batches tensor:  torch.Size([12, 2, 3, 1024, 1024])


torch.Size([12, 2, 3, 1024, 1024])

In [None]:
def get_PSNR(full_batches):
  def process_function(engine, batch):
      y_pred = batch[0]
      y = batch[1]
      return y_pred, y

  engine = Engine(process_function)
  psnr = PSNR(data_range=255.0)
  psnr.attach(engine, "psnr")

  state = engine.run(full_batches)
  print(f"n_imgs: {full_batches.size(0)} PSNR: {state.metrics['psnr']}")

  return state.metrics['psnr']

In [None]:
psnr_metric = get_PSNR(full_batches)

n_imgs: 12 PSNR: 23.279572988230957


## Root Mean Square Deviation (RMSD)

Given two $m\times n$ images $I_1$ and $I_2$

$$M S E=\frac{1}{m n} \sum_{i=0}^{m-1} \sum_{j=0}^{n-1}[I_1(i, j)-I_2(i, j)]^{2} $$

Then, RMSD is

$$
\mathrm{RMSD}=\sqrt{MSE}
$$

In [None]:
def get_RMSD(full_batches):
  generates = full_batches[:,0,:,:,:] / 255.0
  references = full_batches[:,1,:,:,:] / 255.0

  criterion = nn.MSELoss()
  rmsd = torch.sqrt(criterion(generates, references))

  print(f"n_imgs: {full_batches.size(0)} RMSD: {rmsd}")
  return rmsd

In [None]:
get_RMSD(full_batches)

n_imgs: 12 RMSD: 0.07621462643146515


tensor(0.0762, device='cuda:0')

## VGG perceptual similarity

In [None]:
#@title the VGG16 Perceptual Network

class PerceptualVGG16(torch.nn.Module):
    def __init__(self, requires_grad=False, n_layers=[2, 4, 14, 21]):
        super(PerceptualVGG16, self).__init__()
        
        # Dowsampling according to input of ImageNet 256x256
        self.upsample2d = torch.nn.Upsample(scale_factor=256/RESOLUTION, mode='bicubic')

        # Get the pretrained vgg16 model
        vgg_pretrained_features = models.vgg16(pretrained=True).features

        self.slice0 = torch.nn.Sequential()
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        
        # [0,1] layers indexes
        for x in range(n_layers[0]):  
            self.slice0.add_module(str(x), vgg_pretrained_features[x])
            
        # [2, 3] layers indexes
        for x in range(n_layers[0], n_layers[1]):  # relu1_2
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        
        # [4, 13] layers indexes
        for x in range(n_layers[1], n_layers[2]): # relu3_2
            self.slice2.add_module(str(x), vgg_pretrained_features[x])

        # [14, 20] layers indexes
        for x in range(n_layers[2], n_layers[3]):# relu4_2
            self.slice3.add_module(str(x), vgg_pretrained_features[x])

        # Setting the gradients to false
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad=False
                
    def forward(self, x):
        upsample = self.upsample2d(x)
        
        h0 = self.slice0(upsample)
        h1 = self.slice1(h0)
        h2 = self.slice2(h1)
        h3 = self.slice3(h2)

        return h0, h1, h2, h3

In [None]:
#@title Pass VGG network
def get_vgg_perceptual(full_batches, separate = True):

  generates = full_batches[:,0,:,:,:].clone() / 255.0
  references = full_batches[:,1,:,:,:].clone() / 255.0

  # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
  #                                std=[0.229, 0.224, 0.225])
  # generates = normalize(generates)
  # references = normalize(references)

  perceptual_net = PerceptualVGG16(n_layers=[2,4,14,21]).to(DEVICE)

  def get_loss(synth_img, original_img):
    real_0,real_1,real_2,real_3 = perceptual_net(original_img)
    synth_0,synth_1,synth_2,synth_3 = perceptual_net(synth_img)

    perceptual_loss=0
    perceptual_loss+=MSE_Loss(synth_0,real_0)
    perceptual_loss+=MSE_Loss(synth_1,real_1)
    perceptual_loss+=MSE_Loss(synth_2,real_2)
    perceptual_loss+=MSE_Loss(synth_3,real_3)

    return perceptual_loss

  if separate:
    perceptual_per_img = []

    for i in range(full_batches.size(0)):
      
      perceptual_loss = get_loss(generates[i].unsqueeze(0), references[i].unsqueeze(0))

      perceptual_per_img.append(perceptual_loss.item())

    metric = np.mean(perceptual_per_img)
  else:

    metric = get_loss(generates, references)

  print(f"n_imgs: {full_batches.size(0)} VGG: {metric}")

  return metric

In [None]:
def get_VGG(full_batches, separate = True):
  url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
  with dnnlib.util.open_url(url) as f:
      perceptual_vgg16 = torch.jit.load(f).eval().to(DEVICE)

  generates = full_batches[:,0,:,:,:].clone()
  references = full_batches[:,1,:,:,:].clone()

  # generates = F.interpolate(generates, size=(256, 256), mode='area')
  # references = F.interpolate(references, size=(256, 256), mode='area')
  
  perceptual_per_img = []
  
  if separate:
    for i in range(full_batches.size(0)):

      synth_features = perceptual_vgg16(generates[i].unsqueeze(0), resize_images=True, return_lpips=False)
      reference_features = perceptual_vgg16(references[i].unsqueeze(0), resize_images=True, return_lpips=False)

      perceptual_loss = (reference_features - synth_features).square().sum()

      perceptual_per_img.append(perceptual_loss.item())

    metric = np.mean(perceptual_per_img)
  else:
    synth_features = perceptual_vgg16(generates, resize_images=True, return_lpips=False)
    reference_features = perceptual_vgg16(references, resize_images=True, return_lpips=False)

    metric = (reference_features - synth_features).square().sum()

  print(f"n_imgs: {full_batches.size(0)} VGG: {metric}")

  return metric

In [None]:
get_VGG(full_batches, separate=False)

Downloading https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt ... done
n_imgs: 12 VGG: 0.46020272374153137


tensor(0.4602, device='cuda:0')

## LPIPS Perceptual Metric

In [None]:
def get_LPIPS(full_batches, separate = True):
  url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
  with dnnlib.util.open_url(url) as f:
      perceptual_vgg16 = torch.jit.load(f).eval().to(DEVICE)

  generates = full_batches[:,0,:,:,:].clone()
  references = full_batches[:,1,:,:,:].clone()

  generates = F.interpolate(generates, size=(256, 256), mode='area')
  references = F.interpolate(references, size=(256, 256), mode='area')
  
  perceptual_per_img = []
  
  if separate:
    for i in range(full_batches.size(0)):

      synth_features = perceptual_vgg16(generates[i].unsqueeze(0), resize_images=False, return_lpips=True)
      reference_features = perceptual_vgg16(references[i].unsqueeze(0), resize_images=False, return_lpips=True)

      perceptual_loss = (reference_features - synth_features).square().sum()

      perceptual_per_img.append(perceptual_loss.item())

    metric = np.mean(perceptual_per_img)
  else:
    synth_features = perceptual_vgg16(generates, resize_images=False, return_lpips=True)
    reference_features = perceptual_vgg16(references, resize_images=False, return_lpips=True)

    metric = (reference_features - synth_features).square().sum()

  print(f"n_imgs: {full_batches.size(0)} LPIPS: {metric}")

  return metric

In [None]:
get_LPIPS(full_batches)

n_imgs: 12 LPIPS: 0.11547330704828103


0.11547330704828103

## Differentiable structural similarity (SSIM) index.

In [None]:
def get_SSIM(full_batches):
  generates = full_batches[:,0,:,:,:]
  references = full_batches[:,1,:,:,:]

  metric = ssim(generates, references,data_range=255.0)

  print(f"n_imgs: {full_batches.size(0)} SSIM: {metric}")

  return metric

In [None]:
get_SSIM(full_batches)

n_imgs: 12 SSIM: 0.705211877822876


tensor(0.7052, device='cuda:0')

### Get the metrics of different executions

In [None]:
folders_results = {
    "no square lambda: 0.001" : "images/no_square_regularizer_lambda_0-001/last_generated/",
    "bicubic lambda: 0.001" :"images/lambda_0-001_bicubic/last_generated/",
    "area lambda: 0.001" :"images/lambda_0-001_area/last_generated/",
    "area lambda: 0.005" :"images/lambda_0-005_area/last_generated/",
    "area lambda: 0.01" :"images/lambda_0-01_area/last_generated/",
}

In [None]:
df = pd.DataFrame(columns=["SSIM", "RMSE", "PSNR", "VGG", "LPIPS"], index= [ k for k, v in folders_results.items()])


In [None]:
df = pd.DataFrame(columns=["SSIM", "RMSE", "PSNR", "VGG", "LPIPS"],
                  index= [ k for k, v in folders_results.items()])

for k, path in folders_results.items():
  full_batches = build_tensor_results(path)
  print(path)
  print(full_batches.size())

  metric_ssim = get_SSIM(full_batches).item()
  metric_rmse = get_RMSD(full_batches).item()
  metric_psnr = get_PSNR(full_batches)
  metric_vgg = get_VGG(full_batches,separate=False).item()
  metric_lpips = get_LPIPS(full_batches)

  df.loc[k] = [
      metric_ssim, 
      metric_rmse,
      metric_psnr, 
      metric_vgg,
      metric_lpips]

df

full_batches numpy:  (12, 2, 1024, 1024, 3)
full_batches tensor:  torch.Size([12, 2, 3, 1024, 1024])
images/no_square_regularizer_lambda_0-001/last_generated/
torch.Size([12, 2, 3, 1024, 1024])
n_imgs: 12 SSIM: 0.705211877822876
n_imgs: 12 RMSD: 0.07621462643146515
n_imgs: 12 PSNR: 23.279572988230957
n_imgs: 12 VGG: 0.46020272374153137
n_imgs: 12 LPIPS: 0.11547330704828103
full_batches numpy:  (12, 2, 1024, 1024, 3)
full_batches tensor:  torch.Size([12, 2, 3, 1024, 1024])
images/lambda_0-001_bicubic/last_generated/
torch.Size([12, 2, 3, 1024, 1024])
n_imgs: 12 SSIM: 0.6869617700576782
n_imgs: 12 RMSD: 0.08984023332595825
n_imgs: 12 PSNR: 21.962373764348147
n_imgs: 12 VGG: 0.835750162601471
n_imgs: 12 LPIPS: 0.16997494486471018
full_batches numpy:  (12, 2, 1024, 1024, 3)
full_batches tensor:  torch.Size([12, 2, 3, 1024, 1024])
images/lambda_0-001_area/last_generated/
torch.Size([12, 2, 3, 1024, 1024])
n_imgs: 12 SSIM: 0.7014564871788025
n_imgs: 12 RMSD: 0.07909423112869263
n_imgs: 12 PS

Unnamed: 0,SSIM,RMSE,PSNR,VGG,LPIPS
no square lambda: 0.001,0.705212,0.0762146,23.2796,0.460203,0.115473
bicubic lambda: 0.001,0.686962,0.0898402,21.9624,0.83575,0.169975
area lambda: 0.001,0.701456,0.0790942,22.9986,0.284373,0.120003
area lambda: 0.005,0.700685,0.0808319,22.7409,0.554058,0.128989
area lambda: 0.01,0.702564,0.0793384,22.8376,0.468568,0.127826
