# Statistics of Data
---

In [1]:
import torch
from IPython.display import Image, clear_output 
print('PyTorch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

PyTorch 1.5.0+cu101 _CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)


In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import math
import numpy as np
import random
import os
import cv2

In [0]:
def find_normalization_values(data, num_of_inp_channels):
  means = ()
  stdevs = ()
  for i in range(num_of_inp_channels):
      if num_of_inp_channels == 1:
        pixels = data.ravel()
      elif num_of_inp_channels == 2:
        pixels = data[:,:,i].ravel()
      else:
        pixels = data[:,:,:,i].ravel()
      means = means +(round(np.mean(pixels)),)
      stdevs = stdevs +(np.std(pixels),)

  return means, stdevs


def convert_size(size_bytes):
   if size_bytes == 0:
       return "0B"
   size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
   i = int(math.floor(math.log(size_bytes, 1024)))
   p = math.pow(1024, i)
   s = round(size_bytes / p, 2)
   return "%s %s" % (s, size_name[i])

def find_size_count_images(filepath):
  img_size = 0
  for _, filename in enumerate(os.listdir(filepath)):
      src = filepath + filename
      img_size += os.stat(src).st_size
  
  return img_size, len(os.listdir(filepath))

## 1) BG
---

In [133]:
bg_path = './gdrive/My Drive/EVA4/DepthMaskDataset/data/bg/'

bg_size, bg_cnt = find_size_count_images(bg_path)
print(f"Size of BG data = {convert_size(bg_size)}")
print(f"Total number of images = {bg_cnt}")

Size of BG data = 790.72 KB
Total number of images = 100


## 2) FG
---

In [134]:
fg_path = './gdrive/My Drive/EVA4/DepthMaskDataset/data/fg/'

fg_size, fg_cnt = find_size_count_images(fg_path)
print(f"Size of FG data = {convert_size(fg_size)}")
print(f"Total number of images = {fg_cnt}")

Size of FG data = 2.16 MB
Total number of images = 100


## 3) FG Mask
---

In [136]:
fg_mask_path = './gdrive/My Drive/EVA4/DepthMaskDataset/data/fg_mask/'

fg_mask_size, fg_mask_cnt = find_size_count_images(fg_mask_path)
print(f"Size of FG mask data = {convert_size(fg_mask_size)}")
print(f"Total number of images = {fg_mask_cnt}")

Size of FG mask data = 351.34 KB
Total number of images = 100


## 4) FG_BG and Mask
---

In [0]:
from PIL import Image
from io import BytesIO

def load_zip_images(data, image_files):
    loaded_images = []
    for file in image_files:
        x = np.clip(np.asarray(Image.open(BytesIO(data[ file ])), dtype=float) / 255, 0, 1)
        loaded_images.append(x)
    return np.stack(loaded_images, axis=0)

def loadZipToMem(zip_file):
    # Load zip file into memory
    print('Loading dataset zip file...', end='')
    from zipfile import ZipFile
    input_zip = ZipFile(zip_file, 'r')
    data = {name: input_zip.read(name) for name in input_zip.namelist()}
    input_zip.close()
    
    return data

def generate_fg_bg_mask(data):
  custom_data = list((row.split('\t') for row in (data['data_label.txt']).decode("utf-8").split('\n') if len(row) > 0))
  fg_bg_list = []
  fg_bg_mask_list = []
  for fg_bg_img_row in custom_data:
    fg_bg_list.append(fg_bg_img_row[0])
    fg_bg_mask_list.append(fg_bg_img_row[1])
  random.shuffle(fg_bg_list)
  random.shuffle(fg_bg_mask_list)

  return fg_bg_list, fg_bg_mask_list


In [12]:
path='./gdrive/My Drive/EVA4/DepthMaskDataset/depth_mask_custom_dataset.zip'
data = loadZipToMem(path)
fg_bg_list, fg_bg_mask_list = generate_fg_bg_mask(data)

Loading dataset zip file...

In [127]:
fg_bg_mask = (os.stat(path).st_size)
print(f"Size of FG_BG and its mask compressed file = {convert_size(fg_bg_mask)}")

Size of FG_BG and its mask compressed file = 1.49 GB


## For FG_BG
---

In [78]:
print(f"Total number of images = {len(fg_bg_list)}")

Total number of images = 400000


In [30]:
bs = 200*20

means, stds = [], []
for i in range(0, 100):
  print(f"\n Batch{i+1} - Images [{bs*i}: {(bs*i)+bs}]")

  mean, std = find_normalization_values(load_zip_images(data, fg_bg_list[bs*i: (bs*i)+bs]), 3)
  means.append(mean)
  stds.append(std)

num_imgs = 100
fg_bg_mean = sum(i[0] for i in means)/num_imgs, sum(i[1] for i in means)/num_imgs, sum(i[2] for i in means)/num_imgs
fg_bg_std = sum(i[0] for i in stds)/num_imgs, sum(i[1] for i in stds)/num_imgs, sum(i[2] for i in stds)/num_imgs

print("For FG_BG data:-")
print(f"Mean is {fg_bg_mean}")
print(f"Std is {fg_bg_std}")

For FG_BG data:-
Mean is (1.0, 1.0, 1.0)
Std is (0.21974199573317058, 0.228182355952634, 0.24135464023694234)


# FG_BG_MASK
---

In [79]:
print(f"Total number of images = {len(fg_bg_mask_list)}")

Total number of images = 400000


In [24]:
bs = 200*20

mask_means, mask_stds = [], []
for i in range(0, 100):
  mask_mean, mask_std = find_normalization_values(load_zip_images(data, fg_bg_mask_list[bs*i: (bs*i)+bs]), 1)
  mask_means.append(mask_mean)
  mask_stds.append(mask_std)

num_imgs = 100
mask_mean = sum(i[0] for i in mask_means)/num_imgs
mask_std = sum(i[0] for i in mask_stds)/num_imgs

print("For FG_BG_MASK data:-")
print(f"Mean is {mask_mean}")
print(f"Std is {mask_std}")

For FG_BG_MASK data:-
Mean is 0.0
Std is 0.3169699513813231


## 5) Depth Map
---

In [80]:
num_depthmaps = 0
for i in range(0, 100):
  num_depthmaps += (np.load(f'{depthmap_path}/depthmap_output_batch{i+1}.npz')['output']).shape[0]
print(f"Total number of images = {num_depthmaps}")

Total number of images = 400000


In [91]:
depthmap_size = 0
for i in range(0, 100):
  depthmap_size += os.stat(f'{depthmap_path}/depthmap_output_batch{i+1}.npz').st_size
print(f"Size of depthmap compressed file = {convert_size(depthmap_size)}")

Size of depthmap compressed file = 7.78 GB


In [36]:
depthmap_path = './gdrive/My Drive/EVA4/DepthMaskDataset/data/fg_bg_depth'
bs = 200*20

depthmap_means, depthmap_stds = [], []

for i in range(0, 100):
  outputs = np.load(f'{depthmap_path}/depthmap_output_batch{i+1}.npz')['output']
  depthmap_mean, depthmap_std = find_normalization_values(outputs, 1)
  depthmap_means.append(depthmap_mean)
  depthmap_stds.append(depthmap_std)

num_imgs = 100
depthmap_mean = sum(i[0] for i in depthmap_means)/num_imgs
depthmap_std = sum(i[0] for i in depthmap_stds)/num_imgs

print("For Depth map data:-")
print(f"Mean is {depthmap_mean}")
print(f"Std is {depthmap_std}")

For Depth map data:-
Mean is 0.0
Std is 0.03913845191709697


# Total Dataset Size
---

In [138]:
print(f"Total Dataset Size = {convert_size(bg_size + fg_size + fg_mask_size + fg_bg_mask + depthmap_size)}")

Total Dataset Size = 9.27 GB
