# Creates the prototypes for the gray, augmented and normalized datasets.

In [14]:
import os
import imageio
import numpy as np
from matplotlib import pyplot as plt

from skimage.color import rgb2gray
from skimage import exposure
from skimage.morphology import ball, disk
from skimage.filters import rank
from skimage.util import img_as_ubyte
from scipy import signal

from tqdm import tqdm
import json

In [15]:
# Mount Google Drive inside the Colab runtime so the dataset folders under
# /content/drive/ are reachable; force_remount=True asks for a fresh mount
# even when the drive is already mounted.
from google.colab import drive
drive.mount("/content/drive/", force_remount=True)

Mounted at /content/drive/


## .1. Define the paths and create directories

In [16]:
# Note: "ep_data" is the alias chosen for the project folder on Google Drive.
PATH_EP_ROOT = "/content/drive/MyDrive/ep_data"

# Input datasets (grayscale, augmented and normalized), all stored directly
# under the project root.
PATH_DATASET_GRAY, PATH_DATASET_AUG, PATH_DATASET_NORM = (
    os.path.join(PATH_EP_ROOT, dataset_dir)
    for dataset_dir in ("dataset_gray", "dataset_augmented", "dataset_normalized")
)

In [17]:
# Output tree for the prototypes: one root folder holding a subfolder per
# dataset variant. Each folder is created only if it does not exist yet.
PATH_DATASET_PROTOTYPES = os.path.join(PATH_EP_ROOT, "dataset_prototypes")
(PATH_DATASET_PROTOTYPES_GRAY,
 PATH_DATASET_PROTOTYPES_AUG,
 PATH_DATASET_PROTOTYPES_NORM) = (
    os.path.join(PATH_DATASET_PROTOTYPES, variant_dir)
    for variant_dir in ("dataset_gray", "dataset_augmented", "dataset_normalized")
)

for _prototype_dir in (PATH_DATASET_PROTOTYPES,
                       PATH_DATASET_PROTOTYPES_GRAY,
                       PATH_DATASET_PROTOTYPES_AUG,
                       PATH_DATASET_PROTOTYPES_NORM):
    os.makedirs(_prototype_dir, exist_ok = True)

## .2. Utils

In [18]:
def get_histogram_summary(normalized_histogram):
  """ Computes mean, standard deviation and variance from a NORMALIZED histogram

      Parameters
      ---------------
      normalized_histogram: (np.array)
        Relative frequencies of the intensities; entry i is the relative
        frequency of intensity value i. A flat (256,) array and column/row
        shaped arrays such as (256, 1) are both accepted (the input is
        flattened before use).

      Returns
      ---------------
      dict
        Plain Python floats stored under the keys "mean", "sd" and
        "variance", so the result can be handed directly to json.dump.
  """
  # Flatten so that (256,), (256, 1) and (1, 256) inputs all yield scalars;
  # a 2-D input would otherwise make np.dot return 1-element arrays.
  frequencies = np.asarray(normalized_histogram, dtype = float).ravel()
  intensity_values = np.arange(frequencies.size)

  intensity_mean = float(np.dot(intensity_values, frequencies))
  # expectation of the square
  intensity_squared_mean = float(np.dot(np.square(intensity_values), frequencies))
  # E[x^2] - E[x]^2 can come out as a tiny negative number because of
  # floating point rounding; clamp it so the square root never yields NaN.
  intensity_var = max(intensity_squared_mean - intensity_mean ** 2, 0.0)

  summary_dict = {"mean": intensity_mean,
                  "sd": float(np.sqrt(intensity_var)),
                  "variance": intensity_var,
                 }
  return summary_dict

In [19]:
def create_dataset_prototype(dataset_path, output_path):
  """ Creates and saves image prototypes

      For every class subfolder found under dataset_path, the (256, 256)
      images of each position ("p1", "p2", "p3") are averaged pixel by pixel
      and the result is written to
      <output_path>/<class>/<class>_<position>_prototype.png.

      Parameters
      ---------------
      dataset_path: (str)
        Root folder of the dataset. Each subfolder is treated as one object
        class, named after the folder.
      output_path: (str)
        Folder that receives one subfolder of prototypes per object class.

      Notes
      ---------------
      The position is read from the third "_"-separated field of the file
      name. Files that are not jpg/png/jpeg, files whose name carries no known
      position and images that are not (256, 256) are ignored. The average of
      a position is taken over the images actually accumulated for it; a
      position without any usable image produces no prototype file.
  """
  positions = ("p1", "p2", "p3")
  image_extensions = ("jpg", "png", "jpeg")
  prototype_shape = (256, 256)

  for dirpath, _, filenames in os.walk(dataset_path):
    if dirpath == dataset_path:
      continue

    # 0. Define class (folder name, independent of the path separator)
    object_class = os.path.basename(os.path.normpath(dirpath))
    folder_path = os.path.join(output_path, object_class)

    if not os.path.exists(folder_path):
      os.makedirs(folder_path, exist_ok = True)
    else:
      print(f"'{folder_path}' already exists!")

    print(f"\nCurrent object class: {object_class}")

    #-- 1. Setup for class prototype computation per position
    pixel_sums = {pos: np.zeros(prototype_shape, dtype = "int") for pos in positions}
    # number of images actually accumulated per position
    imgs_per_pos = {pos: 0 for pos in positions}
    #--

    for file in tqdm(filenames, desc=f"{object_class}  -> "):
      if file.split(".")[-1].lower() not in image_extensions:
        continue

      name_fields = file.split("_")
      if len(name_fields) < 3 or name_fields[2] not in pixel_sums:
        # file name does not follow the expected pattern; nothing to add
        continue
      pos = name_fields[2]

      img = imageio.imread(os.path.join(dirpath, file)).astype("int")
      # Only images that already have the prototype shape are accumulated.
      if img.shape == prototype_shape:
        pixel_sums[pos] += img
        imgs_per_pos[pos] += 1

    # save prototype image
    for key_pos in positions:
      if imgs_per_pos[key_pos] == 0:
        print(f"No images found for position '{key_pos}' of class '{object_class}'; prototype skipped.")
        continue
      prototype_img = np.uint8(pixel_sums[key_pos]/imgs_per_pos[key_pos])
      imageio.imwrite(os.path.join(folder_path,
                                   object_class + "_" + key_pos + "_prototype.png"),
                      prototype_img)

In [26]:
def create_dataset_summary_hist(dataset_path, output_path,
                                histogram_title = "Histograma médio para\n o objeto "):
  """ Creates and saves the histogram image and summary

      For every class subfolder found under dataset_path, the intensity
      counts of all its jpg/png/jpeg images are accumulated into a single
      256-bin histogram. Four files are then written to
      <output_path>/<class>/:

        <class>_histogram.npy             raw counts
        <class>_normalized_histogram.npy  counts divided by the pixel total
        <class>_histogram_summary.json    output of get_histogram_summary
        <class>_histogram.png             bar plot of the normalized histogram

      Parameters
      ---------------
      dataset_path: (str)
        Root folder of the dataset. Each subfolder is treated as one object
        class, named after the folder.
      output_path: (str)
        Folder that receives one subfolder of results per object class.
      histogram_title: (str)
        Prefix of the plot title; the class name is appended to it.

      Raises
      ---------------
      ValueError
        If an image holds an intensity above 255 (or a negative one, raised
        by np.bincount).
  """
  n_levels = 256
  image_extensions = ("jpg", "png", "jpeg")

  for dirpath, _, filenames in os.walk(dataset_path):
    if dirpath == dataset_path:
      continue

    # 0. Define class (folder name, independent of the path separator)
    object_class = os.path.basename(os.path.normpath(dirpath))
    folder_path = os.path.join(output_path, object_class)

    if not os.path.exists(folder_path):
      os.makedirs(folder_path, exist_ok = True)
    else:
      print(f"'{folder_path}' already exists!")

    print(f"\nCurrent object class: {object_class}")

    #-- 2. Setup for class histogram computation
    class_histogram = np.zeros((n_levels), dtype = "int")
    #--

    for file in tqdm(filenames, desc=f"{object_class}  -> "):
      if file.split(".")[-1].lower() not in image_extensions:
        continue

      img = imageio.imread(os.path.join(dirpath, file)).astype("int")
      # class histogram update: one vectorized count per image
      img_hist = np.bincount(img.ravel(), minlength = n_levels)
      if img_hist.size > n_levels:
        raise ValueError(f"'{file}' has intensity values above {n_levels - 1}")
      class_histogram += img_hist

    #-- save histogram data, plot and summary
    with open(os.path.join(folder_path, object_class + '_histogram.npy'), 'wb') as output_file:
      np.save(output_file, class_histogram)

    # Normalize by the pixels actually counted, so the result sums to 1 even
    # when the folder holds non-image files or images of another size.
    total_pixels = class_histogram.sum()
    if total_pixels == 0:
      print(f"No images found for class '{object_class}'; normalized histogram, summary and plot skipped.")
      continue

    normalized_histogram = class_histogram/total_pixels
    with open(os.path.join(folder_path, object_class + '_normalized_histogram.npy'), 'wb') as output_file:
      np.save(output_file, normalized_histogram)

    histogram_summary = get_histogram_summary(normalized_histogram)
    with open(os.path.join(folder_path, object_class + '_histogram_summary.json'), 'w') as output_file:
      json.dump(histogram_summary, output_file)

    fig, ax = plt.subplots(figsize = (8, 4))
    ax.bar(np.arange(n_levels), normalized_histogram, color = "black")

    ax.set_xlabel("Intensidade", fontsize= 20, fontweight='bold')
    ax.set_ylabel("Frequência", fontsize= 20, fontweight='bold')
    ax.set_title(histogram_title + object_class, fontsize= 20, fontweight='bold')
    fig.subplots_adjust(top=0.85) # avoid that the title gets cropped
    fig.savefig(os.path.join(folder_path, object_class + '_histogram.png'),
                dpi=fig.dpi)
    # Close only after saving: avoids displaying histograms on the notebook
    # and releases the figure.
    plt.close(fig)
    #--
          #--


## .3. Prototype and histogram computation

### 3.1. Prototypes for all datasets

In [None]:
# Build the prototype images of every dataset variant, one after the other
# ("normalized" is the equalized-histogram dataset).
prototype_jobs = (
    ("gray", PATH_DATASET_GRAY, PATH_DATASET_PROTOTYPES_GRAY),
    ("augmented", PATH_DATASET_AUG, PATH_DATASET_PROTOTYPES_AUG),
    ("normalized", PATH_DATASET_NORM, PATH_DATASET_PROTOTYPES_NORM),
)
for dataset_label, source_dir, target_dir in prototype_jobs:
    print(f"Creating prototype images for {dataset_label} dataset")
    print("-------------------------------------")
    create_dataset_prototype(source_dir, target_dir)

'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/celular' already exists!

Current object class: celular


celular  -> : 100%|██████████| 180/180 [00:00<00:00, 192.92it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/sapato' already exists!

Current object class: sapato


sapato  -> : 100%|██████████| 216/216 [00:02<00:00, 102.77it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/chinelo' already exists!

Current object class: chinelo


chinelo  -> : 100%|██████████| 144/144 [00:01<00:00, 91.86it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/caneca' already exists!

Current object class: caneca


caneca  -> : 100%|██████████| 144/144 [00:01<00:00, 89.18it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/tesoura' already exists!

Current object class: tesoura


tesoura  -> : 100%|██████████| 144/144 [00:01<00:00, 95.61it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/livro' already exists!

Current object class: livro


livro  -> : 100%|██████████| 144/144 [00:01<00:00, 82.39it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/portacopo' already exists!

Current object class: portacopo


portacopo  -> : 100%|██████████| 144/144 [00:01<00:00, 90.43it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/garrafa' already exists!

Current object class: garrafa


garrafa  -> : 100%|██████████| 144/144 [00:01<00:00, 89.71it/s] 


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/prato' already exists!

Current object class: prato


prato  -> : 100%|██████████| 72/72 [00:12<00:00,  5.71it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/chave' already exists!

Current object class: chave


chave  -> : 100%|██████████| 144/144 [00:01<00:00, 75.26it/s] 


### 3.2. Histograms and summaries for all datasets

In [28]:
# Build histogram, summary and plot for every dataset variant, each with its
# own plot-title prefix ("normalized" is the equalized-histogram dataset).
histogram_jobs = (
    ("gray", PATH_DATASET_GRAY, PATH_DATASET_PROTOTYPES_GRAY,
     "histograma médio do conjunto cinza \n para o objeto "),
    ("augmented", PATH_DATASET_AUG, PATH_DATASET_PROTOTYPES_AUG,
     "histograma médio do conjunto aumentado \n para o objeto "),
    ("normalized", PATH_DATASET_NORM, PATH_DATASET_PROTOTYPES_NORM,
     "histograma médio do conjunto normalizado \n para o objeto "),
)
for dataset_label, source_dir, target_dir, title_prefix in histogram_jobs:
    print(f"Creating histograms for {dataset_label} dataset")
    print("-------------------------------------")
    create_dataset_summary_hist(source_dir, target_dir, title_prefix)

Creating histograms for gray dataset
-------------------------------------
'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/celular' already exists!

Current object class: celular


celular  -> : 100%|██████████| 180/180 [00:01<00:00, 167.00it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/sapato' already exists!

Current object class: sapato


sapato  -> : 100%|██████████| 216/216 [00:01<00:00, 174.57it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/chinelo' already exists!

Current object class: chinelo


chinelo  -> : 100%|██████████| 144/144 [00:01<00:00, 142.13it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/caneca' already exists!

Current object class: caneca


caneca  -> : 100%|██████████| 144/144 [00:00<00:00, 167.48it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/tesoura' already exists!

Current object class: tesoura


tesoura  -> : 100%|██████████| 144/144 [00:01<00:00, 130.12it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/livro' already exists!

Current object class: livro


livro  -> : 100%|██████████| 144/144 [00:01<00:00, 123.14it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/portacopo' already exists!

Current object class: portacopo


portacopo  -> : 100%|██████████| 144/144 [00:00<00:00, 148.40it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/garrafa' already exists!

Current object class: garrafa


garrafa  -> : 100%|██████████| 144/144 [00:00<00:00, 148.69it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/prato' already exists!

Current object class: prato


prato  -> : 100%|██████████| 72/72 [00:00<00:00, 158.81it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_gray/chave' already exists!

Current object class: chave


chave  -> : 100%|██████████| 144/144 [00:00<00:00, 155.40it/s]


Creating histograms for augmented dataset
-------------------------------------
'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/celular' already exists!

Current object class: celular


celular  -> : 100%|██████████| 675/675 [00:06<00:00, 100.95it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/sapato' already exists!

Current object class: sapato


sapato  -> : 100%|██████████| 720/720 [00:05<00:00, 131.35it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/chinelo' already exists!

Current object class: chinelo


chinelo  -> : 100%|██████████| 720/720 [00:05<00:00, 140.21it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/caneca' already exists!

Current object class: caneca


caneca  -> : 100%|██████████| 720/720 [00:05<00:00, 125.98it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/tesoura' already exists!

Current object class: tesoura


tesoura  -> : 100%|██████████| 720/720 [00:05<00:00, 140.01it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/livro' already exists!

Current object class: livro


livro  -> : 100%|██████████| 720/720 [00:04<00:00, 148.20it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/portacopo' already exists!

Current object class: portacopo


portacopo  -> : 100%|██████████| 720/720 [00:05<00:00, 136.63it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/garrafa' already exists!

Current object class: garrafa


garrafa  -> : 100%|██████████| 720/720 [00:05<00:00, 140.15it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/prato' already exists!

Current object class: prato


prato  -> : 100%|██████████| 360/360 [00:02<00:00, 145.20it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_augmented/chave' already exists!

Current object class: chave


chave  -> : 100%|██████████| 720/720 [00:04<00:00, 144.68it/s]


Creating histograms for normalized dataset
-------------------------------------
'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/prato' already exists!

Current object class: prato


prato  -> : 100%|██████████| 360/360 [00:02<00:00, 142.68it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/celular' already exists!

Current object class: celular


celular  -> : 100%|██████████| 675/675 [00:05<00:00, 124.62it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/sapato' already exists!

Current object class: sapato


sapato  -> : 100%|██████████| 720/720 [00:05<00:00, 132.45it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/chinelo' already exists!

Current object class: chinelo


chinelo  -> : 100%|██████████| 720/720 [00:05<00:00, 135.88it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/caneca' already exists!

Current object class: caneca


caneca  -> : 100%|██████████| 720/720 [00:04<00:00, 151.71it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/tesoura' already exists!

Current object class: tesoura


tesoura  -> : 100%|██████████| 720/720 [00:04<00:00, 151.89it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/livro' already exists!

Current object class: livro


livro  -> : 100%|██████████| 720/720 [00:05<00:00, 140.26it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/portacopo' already exists!

Current object class: portacopo


portacopo  -> : 100%|██████████| 720/720 [00:05<00:00, 126.83it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/garrafa' already exists!

Current object class: garrafa


garrafa  -> : 100%|██████████| 720/720 [00:05<00:00, 141.21it/s]


'/content/drive/MyDrive/ep_data/dataset_prototypes/dataset_normalized/chave' already exists!

Current object class: chave


chave  -> : 100%|██████████| 720/720 [00:05<00:00, 123.60it/s]
