## Imports

In [1]:
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm

## Settings

In [2]:
src_folder = '/home/drevital/obstacles_classification_datasets/obstacle_classification_RGB_data'
annotated_folder = '/home/drevital/obstacles_classification_datasets/rgb_6/annotated'
in_folders = ['/home/drevital/obstacles_classification_datasets/test_rgb_6']
out_folders = ['/home/drevital/obstacles_classification_datasets/test_7_new_morph']
sites = ['_'.join(s.split('_')[:-2]) for s in os.listdir(src_folder)]

In [3]:
sites

['musashi_office',
 'koki_factory',
 'israel',
 'new_factory',
 'new_factory_humid']

In [4]:
site_thresholds = {'israel': 55, 'new_factory': 50, 'new_factory_humid': 50, 'musashi_office': 40, 'koki_factory': 40}
default_threshold = 50

## Make dictionary for the image names of each site

In [5]:
site_images = defaultdict(list)

for site in sites:
    site_folder = os.path.join(src_folder, site + '_rgb_data','all_data')
    class_folders = os.listdir(site_folder)
    for cls in class_folders:
        site_images[site] += [f for f in os.listdir(os.path.join(site_folder,cls))]

## List images not found in any site

In [6]:
class_folders = ['no_obstacle', 'obstacle']

for class_folder in class_folders:
    annotated = os.listdir(os.path.join(annotated_folder, class_folder))
    for a in annotated:
        # alt_name takes into account the same name with ignoring one _ at the end
        alt_name = '.'.join(a.split('.')[:-1])[:-1] + '.jpg'
        found_states = [a in site_images[site] for site in sites]
        found = any(found_states)
        alt_found = any([alt_name in site_images[site] for site in sites])
        found = found or alt_found
        if not found:
            print(f'{class_folder}: {a}')

obstacle: 43_1561__reversed.jpg
obstacle: 43_1697__reversed.jpg
obstacle: 1_1235_1_reversed.jpg
obstacle: 43_1589__reversed.jpg
obstacle: 1_1195_1_reversed.jpg
obstacle: 43_1665__reversed.jpg
obstacle: 1_1031_1_reversed.jpg
obstacle: 43_1625__reversed.jpg
obstacle: 43_1525__reversed.jpg
obstacle: 1_725__reversed.jpg
obstacle: 1_1131_1_reversed.jpg
obstacle: 1_1027_1_reversed.jpg
obstacle: 43_1689__reversed.jpg
obstacle: 1_1035_1_reversed.jpg
obstacle: 43_1485__reversed.jpg
obstacle: 1_1111_1_reversed.jpg
obstacle: 1_1135_1_reversed.jpg
obstacle: 1_1159_1_reversed.jpg
obstacle: 43_1645__reversed.jpg
obstacle: 43_1509__reversed.jpg
obstacle: 1_1071_1_reversed.jpg
obstacle: 1_1147_1_reversed.jpg
obstacle: 43_1541__reversed.jpg
obstacle: 43_1661__reversed.jpg
obstacle: 43_1669__reversed.jpg
obstacle: 43_1677__reversed.jpg
obstacle: 1_1175_1_reversed.jpg
obstacle: 43_1569__reversed.jpg
obstacle: 43_1649__reversed.jpg
obstacle: 1_745__reversed.jpg
obstacle: 1_1263_1_reversed.jpg
obstacle: 1_

## A funciton to find the source site of a given image

In [7]:
def find_site_and_threshold(im_name):
    found_states = [im_name in site_images[site] for site in sites]
    
    if any(found_states):
        site = sites[np.argmax(found_states)]
        threshold = site_thresholds[site]
    else:
        site = 'unknown'
        threshold = default_threshold
        
    return site, threshold

## Generate <ref, current, mask> triplet from <ref, current> pair

### Mask Configuration

In [8]:
factor = 1280/720
default_erode_filter_size = [3, 3]
default_dilate_filter_size = [15, 15]

### Mask Utilities

In [9]:
def adjust_filter_by_res(filter_to_adjust):
    # check minimum filter size
    if int(filter_to_adjust[0]*factor) < 3 or int(filter_to_adjust[1]*factor) < 3:
        return (3, 3)
    else:
        return (int(filter_to_adjust[0]*factor), int(filter_to_adjust[1]*factor))

### Mask Generation

In [10]:
def triplet_image(pair, threshold):
    w = pair.shape[1]
    ref = pair[:, :w//2]
    current = pair[:, w//2:]
    diff = cv2.absdiff(current, ref)
    agg_rgb = np.stack((diff[:, :, 0], diff[:, :, 1], diff[:, :, 2])).max(0)
    _, mask = cv2.threshold(agg_rgb, threshold, 255, cv2.THRESH_BINARY)

    # New morphological operations
    erode_filter_size = adjust_filter_by_res(default_erode_filter_size)
    dilate_filter_size = adjust_filter_by_res(default_dilate_filter_size)
    _erod = cv2.erode(mask, np.ones(erode_filter_size, np.uint8), iterations=1)
    mask = cv2.dilate(_erod, np.ones(dilate_filter_size, np.uint8), iterations=2)
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)  
    
    return cv2.hconcat([ref, current, mask])

## Loop over in_folders, create <ref, current, mask> images and write the to corresponding out_folders

In [11]:
subset_names = ['train', 'eval']
class_names = ['no_obstacle', 'obstacle']

for i, in_folder in enumerate(in_folders):
    for subset_name in subset_names:
        for class_name in class_names:
            class_path = os.path.join(in_folder, subset_name, class_name)
            im_names = os.listdir(class_path)
            out_folder = os.path.join(out_folders[i], subset_name, class_name)
            Path(out_folder).mkdir(parents=True, exist_ok=True)
            for im_name in tqdm(im_names):
                im_path = os.path.join(class_path, im_name)
                pair = cv2.imread(im_path)
                site, threshold = find_site_and_threshold(im_name)
                triplet = triplet_image(pair, threshold)
                out_im_name = '.'.join(im_name.split('.')[:-1]) + f'_{site}_.jpg'
                out_path = os.path.join(out_folders[i], subset_name, class_name, out_im_name)
                cv2.imwrite(out_path, triplet)

  0%|          | 0/5 [00:00<?, ?it/s]


UnboundLocalError: local variable 'erode_filter_size' referenced before assignment