In [1]:
import glob
import pandas as pd
import numpy as np
import cv2
import json
from skimage.draw import polygon
import os
import shutil
# Put all dataset images and the CSV annotation file(s) into DEFAULT_PATH, then run all cells.

DEFAULT_PATH = "E:/few_data/dd/"
PATH_TO_CSV_DATA = DEFAULT_PATH + "*.csv"
PATH_TO_IMAGES_DATA = DEFAULT_PATH + "*.jpg"

# glob returns files in arbitrary (OS-dependent) order; sort for reproducible row order.
csvs = sorted(glob.glob(PATH_TO_CSV_DATA))
if not csvs:
    raise FileNotFoundError("No CSV annotation files match " + PATH_TO_CSV_DATA)

# Reference frame whose height/width is used as the size of every generated mask.
# NOTE: cv2.imread returns None (it does not raise) when the file cannot be read.
orig_img = cv2.imread(DEFAULT_PATH + "20210712_141048_857A_ACCC8EAF31F3_0.jpg")

all_df = [pd.read_csv(csv_path) for csv_path in csvs]

# ignore_index=True gives every row a unique running label across all CSV files;
# without it each file restarts at 0 and the labels collide after concatenation.
df = pd.concat(all_df, ignore_index=True)
# Keep the row label as an explicit 'index' column (later cells group on it).
df = df.reset_index()


In [2]:
# Decode the JSON text of every region's shape description into a dict.
# Values that are already dicts are left untouched, so re-running this cell
# (without re-running the loading cell) no longer fails inside json.loads.
df['region_shape_attributes'] = df['region_shape_attributes'].apply(
    lambda attrs: attrs if isinstance(attrs, dict) else json.loads(attrs)
)

In [None]:
# Keep only the rows with a positive region_count, i.e. drop unlabeled data.
df = df.loc[df['region_count'].gt(0)]
df

### Semantic segmentation

In [4]:
# Collect, per image file, the list of region shapes, the (repeated) file name
# and the values of the 'index' column; all three Series share the same order.
areas, names, indexes = (
    df.groupby(['filename'])[column].apply(list)
    for column in ('region_shape_attributes', 'filename', 'index')
)
# One file name per group: every entry of a group's list is the same name.
res = [group[0] for group in names]
areas

filename
20210712_141048_857A_ACCC8EAF31F3_0.jpg      [{'name': 'polygon', 'all_points_x': [330, 326...
20210712_141048_857A_ACCC8EAF31F3_120.jpg    [{'name': 'polygon', 'all_points_x': [312, 297...
20210712_141048_857A_ACCC8EAF31F3_150.jpg    [{'name': 'polygon', 'all_points_x': [831, 831...
20210712_141048_857A_ACCC8EAF31F3_180.jpg    [{'name': 'polygon', 'all_points_x': [999, 987...
20210712_141048_857A_ACCC8EAF31F3_210.jpg    [{'name': 'polygon', 'all_points_x': [447, 514...
20210712_141048_857A_ACCC8EAF31F3_240.jpg    [{'name': 'polygon', 'all_points_x': [319, 313...
20210712_141048_857A_ACCC8EAF31F3_270.jpg    [{'name': 'polygon', 'all_points_x': [248, 241...
20210712_141048_857A_ACCC8EAF31F3_30.jpg     [{'name': 'polygon', 'all_points_x': [335, 321...
20210712_141048_857A_ACCC8EAF31F3_300.jpg    [{'name': 'polygon', 'all_points_x': [622, 616...
20210712_141048_857A_ACCC8EAF31F3_330.jpg    [{'name': 'polygon', 'all_points_x': [443, 426...
20210712_141048_857A_ACCC8EAF31F3_360.jpg

In [5]:
def cut_original_images(crop_left=40):
    """Write a cropped copy of every labelled image to ``DEFAULT_PATH + 'img_cut/'``.

    Each file named in the module-level list ``res`` is read from ``DEFAULT_PATH``,
    its first ``crop_left`` pixel columns are removed, and the result is saved under
    the same file name in the ``img_cut/`` sub-folder (created if missing).

    Args:
        crop_left: Number of pixel columns dropped from the left edge. The default
            of 40 follows the original note "cut to 1280 % 32 == 0" (presumably the
            source frames are 1320 px wide - TODO confirm).

    Raises:
        FileNotFoundError: If an image cannot be read.
        OSError: If a cropped image cannot be written.
    """
    out_dir = DEFAULT_PATH + 'img_cut/'
    os.makedirs(out_dir, exist_ok=True)

    for image_name in res:
        image = cv2.imread(DEFAULT_PATH + image_name)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError("Cannot read image " + DEFAULT_PATH + image_name)

        # cv2.imwrite signals failure through its boolean return value.
        if not cv2.imwrite(out_dir + image_name, image[:, crop_left:]):
            raise OSError("Cannot write image " + out_dir + image_name)

# Create the cropped copies of all labelled images under DEFAULT_PATH + 'img_cut/'.
cut_original_images()

In [6]:
def save_mask(is_cut=True):
    """Rasterise the polygon annotations of every labelled image into a PNG mask.

    For each file name in ``res`` a blank mask with the height/width of the
    module-level reference image ``orig_img`` is created, every polygon grouped
    under that file in ``areas`` is filled (and, in the multi-class layout,
    outlined and optionally marked with a centre dot), and the mask is written
    once as ``<image name without 'jpg'>png`` into the output folder.

    Args:
        is_cut: When True the first 40 pixel columns of the mask are dropped
            before saving (the same crop ``cut_original_images`` applies by
            default) and the folder is ``mask_cut_<n>_class/``; otherwise the
            full mask is saved to ``mask_<n>_class/``.

    Returns:
        The output folder path (with trailing slash) the masks were written to.

    Raises:
        FileNotFoundError: If the reference image ``orig_img`` was not loaded.
        OSError: If a mask cannot be written.
    """
    numbers_of_classes = 2

    fit_type_center = 'none' # ['rectangle', 'circle', 'none']
    thickness_center = 2
    color_center = (255, 0, 0)
    radius_center = 2

    color_mask = (0, 0, 255)
    color_polygon = (0, 255, 0)
    thickness_polygon = 2

    crop_left = 40  # columns removed from the left edge when is_cut is True

    # cv2.imread yields None for an unreadable file; fail with a clear message
    # instead of an AttributeError on ``.shape`` below.
    if orig_img is None:
        raise FileNotFoundError("Reference image 'orig_img' was not loaded; mask size is unknown")

    folder_prefix = 'mask_cut_' if is_cut else 'mask_'
    global_path = DEFAULT_PATH + folder_prefix + str(numbers_of_classes) + "_class/"
    os.makedirs(global_path, exist_ok=True)

    height, width = orig_img.shape[:2]
    channels = 3 if numbers_of_classes > 1 else 1

    # ``res`` and ``areas`` come from the same groupby, so they are aligned:
    # the i-th entry of ``areas`` holds the polygons of the i-th file name.
    # Reading the polygons from ``areas`` avoids positional look-ups in ``df``
    # through the 'index' column, which is stale once rows have been filtered.
    for image_name, polygons in zip(res, areas): # specific image
        mask = np.zeros((height, width, channels), dtype=np.uint8)

        for poly in polygons:
            # ``shape`` clips the filled area to the mask on all four sides, so
            # out-of-bounds vertices neither wrap around nor smear onto the edge.
            rr, cc = polygon(poly['all_points_y'], poly['all_points_x'],
                             shape=(height, width))

            if numbers_of_classes > 1:
                # OpenCV drawing/fitting functions need int32 (x, y) points;
                # the default integer dtype of NumPy is platform dependent.
                points = np.column_stack(
                    (poly['all_points_x'], poly['all_points_y'])
                ).astype(np.int32)

                if fit_type_center == 'none':
                    pass
                elif fit_type_center == 'circle' or len(points) < 5:
                    circle = cv2.minEnclosingCircle(points)
                    cv2.circle(mask,
                               (int(circle[0][0]), int(circle[0][1])),
                               radius=radius_center,
                               color=color_center,
                               thickness=thickness_center)
                elif fit_type_center == 'rectangle':
                    rect = cv2.minAreaRect(points)
                    cv2.circle(mask,
                               (int(rect[0][0]), int(rect[0][1])),
                               radius=radius_center,
                               color=color_center,
                               thickness=thickness_center)
                mask[rr, cc] = color_mask
                mask = cv2.polylines(mask, [points], True, color_polygon, thickness_polygon)
            else:
                mask[rr, cc] = 255

        # Saved once per image, after all its polygons are drawn.
        # PNG is used because JPEG compression would blend the label colours.
        out_file = global_path + image_name[:-3] + "png"
        out_mask = mask[:, crop_left:] if is_cut else mask
        if not cv2.imwrite(out_file, out_mask):
            raise OSError("Cannot write mask " + out_file)
    return global_path

In [7]:
def split_to_folders(res_path, split_rate=0.2, split_images=False):
    """Copy images or masks into train / val / test folders under ``DEFAULT_PATH``.

    The file names in the module-level list ``res`` are split in list order
    (no shuffling): the first ``int(split_rate * len(res))`` names form the
    validation set, the next equally sized slice forms the test set, and the
    remaining names form the training set. Validation and test sets are
    therefore disjoint.

    Args:
        res_path: Folder (with trailing slash) containing the PNG masks; only
            used when ``split_images`` is False.
        split_rate: Fraction of ``res`` assigned to each of the validation and
            test sets. With values above 0.5 the training set ends up empty.
        split_images: When True, copy the cropped images from
            ``DEFAULT_PATH + 'img_cut/'`` into ``train/``, ``val/`` and ``test/``.
            When False, copy the masks ``<name without 'jpg'>png`` from
            ``res_path`` into ``trainannot/``, ``valannot/`` and ``testannot/``.
    """
    holdout_size = int(split_rate * len(res))
    subsets = {
        'val': res[:holdout_size],
        'test': res[holdout_size:2 * holdout_size],
        'train': res[2 * holdout_size:],
    }

    for subset, subset_names in subsets.items():
        if split_images:
            # assume that we're copying cut images
            src_dir = DEFAULT_PATH + 'img_cut/'
            dst_dir = DEFAULT_PATH + subset + '/'
        else:
            src_dir = res_path
            dst_dir = DEFAULT_PATH + subset + 'annot/'
        os.makedirs(dst_dir, exist_ok=True)

        for name in subset_names:
            # Masks share the image's base name but carry a png extension.
            file_name = name if split_images else name[:-3] + 'png'
            shutil.copyfile(src_dir + file_name, dst_dir + file_name)

In [8]:
# Generate the masks, then distribute both the cropped images and the masks
# over the train / val / test folders so that a single "Run All" produces the
# complete dataset layout (previously only the images were split and the
# mask folder returned by save_mask was never used).
res_path = save_mask(is_cut=True)
split_to_folders(res_path, split_images=True)
split_to_folders(res_path, split_images=False)