# Artificial Masks Generator

Hyper-generalized artificial masks dataset generator

In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [None]:
#imports

import os
import shutil
import cv2
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [None]:
from utils.maskgen import *
from utils.visualization import *
from utils.geometry import *

In [None]:
# Dataset locations, all resolved against the folder the notebook runs from.
PATH_DIR = os.path.abspath('.')

# One output folder per split.
TRAIN_DATA_PATH, VAL_DATA_PATH, TEST_DATA_PATH = (
    os.path.join(PATH_DIR, split_dir)
    for split_dir in ('../Datasets/curved/train',
                      '../Datasets/curved/val',
                      '../Datasets/curved/test')
)

# Name of the csv file holding the waypoints of a split.
waypoint_file_name = 'waypoints.csv'

## Parameters


In [None]:
# Number of images generated for the train / validation / test splits.
N_SAMPLES_train, N_SAMPLES_val, N_SAMPLES_test = 10, 10, 10

# Forwarded to create_mask as its `curved` argument.
CURVED = True

In [None]:
# Bounds (both inclusive when drawn with random.randint) on the number of
# rows in a parcel.
NROWS_MIN = 10
NROWS_MAX = 50

# Final image resolution (height, width).
H, W = 800, 800

# Smallest inter-row distance for a generated sample to be accepted.
MIN_IRD = 10

# Range of the dimension of a hole.
HOLE_DIM = [3, 6]

# Probability of generating a hole, evaluated for every single point of a row.
HOLE_PROB = 0.1

# Minimum number of initial and final points before/after a hole.
HOLE_FRAME = 30

# Range of the row thickness.
RADIUS = [1, 2]

# Border kept around the image (can't be 0) and the area left inside it.
BORDER = 40
BORDER_AREA = (H - 2 * BORDER) * (W - 2 * BORDER)

# Angle variability.
ANGLE_VAR = 1e-3

# Border variability.
BORDER_VAR = 2

# Ratios for the random zoom (max is 100).
RATIO = [90, 100]

# Max value of l used when clipping the start/end of the rows.
MAX_L = 400

# Max displacement of the center points used to create curved rows.
MAX_CURVED_DISPL = 100

In [None]:
def gen_wp_new(line1,line2,index=0):
    """
        Compute the waypoint between two adjacent lines.

        The first two entries of each line are stacked into an (N, 2) array of
        points. The point at position `index` is taken on both lines
        (0: line starting point, -1: line ending point) and each one is matched
        with its closest point on the other line. The closer of the two matches
        is kept (the one starting from line1 wins a tie), and the returned
        waypoint is the midpoint of that pair, rounded to integers.
    """
    pts_a, pts_b = (np.stack(line[:2], axis=-1) for line in (line1, line2))
    end_a = pts_a[index]
    end_b = pts_b[index]

    # distances from each selected end point to every point of the other line
    dist_from_a = np.linalg.norm(end_a - pts_b, 2, axis=1)
    dist_from_b = np.linalg.norm(end_b - pts_a, 2, axis=1)
    closest_in_b = np.argmin(dist_from_a)
    closest_in_a = np.argmin(dist_from_b)

    # index 1 means the match starting from line2's end point is the shorter one
    start_from_b = np.argmin((dist_from_a[closest_in_b], dist_from_b[closest_in_a]))
    if start_from_b:
        pair = (end_b, pts_a[closest_in_a])
    else:
        pair = (end_a, pts_b[closest_in_b])

    midpoint = np.mean(pair, axis=0)
    return np.round(midpoint).astype('int')


def gen_waypoints_new(row_lines):
    """
        Generate the waypoints for all the rows.

        For every pair of adjacent rows two waypoints are appended, in this
        order: the one at the rows' starting points and the one at their ending
        points. No waypoint is produced before the first or after the last row.
    """
    waypoints = []
    for previous_row, current_row in zip(row_lines, row_lines[1:]):
        for end_index in (0, -1):    # 0: start of the rows, -1: end of the rows
            waypoints.append(gen_wp_new(previous_row, current_row, index=end_index))
    return waypoints


In [None]:
def get_mask_and_wp():
    """
        Generate one random mask together with its waypoints.

        Candidates are drawn until create_mask reports a good result and the
        smallest inter-row distance is at least MIN_IRD. The accepted sample is
        passed through random_zoom and returned as (mask, wp, centers, points).
    """
    while True:
        original_nrows = random.randint(NROWS_MIN,NROWS_MAX)  # number of rows
        alpha = random.uniform(0, np.pi)                       # orientation

        if np.random.uniform() >= 0.75:
            # ~25% of the times: fixed borders placed BORDER pixels inside the image
            borders = [(np.pi/2,np.array((BORDER,BORDER))),(0,np.array((BORDER,H-BORDER))),
                       (-np.pi/2,np.array((W-BORDER,H-BORDER))),(np.pi,np.array((W-BORDER,BORDER)))]
        else:
            # ~75% of the times: random borders, redrawn until the area check passes
            borders = gen_borders(BORDER,H,W)
            while not check_borders_area(borders,BORDER_AREA):
                borders = gen_borders(BORDER,H,W)

        area_center = find_active_area_center(borders)
        Q,nrows = find_intrarow_distance(original_nrows,alpha,borders,area_center)
        centers = find_centers(nrows,alpha,area_center,Q)

        # how the rows are shortened with respect to the border intersection:
        #   choice <= 0.3       (~30%): defaults below, no shortening arguments
        #   0.3 < choice < 0.6  (~30%): shorter, following the border shape
        #   choice > 0.6        (~40%): shorter, clipped
        choice = np.random.rand()
        shorter_l_ratios = (1,1)
        clip_l = None
        if 0.3<choice<0.6:
            shorter_l_ratios = (0.25 + 0.75*np.random.rand(2))
        elif choice>0.6:
            clip_l = (0.25 + 0.75*np.random.rand(2))*MAX_L

        # start and ending point of every row
        points = [
            gen_start_and_end(alpha,row_center,borders,H,W,ANGLE_VAR,BORDER_VAR,shorter_l_ratios,clip_l)
            for row_center in centers
        ]

        mask,row_lines,good = create_mask(points,borders,H,W,RADIUS,HOLE_PROB,HOLE_DIM,
                                          HOLE_FRAME,MAX_CURVED_DISPL,curved=CURVED)
        if not good:
            continue

        ird = get_ird(row_lines)
        if ird.min()>=MIN_IRD: # not too narrow rows
            # NOTE(review): this calls gen_waypoints, not the gen_waypoints_new
            # defined in this notebook - confirm which one is intended
            wp = gen_waypoints(row_lines)
            mask,wp,centers,points = random_zoom(mask,wp,centers,points,RATIO,H,W)
            return mask,wp,centers,points

In [None]:
# generate one sample and show it with its waypoints
mask,wp,centers,points = get_mask_and_wp()

# alternating 0/1 labels, one [0,1] pair for every two waypoints
wp_labels = [0,1] * (len(wp)//2)
visualize_mask(mask, dim=(20,20), wp=wp, wp_class=wp_labels, rad=6)

## Train dataset generation

In [None]:
# start from an empty train folder (missing parent folders are created too)
if os.path.isdir(TRAIN_DATA_PATH):
    shutil.rmtree(TRAIN_DATA_PATH)
os.makedirs(TRAIN_DATA_PATH)

wp_dic = {}
i = 0
for sample in tqdm(range(N_SAMPLES_train)):
    mask,wp,centers,points = get_mask_and_wp()
    save_img(mask,sample,data_path=TRAIN_DATA_PATH)

    # one row per waypoint; 'class' alternates 0/1 following the order of wp
    for index, point in enumerate(wp):
        wp_dic[i] = {'N_img': f"img{sample}", 'x_wp': point[0], 'y_wp': point[1], 'class': index%2}
        i += 1

# save the dataframe, using the shared csv name instead of a hard-coded copy
df = pd.DataFrame.from_dict(wp_dic, orient="index")
df.to_csv(os.path.join(TRAIN_DATA_PATH, waypoint_file_name), index=False)

In [None]:
# show a randomly picked train image together with its saved waypoints

index = np.random.randint(N_SAMPLES_train)
img_name = f'img{index}'

# pixel values are divided by 255 right after loading
mask = cv2.imread(TRAIN_DATA_PATH+'/'+img_name+'.png')/255
df = pd.read_csv(TRAIN_DATA_PATH+'/waypoints.csv')

# rows of the picked image, every column after the first one (N_img)
img_rows = df[df['N_img'] == img_name]
points = img_rows.to_numpy()[:,1:].astype('uint32')
visualize_mask(mask, wp=points, dim=(10,10), rad=3)

## Validation dataset generation

In [None]:
# start from an empty validation folder
if os.path.isdir(VAL_DATA_PATH):
    shutil.rmtree(VAL_DATA_PATH)
# makedirs rather than mkdir: mkdir raises when a parent folder is missing
os.makedirs(VAL_DATA_PATH)

wp_dic = {}
i = 0
for sample in tqdm(range(N_SAMPLES_val)):
    mask,wp,centers,points = get_mask_and_wp()
    save_img(mask,sample,data_path=VAL_DATA_PATH)

    # one row per waypoint; 'class' alternates 0/1 following the order of wp
    for index, point in enumerate(wp):
        wp_dic[i] = {'N_img': f"img{sample}", 'x_wp': point[0], 'y_wp': point[1], 'class': index%2}
        i += 1

# save the dataframe, using the shared csv name instead of a hard-coded copy
df = pd.DataFrame.from_dict(wp_dic, orient="index")
df.to_csv(os.path.join(VAL_DATA_PATH, waypoint_file_name), index=False)

In [None]:
# show a randomly picked validation image together with its saved waypoints

index = np.random.randint(N_SAMPLES_val)
img_name = f'img{index}'

# pixel values are divided by 255 right after loading
mask = cv2.imread(VAL_DATA_PATH+'/'+img_name+'.png')/255
df = pd.read_csv(VAL_DATA_PATH+'/waypoints.csv')

# rows of the picked image, every column after the first one (N_img)
img_rows = df[df['N_img'] == img_name]
points = img_rows.to_numpy()[:,1:].astype('uint32')
visualize_mask(mask, wp=points, dim=(10,10), rad=3)

## Test dataset generation

In [None]:
# start from an empty test folder (missing parent folders are created too)
if os.path.isdir(TEST_DATA_PATH):
    shutil.rmtree(TEST_DATA_PATH)
os.makedirs(TEST_DATA_PATH)

wp_dic = {}
i = 0
for sample in tqdm(range(N_SAMPLES_test)):
    mask,wp,centers,points = get_mask_and_wp()
    save_img(mask,sample,data_path=TEST_DATA_PATH)

    # one row per waypoint; 'class' alternates 0/1 following the order of wp
    for index, point in enumerate(wp):
        wp_dic[i] = {'N_img': f"img{sample}", 'x_wp': point[0], 'y_wp': point[1], 'class': index%2}
        i += 1

# save the dataframe, using the shared csv name instead of a hard-coded copy
df = pd.DataFrame.from_dict(wp_dic, orient="index")
df.to_csv(os.path.join(TEST_DATA_PATH, waypoint_file_name), index=False)

In [None]:
# show a randomly picked test image together with its saved waypoints

index = np.random.randint(N_SAMPLES_test)
img_name = f'img{index}'

# pixel values are divided by 255 right after loading
mask = cv2.imread(TEST_DATA_PATH+'/'+img_name+'.png')/255
df = pd.read_csv(TEST_DATA_PATH+'/waypoints.csv')

# rows of the picked image, every column after the first one (N_img)
img_rows = df[df['N_img'] == img_name]
points = img_rows.to_numpy()[:,1:].astype('uint32')
visualize_mask(mask, wp=points, dim=(10,10), rad=3)