In [3]:
import glob
import random_cropping
import pandas as pd
from PIL import Image
import random
from math import floor, ceil

In [4]:
desired_crop_size = 1024

DATA_PATH = "C:/Users/clieshou/Documents/Sogeti/The Ocean Cleanup/Plasticdebris_data/"
image_paths = glob.glob(DATA_PATH + 'images/shipcam/*')
OUTPUT_PATH = "shipcam_emptycrops/images"
label_file_name = "labels_split.csv"

In [5]:
df = pd.read_csv(os.path.join(DATA_PATH, label_file_name), delimiter=';')
df['filename'] = df['filename'].str.lower()

In [64]:
def define_center_sampling_region(image_width, image_height,desired_crop_size):
    """
    Steps:
    Determines from what region the center of the crop can be randomly selected.
    It does this by compensating desired crop size for the size of the bounding box, to avoid it not being fully included.
    It compensates for the fact that bounding boxes may be close to the edge of the image, didn't get to it yet
    """

    #we want to select the center of the crop randomly, but have to make sure that the entire bb falls within the crop
    center_sampling_region_xmin = desired_crop_size/2 
    center_sampling_region_ymin = desired_crop_size/2 
    center_sampling_region_xmax = image_width - desired_crop_size/2 
    center_sampling_region_ymax = image_height - desired_crop_size/2 

    #randomly getting the center within the allowed frame, using seed to get the same each time for the same bounding box. Not sure if there are any reasons not to do this
    # random.seed()
    crop_xcenter = random.randint(floor(center_sampling_region_xmin), ceil(center_sampling_region_xmax))
    # random.seed(bb_ymin)
    crop_ycenter = random.randint(floor(center_sampling_region_ymin), ceil(center_sampling_region_ymax))

    #make sure the center fall within the acceptable frame, where the entire crop will fall within the image
    if crop_xcenter < desired_crop_size/2:
        crop_xcenter = desired_crop_size/2
    if crop_xcenter > (image_width - desired_crop_size/2):
        crop_xcenter = (image_width - desired_crop_size/2)
    if crop_ycenter < desired_crop_size/2:
        crop_ycenter = desired_crop_size/2
    if crop_ycenter > (image_height - desired_crop_size/2):
        crop_ycenter = (image_height - desired_crop_size/2)

    return crop_xcenter, crop_ycenter

In [65]:
def generate_crop_per_bb(image_path, bb_df):
    """
    This functions aim is to obtain crops of a specified size from images of any size. 
    It does this by first checking whether the bounding box of interest is larger or smaller than desired.
    If larger, it downsamples the entire image first.
    Then, we determine from what region the center of the crop can be sampled such that the bb will always fully fall within the image.

    input:
    - i: index of the object of interest for the current image
    - image: the path to the image
    - df_image: subset of the total df, only contains rows of this image
    - downsample_marging: how much smaller should the bounding box be than the image, 1 is the same, 2 is half
    
    returns:
    - crop: cropped image
    - crop_coordinates: xmin, ymin, xmax and ymax of the crop within the original image
    """

    image_to_crop = Image.open(image_path)


    #initialize variables for the required data
    # bb_data = df_image.iloc[i, :]
    # bb_xmin, bb_ymin, bb_xmax, bb_ymax = bb_data['xmin'], bb_data['ymin'], bb_data['xmax'], bb_data['ymax']


In [70]:
desired_crop_size = 1024
df_image = df[df['filename'] == image_path.split("/")[-1]]

In [16]:
bb_df

filename    extra_g0027636.jpg
width                     4000
height                    3000
class                     hard
xmin                      3301
ymin                      1697
xmax                      3329
ymax                      1720
type                   Shipcam
Name: 0, dtype: object

In [14]:
imagename_list = df.filename.unique().tolist()

for image_name in imagename_list:
    image_df = df[df['filename'] == image_name].reset_index(drop=True)
    for bb_id in range(len(image_df)):
        bb_df = image_df.loc[bb_id, :]

    print(len(image_df))

2
1
1
1
5
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
3
3
4
19
1
1
2
1
1
1
1
1
1
1
1
3
1
1
2
1
1
1
1
1
1
1
1
1
1
3
2
2
1
1
2
5
6
3
2
5
3
6
5
4
4
4
3
4
3
1
2
2
2
1
4
1
1
7
4
1
1
2
1
2
2
1
1
2
2
4
4
2
3
3
2
3
3
3
1
1
1
1
1
1
8
6
6
1
3
3
2
2
3
3
2
2
2
1
4
3
4
4
4
2
2
1
4
2
2
1
4
2
2
1
2
1
1
4
2
2
3
1
3
2
6
23
2
1
1
2
1
1
1
2
3
1
5
1
4
4
6
4
6
3
6
7
8
7
4
4
2
6
2
1
1
1
2
1
1
1
3
4
3
4
1
2
1
1
1
3
2
1
1
1
1
2
1
2
1
3
1
3
2
5
2
1
2
1
2
4
1
2
1
1
1
1
1
1
1
2
1
1
1
1
2
1
1
1
2
1
1
2
2
2
2
3
1
3
1
1
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
2
3
1
1
1
3
1
1
2
1
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
3
1
1
2
1
1
3
1
1
2
2
1
1
1
2
2
1
1
1
1
1
1
1
3
1
1
1
2
1
1
1
1
2
1
3
1
2
2
1
2
2
4
4
5
1
3
1
2
1
2
1
2
1
2
1
1
1
1
2
2
4
5
4
1
1
1
1
1
1
1
1
1
1
4
1
2
1
1
2
3
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
6
2
3
11
1
1
1
2
1
3
1
2
1
1
1
1
1
1
2
4
1
1
1
2
16
1
2
1
6
1
1
4
6
3
1
38
1
3
4
8
18
1
2
1
1
1
1
1


In [9]:
for i in range(len(df)):
    
    image_name = df.loc[i, 'filename']

    
    

    print(image_name)

_0038_bl.jpg
2018.11.21.100m_dji_0038_bl.jpg
2018.11.21.100m_dji_0043_bl.jpg
2018.11.21.100m_dji_0046_tl.jpg
2018.11.21.100m_dji_0046_tl.jpg
2018.11.21.100m_dji_0046_tl.jpg
2018.11.21.100m_dji_0048_tr.jpg
2018.11.21.100m_dji_0048_tr.jpg
2018.11.21.100m_dji_0049_bl.jpg
2018.11.21.100m_dji_0049_bl.jpg
2018.11.21.100m_dji_0049_bl.jpg
2018.11.21.100m_dji_0049_bl.jpg
2018.11.21.100m_dji_0049_bl.jpg
2018.11.21.100m_dji_0050_bl.jpg
2018.11.21.100m_dji_0050_bl.jpg
2018.11.21.100m_dji_0050_tl.jpg
2018.11.21.100m_dji_0050_tr.jpg
2018.11.21.100m_dji_0050_tr.jpg
2018.11.21.100m_dji_0051_bl.jpg
2018.11.21.100m_dji_0051_tl.jpg
2018.11.21.100m_dji_0051_tl.jpg
2018.11.21.100m_dji_0051_tr.jpg
2018.11.21.100m_dji_0051_tr.jpg
2018.11.21.100m_dji_0051_tr.jpg
2018.11.21.100m_dji_0051_tr.jpg
2018.11.21.100m_dji_0052_bl.jpg
2018.11.21.100m_dji_0052_br.jpg
2018.11.21.100m_dji_0052_br.jpg
2018.11.21.100m_dji_0052_tr.jpg
2018.11.21.100m_dji_0053_bl.jpg
2018.11.21.100m_dji_0054_bl.jpg
2018.11.21.100m_dji_0054_br

In [73]:
crops_created = 0
while crops_created < 1000:
    
    random_select = random.randint(0, len(image_paths))
    image_path = image_paths[random_select]
    image_name = image_path.split("/")[-1].split("\\")[1]

    crop = generate_crop_per_bb(image_path, df_image, desired_crop_size)

    df_image = df[df['filename'] == image_name]

    image_path_crop = os.path.join(OUTPUT_PATH, image_name.split('.')[0] + '_crop.jpg')

    print(image_path_crop)
    crop.save(image_path_crop)

    crops_created += 1

shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg
shipcam_emptycrops/images\dsc00968_crop.jpg


KeyboardInterrupt: 

In [None]:
crop

In [11]:
image_path = image_paths[0]