# Imports

In [None]:
import os
import glob
import json
import numpy as np
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import imgaug.augmenters as iaa
import tensorflow as tf
import random
import scipy


# Creating the Output Directories for the Dataset in the Kaggle Working Directory

In [None]:
# Kaggle input locations: the polygon annotation file (JSON lines) and the
# folder holding the source .tif tiles.
annt = '/kaggle/input/hubmap-hacking-the-human-vasculature/polygons.jsonl'
t_pth = '/kaggle/input/hubmap-hacking-the-human-vasculature/train/'

# Output folders, relative to the current working directory: one for the
# rasterized masks, one for the exported training images.
path_train = Path('train_masks')
pathtrain = Path('train_images')

# Create both folders up front; already-existing folders are left untouched.
path_train.mkdir(parents=True, exist_ok=True)
pathtrain.mkdir(parents=True, exist_ok=True)

# Data Manipulation/Creating the Masks

In [None]:
# Load the annotations: every line of the file is one JSON record, and the
# record's 'annotations' entry is stored under its 'id'.
annotations = {}
with open(annt, 'r') as f:
    annotations.update(
        (record['id'], record['annotations'])
        for record in map(json.loads, f)
    )

In [None]:
#Placing the annotations on an empty mask with the corresponding ID
def convert_to_mask(annotations, image_dimensions=(512, 512)):
    """Rasterize a list of polygon annotations into one binary mask.

    Args:
        annotations: Iterable of dicts. Each dict must have a
            'coordinates' entry whose values can be flattened into a
            sequence of 2-element points (presumably (x, y) pixel
            positions, as OpenCV expects -- confirm against the data).
        image_dimensions: Shape of the mask to create, passed straight to
            ``np.zeros``. Defaults to (512, 512), the size that was
            previously hard-coded.

    Returns:
        A uint8 array of shape ``image_dimensions`` that is 1 inside every
        annotated polygon and 0 elsewhere. An empty ``annotations`` yields
        an all-zero mask.
    """
    mask = np.zeros(image_dimensions, dtype=np.uint8)
    for annotation in annotations:
        # fillPoly works on 32-bit integer points; request that dtype
        # explicitly instead of relying on the platform's default int size.
        coordinates = np.asarray(annotation['coordinates'], dtype=np.int32)
        # Flatten any outer nesting into a (num_points, 1, 2) point list.
        coordinates = coordinates.reshape(-1, 1, 2)
        # All polygons are drawn with the same value, so overlapping
        # annotations simply merge.
        cv2.fillPoly(mask, [coordinates], 1)

    return mask

In [None]:
#Acquiring the ids
# Iterating a dict yields its keys in insertion order, so this is the list
# of image ids in the order they were loaded.
keys = list(annotations)

In [None]:
#Acquiring the paths for each image
# One source .tif path per id, in the same order as `keys`.
path = [t_pth + image_id + ".tif" for image_id in keys]

In [None]:
#Augmentations for the Images and Masks
# The augmenters are listed in the order they are handed to Sequential.
_augmenters = [
    # Channel shuffle, configured with 0.35.
    iaa.ChannelShuffle(0.35),
    # Translation sampled from -0.2..0.2 of the image size on each axis.
    iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
    # Rotation sampled from the full -180..180 range.
    iaa.Affine(rotate=(-180, 180)),
    # Shear sampled from -16..16.
    iaa.Affine(shear=(-16, 16)),
    # Left-right flip, configured with 0.5.
    iaa.Fliplr(0.5),
    # Gaussian blur with sigma sampled from 0..3.0.
    iaa.GaussianBlur(sigma=(0, 3.0)),
]
seq = iaa.Sequential(_augmenters)

# Creating the Training Dataset

In [None]:
# Neither of these two names is used by this cell; they are kept in case a
# later cell reads them.
df_data2=[]
q = 0
#Number of Training Images
# Export the first 1388 ids as the training split. Every image/mask pair is
# written as <id>.png into train_images/ and train_masks/; roughly half of
# the pairs are passed through the augmentation pipeline first.
for y in tqdm(keys[0:1388]):

    annotation2 = annotations.get(y)
    prob = random.uniform(0, 1)

    # Work shared by both branches: source path, output paths, raw mask.
    z = t_pth + y + ".tif"
    patha = pathtrain/f"{y}.png"
    pathc = path_train/f"{y}.png"
    mask2 = convert_to_mask(annotation2)

    # The context manager closes the source file as soon as the pair has
    # been written, instead of leaving the handle to garbage collection.
    with Image.open(z) as img:
        #Only 50% of the Images are to have an Augmentation
        if prob > 0.5:
            # Add a leading batch axis to the image, and batch plus
            # trailing axes to the mask, before calling the augmenter.
            input_img = np.expand_dims(np.array(img), axis=0)
            input_mask = np.expand_dims(np.expand_dims(mask2, axis=0), axis=3)

            # Image and mask go through one call so they are augmented
            # together.
            img_aug, mask_aug = seq(images=input_img, segmentation_maps=input_mask)

            # Drop the singleton axes again and convert back to PIL.
            out_img = Image.fromarray(np.squeeze(img_aug))
            out_mask = Image.fromarray(np.squeeze(mask_aug))
        else:
            # No augmentation: save the source image and raw mask as-is.
            out_img = img
            out_mask = Image.fromarray(mask2)

        out_mask.save(pathc)
        out_img.save(patha)

# Repeating the Process for the Validation Images (Excluding Augmentations)

In [None]:
# Output folders for the validation split, relative to the current working
# directory: one for the masks, one for the images.
valid_path = Path('valid_masks')
validpath = Path('valid_images')

# Create both folders; already-existing folders are left untouched.
valid_path.mkdir(parents=True, exist_ok=True)
validpath.mkdir(parents=True, exist_ok=True)

In [None]:
# Export ids 1388..1632 as the validation split. Each image and its mask is
# written unmodified as <id>.png into valid_images/ and valid_masks/.
for y in tqdm(keys[1388:1633]):

    annotation2 = annotations.get(y)

    z = t_pth + y + ".tif"
    patha = validpath/f"{y}.png"
    pathc = valid_path/f"{y}.png"

    # The context manager closes the source file as soon as the pair has
    # been written, instead of leaving the handle to garbage collection.
    with Image.open(z) as img:
        mask2 = convert_to_mask(annotation2)
        mask2 = Image.fromarray(mask2)
        mask2.save(pathc)
        img.save(patha)