# From Intello to our usage : dataset adaptation
This notebook only needs to be run once. After that, the dataset is ready for our implementation.

### Imports

In [47]:
import json
import os
from PIL import Image, ImageDraw
import numpy as np

%matplotlib inline
from matplotlib import pyplot as plt

### Rewriting file directories

In [30]:
# Root folder of the dataset and the annotation file of each split
base_dir_data = os.path.join('data')
data_dirs = ['train.json', 'val.json', 'test.json']

# Directory prefix stored in the image entries that has to be stripped
intello_prefix = 'datasets/INTELLO/solarPanels/v2/images_800x600/images/'

for split_name in data_dirs:
    split_path = os.path.join(base_dir_data, split_name)

    # Load the annotation file of this split
    with open(split_path) as src:
        split_content = json.load(src)

    # Remove every occurrence of the prefix from each image's file name
    # (a no-op when the prefix is already gone, so re-running is harmless)
    for entry in split_content['images']:
        entry['file_name'] = entry['file_name'].replace(intello_prefix, '')

    # Overwrite the annotation file with the updated content
    with open(split_path, 'w') as dst:
        json.dump(split_content, dst)

### From polygon points to mask
Our dataset is built differently from the one in the tutorial we use for this project. The tutorial stores its annotations as mask images, whereas ours stores each annotation as a list of polygon points, so the function below converts a polygon into a mask to fit the method we're using.

In [53]:
def pol_to_mask(img_width, img_height, pol_array):
    """Rasterise a polygon into a mask array.

    A blank single-channel ('L' mode) image of size ``img_width`` x
    ``img_height`` is created with every pixel set to 0, the polygon is drawn
    on it with both its outline and its fill set to 1, and the image is then
    converted to a NumPy array.

    Args:
        img_width: Width of the mask image, in pixels.
        img_height: Height of the mask image, in pixels.
        pol_array: Polygon coordinates, passed unchanged to
            ``ImageDraw.Draw(...).polygon``.

    Returns:
        The NumPy array built from the drawn image: 1 where the polygon was
        drawn, 0 everywhere else.
    """
    canvas = Image.new('L', (img_width, img_height), 0)
    painter = ImageDraw.Draw(canvas)
    painter.polygon(pol_array, outline=1, fill=1)
    return np.array(canvas)

### Creating masks and exporting to masks directory
As there are multiple masks per image, to keep things simple for now we merge all the masks of an image into a single mask, as if the image had only one.

In [55]:
# Image parameters: every mask is rasterised at this fixed size
width = 800
height = 600

# Make sure the output directory exists, otherwise saving the first mask fails
masks_dir = os.path.join(base_dir_data, 'masks')
os.makedirs(masks_dir, exist_ok=True)

# Step 1: collect the polygons of every image, across all annotation files.
# Grouping in memory (instead of re-reading the PNG already on disk for each
# annotation) means masks are rebuilt from the annotations alone: leftovers
# from a previous run can no longer leak into the result, and each mask file
# is written exactly once.
# NOTE(review): masks are keyed by image_id only, so ids are assumed to be
# unique across train/val/test; colliding ids end up merged in the same mask
# (as they were before) -- confirm against the dataset.
polygons_by_image = {}

for data_dir in data_dirs:

    # Opening Json file with data and storing it in array
    with open(os.path.join(base_dir_data, data_dir)) as json_file:
        data_file = json.load(json_file)

    for annot_data in data_file['annotations']:
        segmentation = annot_data['segmentation']

        # Only polygon annotations (a list of point lists) can be rasterised here
        if not isinstance(segmentation, list):
            raise ValueError(
                'Unsupported segmentation format for annotation of image '
                + str(annot_data['image_id']) + ' in ' + data_dir
            )

        # Keep every polygon of the annotation, not only the first one, so
        # objects described by several polygons are not truncated
        polygons_by_image.setdefault(annot_data['image_id'], []).extend(segmentation)

# Step 2: build one merged mask per image and save it
for image_id, polygons in polygons_by_image.items():
    # Start from an empty mask and add each polygon to it
    merged_mask = np.zeros((height, width), dtype=bool)
    for polygon in polygons:
        merged_mask = np.logical_or(merged_mask, pol_to_mask(width, height, polygon))

    # Always store masks as 8-bit images holding 0 (background) / 1 (panel),
    # whether the image has one annotation or several; saving the boolean
    # array directly would produce a 1-bit image instead
    im = Image.fromarray(merged_mask.astype(np.uint8))

    # Save image
    mask_path = os.path.join(masks_dir, str(image_id) + '.png')
    im.save(mask_path)
