# From Intello to our usage : dataset adaptation
This notebook only needs to be run once. After this, the dataset is ready for our implementation.
This notebook must be placed next to the "data" folder, and the "data" folder must contain only the following:
* "images" folder with all the images
* test.json with the information of which images belong to the test dataset and their segmentation
* train.json with the information of which images belong to the train dataset and their segmentation
* val.json with the information of which images belong to the validation dataset and their segmentation

### Imports

In [None]:
import json
import os
import PIL
from PIL import ImageDraw
import numpy as np
from pathlib import Path
import shutil

%matplotlib inline
from matplotlib import pyplot as plt
import cv2
#import rasterio
from wand.image import Image

### Convert .tiff to .png
We did not manage to get coloured images, so we're just using black and white images, by keeping the red channel only.

In [None]:
# Convert every '.tif' image in data/images to a '.png' saved alongside it,
# then delete the original '.tif' file.
# NOTE: an earlier rasterio-based variant (reading band 1 only and saving it
# through an image object) was replaced by this wand conversion.
for file in os.listdir('data/images'):
    if not file.endswith('.tif'):
        continue

    base_dir = 'data/images'
    tif_path = os.path.join(base_dir, file)
    dst = file.replace('.tif', '.png')

    # Both wand images are released as soon as the PNG has been written.
    with Image(filename=tif_path) as img, img.convert('png') as converted:
        converted.save(filename=os.path.join(base_dir, dst))

    # The source is only removed once the conversion has succeeded.
    os.remove(path=tif_path)

### Rewriting file directories

In [None]:
# Folder holding the annotation files, and the annotation file of each split.
# Both names are reused by the cells further down.
base_dir_data = os.path.join('data')
data_dirs = ['train.json', 'val.json', 'test.json']

# (old, new) substring substitutions applied, in this order, to each file name.
file_name_substitutions = (
    ('datasets/INTELLO/solarPanels/v2/images_800x600/', ''),
    ('/data', 'data'),
    ('.tif', '.png'),
)

for data_dir in data_dirs:
    json_path = os.path.join(base_dir_data, data_dir)

    # Load the whole annotation file in memory
    with open(json_path) as json_file:
        data_file = json.load(json_file)

    # Rewrite the 'file_name' entry of every image record
    for image_data in data_file['images']:
        new_name = image_data['file_name']
        for old, new in file_name_substitutions:
            new_name = new_name.replace(old, new)
        image_data['file_name'] = new_name

    # Overwrite the annotation file with the updated records
    with open(json_path, 'w') as outfile:
        json.dump(data_file, outfile)

### From polygon points to mask
Our dataset is built differently from the one in the tutorial we use for this project. The tutorial uses images and, for the annotations, masks stored as images. Our annotations are instead lists of polygon points, so the function below converts them into the mask format expected by the method we are using.

In [None]:
def pol_to_mask(img_width, img_height, pol_array):
    """Rasterise a polygon into a mask array.

    A blank single-channel ('L' mode) image of size
    ``(img_width, img_height)`` is created with every pixel at 0, then the
    polygon is drawn on it with both its outline and its interior set to 1.

    Args:
        img_width: Width of the mask, in pixels.
        img_height: Height of the mask, in pixels.
        pol_array: Polygon coordinates, passed unchanged to
            ``ImageDraw.polygon``.

    Returns:
        The drawn image converted to a numpy array (1 on the polygon,
        0 everywhere else).
    """
    canvas = PIL.Image.new('L', (img_width, img_height), 0)
    painter = PIL.ImageDraw.Draw(canvas)
    painter.polygon(pol_array, outline=1, fill=1)
    return np.array(canvas)

### Creating masks and exporting to masks directory
As there are multiple masks per image, to keep things simple for now, we merge all the masks of an image into a single mask per image.
For coherence, we only keep images for which masks exist.

In [None]:
# Image parameters
width = 800
height = 600

# Create masks folder if non existent
Path('data/masks').mkdir(parents=True, exist_ok=True)

for data_dir in data_dirs:

    # Opening Json file with data and storing it in array
    with open(os.path.join(base_dir_data, data_dir)) as json_file:
        data_file = json.load(json_file)

    # Modifying data
    for annot_data in data_file['annotations']:
        # Create mask as a numpy array
        np_mask = pol_to_mask(width, height, annot_data['segmentation'][0])
        
        # Store mask
        mask_name = str(annot_data['image_id']) + '.png'
        mask_path = os.path.join(base_dir_data, 'masks', mask_name)
        
        # Check if we need to complete mask
        if os.path.exists(mask_path):
            # Import current mask
            cur_mask = PIL.Image.open(mask_path)
            # Convert image to numpy array
            cur_mask_array = np.asarray(cur_mask)
            # Merge both masks
            np_mask = np.logical_or(np_mask, cur_mask_array)
            
        # Create image from array
        im = PIL.Image.fromarray(np_mask)
        
        # Save image
        im.save(mask_path)

### Split into correct folders

In [None]:
# Move the images and masks of each split into their own folders:
# data/<split> for the images and data/<split>annot for the masks.

# Annotation file -> (images folder, masks folder). Any file name that is not
# listed falls back to the test folders.
split_folders = {
    'train.json': ('train', 'trainannot'),
    'val.json': ('val', 'valannot'),
    'test.json': ('test', 'testannot'),
}

for data_dir in data_dirs:

    # Choose folder names
    folder_name, folder_name_annot = split_folders.get(
        data_dir, ('test', 'testannot')
    )

    # Opening Json file with data and storing it in array
    with open(os.path.join(base_dir_data, data_dir)) as json_file:
        data_file = json.load(json_file)

    # An image and its mask share the same '<image_id>.png' file name.
    # dict.fromkeys drops the duplicates (one image can carry several
    # annotations) while keeping the first-seen order, without the repeated
    # list scans of an 'if x not in list' check.
    file_names = dict.fromkeys(
        str(mask_data['image_id']) + '.png'
        for mask_data in data_file['annotations']
    )

    # Image and mask paths
    img_paths = [os.path.join('data/images', name) for name in file_names]
    mask_paths = [os.path.join('data/masks', name) for name in file_names]

    # Create necessary folders if non existent
    folder_name_path = os.path.join(base_dir_data, folder_name)
    folder_name_annot_path = os.path.join(base_dir_data, folder_name_annot)

    Path(folder_name_path).mkdir(parents=True, exist_ok=True)
    Path(folder_name_annot_path).mkdir(parents=True, exist_ok=True)

    # Move images, then masks, to the corresponding folders. Every path was
    # built above with a '.png' suffix, so no extension check is needed.
    for file_dir in img_paths:
        shutil.move(file_dir, folder_name_path)

    for mask_dir in mask_paths:
        shutil.move(mask_dir, folder_name_annot_path)