# Decode Json Files

Json files contain base64 encoded images and the location of the damage and its class in coco format. To use this data for UNet or basic Encoder-Decoder image segmentation model we need to convert the json files into 2 separate folders. One folders will contain the image in jpeg format and the other folder will contain the mask in numpy format where
- 0 - background
- 1 - scratch
- 2 - dent
- 3 - damage

## Imports

In [1]:
import os
import cv2
import json
import base64
import skimage.draw
import numpy as np

## Convert Damaage type to Class Number

In [2]:
def damage_type_to_class(damage_type):
    damage = damage_type.lower()
    if damage == 'scratch':
        return 1
    elif damage == 'dent':
        return 2
    else:
        return 3

## base64 Decoder

In [3]:
def string_to_img(img_string, path):
    '''
    Takes the image string in byte format and stores it in the given path
    '''
    # write the string in a temp file
    with open('tmp', 'w') as op:
        op.write(img_string)
    # read the file as binary format
    with open('tmp', 'rb') as ip:
        img_bin = ip.read()
    # decode the binary and store the resultant data as jpeg
    with open(path, 'wb') as op:
        op.write(base64.decodestring(img_bin))

## Annotation to Mask Converter

In [4]:
def add_annotation_to_mask(annotation, mask, damage_class):
    '''
    Takes annotations which is a list of points and then converts that into a numpy array with pixel level classification
    i.e. a set of points are converted into polygon and the corresponding pixels are classified based on damage class
    '''
    # take the masks and convert them to polygon
    an = np.array(annotation)
    an = an.squeeze()
    polygon = skimage.draw.polygon(an[:,1], an[:,0], mask.shape)
    
    # create the actual mask
    mask[polygon[0], polygon[1]] = damage_class
    return mask

## Putting it all together

In [5]:
def process_json(file, source, image_dest, mask_dest):
    file_path = os.path.join(source, file)
    with open(file_path, 'r') as op:
        json_data = json.load(op)
    # extract image in base64 format
    img_str = json_data['imageData']

    # extract the image name
    img_name = file[:-5] + '.jpeg'#json_data['imagePath']

    # store the image
    image_path = os.path.join(image_dest, img_name)
    string_to_img(img_str, image_path)

    img = cv2.imread(image_path)
    img_shape = img.shape[0:2]

    # dummy mask to hold annotations
    mask = np.zeros(img_shape)
    #print(len(json_data['shapes']))
    for shape in json_data['shapes']:
        # extract the damage type
        damage_type = shape['label']

        # extract the annotations
        annotation = shape['points']
        
        # store the mask
        damage = damage_type_to_class(damage_type)
        #print(damage)
        #print(np.any(mask==2))
        mask = add_annotation_to_mask(annotation, mask, damage)
        #print(np.any(mask==3))

    mask_path = os.path.join(mask_dest, img_name)
    np.save(mask_path, mask)
    #cv2.imwrite(mask_path, mask)

In [6]:
def convert_json_to_model_data(source, image_dest, mask_dest):
    '''
    takes in json files as input which contains the image in byte64 format and annotations in list of point format
    and stores the images in jpeg format in image_dest
    and stores the mask with pixel level annotation in mask_dest
    '''
    # read the list of json files
    file_list = [f for f in os.listdir(source) if f.endswith('.json')]
    print('Processing {} files'.format(len(file_list)))
    done_files = 0
    for file in file_list:
        process_json(file, source, image_dest, mask_dest)
        done_files += 1
        if done_files % 20 == 0:
            print('Done processing {} files'.format(done_files))
    
    print('Finished Processing')

## Convert train images

In [7]:
train_source_dir = r'train'
train_img_dir = r'train_images'
train_mask_dir = r'train_pixel_masks'

In [8]:
!rm -rf {train_img_dir}
!rm -rf {train_mask_dir}
!mkdir {train_img_dir}
!mkdir {train_mask_dir}

In [9]:
convert_json_to_model_data(train_source_dir, train_img_dir, train_mask_dir)

Processing 321 files
1
1
1


  op.write(base64.decodestring(img_bin))


2
1
1
2
2
3
1
1
4
1
1
1
1
1
2
1
1
Done processing 20 files
1
1
1
2
1
1
1
1
4
1
3
1
1
1
1
1
1
1
11
2
Done processing 40 files
1
1
1
2
1
2
2
1
1
2
1
1
1
1
1
4
1
2
1
1
Done processing 60 files
1
1
1
1
1
6
2
1
1
1
1
1
1
2
1
1
1
2
3
2
Done processing 80 files
2
1
1
1
1
1
1
1
1
2
4
1
1
1
21
3
1
1
12
1
Done processing 100 files
1
1
1
2
1
1
5
1
7
1
1
1
1
1
3
1
1
1
1
1
Done processing 120 files
1
3
1
1
2
1
1
1
1
2
1
1
1
1
1
2
1
1
1
1
Done processing 140 files
4
7
1
2
1
1
8
1
1
6
1
1
1
2
1
1
1
1
1
1
Done processing 160 files
2
2
1
4
4
1
1
1
1
2
4
1
4
1
1
4
7
1
1
1
Done processing 180 files
1
2
1
1
1
1
1
1
1
1
1
1
9
1
1
3
1
1
1
1
Done processing 200 files
1
2
1
3
2
1
1
1
1
1
1
2
1
4
2
4
1
2
1
1
Done processing 220 files
3
1
1
2
1
2
1
4
1
1
6
1
1
1
1
1
1
1
2
1
Done processing 240 files
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
1
1
1
Done processing 260 files
6
2
7
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
3
1
Done processing 280 files
1
2
1
6
3
2
1
1
1
1
2
3
1
1
4
1
1
1
1
1
Done processing 300 files
1
1
5
5
1
1
6
1
1

In [10]:
!rm tmp

## Convert test images

In [7]:
test_source_dir = r'test'
test_img_dir = r'test_images'
test_mask_dir = r'test_pixel_masks'

In [8]:
!rm -rf {test_img_dir}
!rm -rf {test_mask_dir}
!mkdir {test_img_dir}
!mkdir {test_mask_dir}

In [9]:
convert_json_to_model_data(test_source_dir, test_img_dir, test_mask_dir)

Processing 81 files


  op.write(base64.decodestring(img_bin))


Done processing 20 files
Done processing 40 files
Done processing 60 files
Done processing 80 files
Finished Processing


In [10]:
!rm tmp

In [70]:
json_data

{'version': '3.16.7',
 'flags': {},
 'shapes': [{'label': 'Dent',
   'line_color': None,
   'fill_color': None,
   'points': [[681.5909090909091, 214.04545454545453],
    [635.0, 226.54545454545453],
    [608.8636363636364, 204.95454545454547],
    [554.3181818181819, 272.0],
    [547.5, 334.5],
    [500.9090909090909, 415.1818181818182],
    [480.4545454545455, 445.8636363636364],
    [514.5454545454545, 443.59090909090907],
    [542.9545454545455, 407.22727272727275],
    [669.0909090909091, 398.1363636363636]],
   'shape_type': 'polygon',
   'flags': {}}],
 'lineColor': [0, 255, 0, 128],
 'fillColor': [255, 0, 0, 128],
 'imagePath': 'Bing_0043.jpeg',
 'imageData': '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCALaA7YDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUE