### Manage Region JSON Data

In [1]:
import os
import json
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Write json to file
def WriteJSON(obj,filename):
    try:
        with open(filename, 'w') as outfile:
            obj_json = json.dumps(obj, sort_keys=True, indent=4,default=str)
            outfile.write(obj_json)
    except Exception as e:
        print(e)
        print('File not written.')

# Read and return json object from file. If none, return empty object.
def ReadJSON(filename):
    try: 
        with open(filename, 'r') as infile:
            obj = json.load(infile)
    except Exception as e: 
        obj = [] 
    return obj

### Initialize JSON

In [3]:
def InitJSON(filename):
    
    init_file = "../datasets/pantograph/init_region_data.json"
    obj = ReadJSON(init_file)
    
    WriteJSON(obj,filename)

In [4]:
sets = ['train','val']
folder = sets[2]

filename = "../datasets/pantograph/"+folder+"/region_data_2.json"

#InitJSON(filename)

### Update Local File Paths

In [4]:
'''
Relable image path to local dir
'''

def UpdatePaths(filename):
    
    obj = ReadJSON(filename)

    for i in obj['images']:

        path = os.getcwd().replace('/dev','')
        path = path+"/"+"/".join(i['path'].split("/")[5:])
        i['path'] = path
#         print(path)

    WriteJSON(obj,filename)

In [5]:
# Update filepaths for all 3
sets = ['train','test','val']

for folder in sets:
    filename = "../datasets/pantograph/"+folder+"/region_data.json"
    UpdatePaths(filename)

### Delete Images & Annotations 

In [7]:
# Delete images and annotations

def ClearJSON(filename):
    obj = ReadJSON(filename)

    # Clear all images
    obj['images'] = []
    len(obj['images'])

    # Clear all annotations
    obj['annotations'] = []
    len(obj['annotations'])
    
    WriteJSON(obj,filename)

In [5]:
sets = ['train','test','val']
folder = sets[0]

filename = "../datasets/pantograph/"+folder+"/region_data.json"

# ClearJSON(filename)

### Read & Write to JSON

In [35]:
# Read in original

sets = ['train','test','val']
folder = sets[0]
filename = "../datasets/pantograph/"+folder+"/region_data.json"
obj = ReadJSON(filename)

print(obj.keys())

len(obj)

dict_keys(['images', 'categories', 'annotations'])


3

In [38]:
# Write updated

filename = "../datasets/pantograph/"+folder+"/region_data_2.json"
WriteJSON(obj,filename)

In [65]:
obj['annotations'][0]['category_id'] = 1

### Manage Categories

In [4]:
obj['categories']

[{'color': '#ff0000',
  'id': 1,
  'keypoints': ['L1', 'L2', 'L3', 'R1', 'R2', 'R3'],
  'name': 'front_bar',
  'skeleton': [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]],
  'supercategory': ''},
 {'color': '#00ff00',
  'id': 2,
  'keypoints': ['L1', 'L2', 'L3', 'R1', 'R2', 'R3'],
  'name': 'middle_bar',
  'skeleton': [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]],
  'supercategory': ''},
 {'color': '#0000ff',
  'id': 3,
  'keypoints': ['L1', 'L2', 'L3', 'R1', 'R2', 'R3'],
  'name': 'rear_bar',
  'skeleton': [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]],
  'supercategory': ''}]

In [None]:
obj['categories'][0].keys()

In [None]:
'''
Corrections
'''

obj['categories'][0]['color'] = '#ff0000'
obj['categories'][1]['color'] = '#00ff00'
obj['categories'][2]['color'] = '#0000ff'

for i in range(0,len(obj['categories'])):
    del obj['categories'][i]['creator']
    del obj['categories'][i]['metadata']

In [None]:
'''
add keypoints
'''

for i in range(0,len(obj['categories'])):
    obj['categories'][i]['keypoints'] = ['L1','L2','L3','R1','R2','R3']
    obj['categories'][i]['skeleton'] = [[0,1],[1,2],[2,3],[3,4],[4,5]]

### Manage Images

In [36]:
len(obj['images'])

216

In [37]:
# duplicate
tmp_imgs = []
tmp_annos = []
for img in obj['images']:
    
    for anno in obj['annotations']:
        tmp2 = anno
        if anno['image_id'] == img['id']:
            #print(anno['image_id'])
            tmp2['id'] = len(obj['annotations'])
            tmp_annos.append(tmp2)

        
    tmp = img
    tmp['file_name'] = tmp['file_name'].split('.')[0]+'_2.jpg'
    tmp['id'] = len(obj['images'])
    print(tmp['file_name'],tmp['id'])
    tmp_imgs.append(tmp)
    
    
obj['images'] = obj['images'] + tmp_imgs
print('number of images:',len(obj['images']))

obj['annotations'] = obj['annotations'] + tmp_annos
print('number of annotations:',len(obj['annotations']))

padd_to_swin_frame_20647.jpg 216
padd_to_swin_frame_9509.jpg 216
padd_to_swin_frame_26216.jpg 216
padd_to_swin_frame_42923.jpg 216
padd_to_swin_frame_31785.jpg 216
padd_to_swin_frame_48492.jpg 216
padd_to_swin_frame_59630.jpg 216
padd_to_swin_frame_70768.jpg 216
padd_to_swin_frame_65199.jpg 216
padd_to_swin_frame_87475.jpg 216
padd_to_swin_frame_81906.jpg 216
padd_to_swin_frame_93044.jpg 216
padd_to_swin_frame_165441.jpg 216
padd_to_swin_frame_120889.jpg 216
padd_to_swin_frame_154303.jpg 216
padd_to_swin_frame_109751.jpg 216
padd_to_swin_frame_148734.jpg 216
padd_to_swin_frame_126458.jpg 216
padd_to_swin_frame_143165.jpg 216
padd_to_swin_frame_115320.jpg 216
padd_to_swin_frame_171010.jpg 216
padd_to_swin_frame_182148.jpg 216
padd_to_swin_frame_176579.jpg 216
padd_to_swin_frame_187717.jpg 216
padd_to_swin_frame_198855.jpg 216
padd_to_swin_frame_204424.jpg 216
padd_to_swin_frame_193286.jpg 216
padd_to_swin_frame_226700.jpg 216
padd_to_swin_frame_221131.jpg 216
padd_to_swin_frame_209993.j

In [47]:
for file in glob.glob('../datasets/pantograph/train/*.jpg'):
    img = Image.open(file)
    
    newfile = file.split('.jpg')[0]+'_2.jpg'
    img.save(newfile)
#     print(newfile)

In [6]:
obj['images'][154]

{'category_ids': [1, 2, 3],
 'file_name': 'padd_to_swin_frame_242768.jpg',
 'height': 1080,
 'id': 154,
 'num_annotations': 3,
 'path': '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_242768.jpg',
 'width': 1920}

In [None]:
for i in range(0,len(obj['images'])):
    del obj['images'][i]['annotated']
    del obj['images'][i]['annotating']
    del obj['images'][i]['dataset_id']
    del obj['images'][i]['events']
    del obj['images'][i]['is_modified']
    del obj['images'][i]['metadata']
    del obj['images'][i]['milliseconds']
    del obj['images'][i]['regenerate_thumbnail']

In [4]:
os.getcwd().replace('/dev','')

'/Users/jessedecker/projects/rail_segmentation'

In [30]:
obj['images'] = obj['images'][:154]
len(obj['images'])

154

In [31]:
obj['images'][-1]

{'category_ids': [1, 2, 3],
 'file_name': 'padd_to_swin_frame_242768.jpg',
 'height': 1080,
 'id': 153,
 'num_annotations': 3,
 'path': '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_242768.jpg',
 'width': 1920}

In [6]:
# Clear all images
obj['images'] = []
len(obj['images'])

0

### Manage Annotations

In [44]:
len(obj['annotations'])

561

In [43]:
colors = [[255, 0, 0],[0, 255, 0],[0, 0, 255]]
category_ids = [1,2,3]

for category_id in category_ids:

    mt_anno = obj['annotations'][0]

    mt_anno['bbox'] = []
    mt_anno['keypoints'] = []
    mt_anno['segmentation'] = []

    mt_anno['num_keypoints'] = 0
    
    mt_anno['category_id'] = category_id
    mt_anno['color'] = colors[category_id-1]
    mt_anno['id'] = len(obj['annotations'])+category_id-1
    mt_anno['image_id'] = 178

    print(mt_anno)
    obj['annotations'].append(mt_anno)

{'area': 5463.6864, 'bbox': [], 'category_id': 1, 'color': [255, 0, 0], 'id': 558, 'image_id': 178, 'iscrowd': 0, 'keypoints': [], 'num_keypoints': 0, 'segmentation': []}
{'area': 5463.6864, 'bbox': [], 'category_id': 2, 'color': [0, 255, 0], 'id': 560, 'image_id': 178, 'iscrowd': 0, 'keypoints': [], 'num_keypoints': 0, 'segmentation': []}
{'area': 5463.6864, 'bbox': [], 'category_id': 3, 'color': [0, 0, 255], 'id': 562, 'image_id': 178, 'iscrowd': 0, 'keypoints': [], 'num_keypoints': 0, 'segmentation': []}


In [10]:
list(map(lambda x:x['image_id']==154,obj['annotations']))

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,


In [None]:
for anno in obj['annotations']:
    if anno['id'] > 462:
        anno['id'] += 1

# set anno and id for 463
anno['id']

In [None]:
obj['annotations'][0].keys()

In [None]:
'''
Drop unneeded columns
'''

for i in range(0,len(obj['annotations'])):
    del obj['annotations'][i]['creator']
    del obj['annotations'][i]['dataset_id']
    del obj['annotations'][i]['isbbox']
    del obj['annotations'][i]['events']
    del obj['annotations'][i]['metadata']
    del obj['annotations'][i]['milliseconds']
    del obj['annotations'][i]['height']
    del obj['annotations'][i]['width']

In [None]:
'''
Add  empty values to existing
'''

for i in range(0,len(obj['annotations'])):
    obj['annotations'][i]['keypoints'] = []
    obj['annotations'][i]['num_keypoints'] = 0

In [76]:
# t = obj['annotations'][0:558]
# t[-1]
u = obj['annotations'][561:]
# u[0]

obj['annotations'] =t+u
len(obj['annotations'])

597

In [80]:
for i,anno in enumerate(obj['annotations']):
    if i > 557:
        print(i,anno['id'])
        anno['id'] = i
        print(i,anno['id'])
        print()
        

558 561
558 558

559 562
559 559

560 563
560 560

561 564
561 561

562 565
562 562

563 566
563 563

564 567
564 564

565 568
565 565

566 569
566 566

567 570
567 567

568 571
568 568

569 572
569 569

570 573
570 570

571 574
571 571

572 575
572 572

573 576
573 573

574 577
574 574

575 578
575 575

576 579
576 576

577 580
577 577

578 581
578 578

579 582
579 579

580 583
580 580

581 584
581 581

582 585
582 582

583 586
583 583

584 587
584 584

585 588
585 585

586 589
586 586

587 590
587 587

588 591
588 588

589 592
589 589

590 593
590 590

591 594
591 591

592 595
592 592

593 596
593 593

594 597
594 594

595 598
595 595

596 599
596 596



In [49]:
len(obj['annotations'])

570

In [70]:
for anno in obj['annotations']:
    if len(anno['keypoints']) < 18:
#     if anno['num_keypoints'] < 6:
        print(anno['id'],anno['image_id'])

562 178
562 178
562 178


In [8]:
# Clear all annotations

obj['annotations'] = []
len(obj['annotations'])

0

### Validate Dataset

1. Does the number of keypoints value match the actual number of keypoints - done
2. Are the keypoints within the mask
3. Is the number of annotations for each image correct - done, workng but not correct.
4. Can I open them with the regular process -yes

In [3]:
from shapely import geometry

In [24]:
filename = "../datasets/pantograph/"+folder+"/region_data.json"
WriteJSON(obj,filename)

In [25]:
sets = ['train','val']
folder = sets[0]

filename = "../datasets/pantograph/"+folder+"/region_data.json"
obj = ReadJSON(filename)
print(obj.keys())

dict_keys(['annotations', 'categories', 'images'])


In [26]:
print("Number of images:",len(obj['images']))
print("Number of annotations:",len(obj['annotations']))

Number of images: 233
Number of annotations: 693


In [27]:
'''
Update the number of keypoints to match only vis categories 1&2
'''

for i in range(0,len(obj['annotations'])):
    total_kps = 0
    num_kp = 0
    for j in range(2,len(obj['annotations'][i]['keypoints']),3):
        vis = obj['annotations'][i]['keypoints'][j]
        if vis == 0:
            pass
            #print(vis,obj['annotations'][i]['keypoints'][j-1],obj['annotations'][i]['image_id'])
        elif len(obj['annotations'][i]['segmentation']) <= 0:
            print('No mask',obj['annotations'][i]['image_id'])
        else:
            num_kp+=1

    print(obj['annotations'][i]['num_keypoints'],':',num_kp)

    # set number of keypoints        
#     obj['annotations'][i]['num_keypoints'] = num_kp
#     print(obj['annotations'][i]['num_keypoints'])

6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
4 : 4
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
4 : 4
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
5 : 5
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
5 : 5
3 : 3
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 6
6 : 

In [28]:
'''
Validate keypoints labeled 2 are within their masks.
Validate keypoints labeled 1 are outside their masks

1. Open each anno
2. Create single polygon representation from segmentation list
3. Loop through each keypoint in anno
4. Create point from each keypoint
5. Check the relationship between visibility value and whether it is in or out.
    if vis is 2 and outside:
        update psd or  value
    elif vis is 1 and inside:
        update psd or value
'''

sets = ['train','val']
folder = sets[0]

filename = "../datasets/pantograph/"+folder+"/region_data.json"
obj = ReadJSON(filename)

for i,anno in enumerate(obj['annotations'][:]):
    
    segmentations = anno['segmentation']
    keypoints = anno['keypoints']
    image_id = anno['image_id']
    anno_id = anno['id']
    category_id = anno['category_id']
    #print('opening annotation',anno_id,'image',image_id,category_id)
    
    img_path = obj['images'][image_id]['path']
    
    polys = []
    errors = False
    for segmentation in segmentations:
        
        try:
            # Create np array from list
            mask = np.array(segmentation)
            #print(mask.shape)

            # Reshape
            mask = mask.reshape(int(mask.shape[0]/2),2)

            # Create polygon
            polygon = geometry.Polygon(mask)        
            polys.append(polygon)

        except Exception as e:
            pass
            #print("Error in masks",e,image_id)
            
        from shapely.ops import cascaded_union
    
    try:
        polygon = cascaded_union(polys)
    except Exception as e:
        print("Error joining polygons",e,image_id)
            
    try:

        # Create np array
        kps = np.array(keypoints)

        # Reshape
        kps = kps.reshape(int(kps.shape[0]/3),3)

        
        # Check each point in polygon
        for j,kp in enumerate(range(kps.shape[0])):

            pt = list(kps[kp])[:-1]
            vis = int(list(kps[kp])[-1])
            point = geometry.Point(pt)

            # Default case for labeled visible
            if point.within(polygon) and vis == 2:
                pass
            # Default case for labeled not visible
            elif vis == 1:
                pass
            # Error kp is inside mask but labled as not visible
            elif point.within(polygon) and vis == 1:
                print('Error. Keypoint inside mask.',anno_id,'image',image_id,category_id)
            # Error kp is visible but outside mask - LOOK FOR THESE
            elif vis == 2:
                if errors == False:
                    print(img_path)
                    errors = True
                print("Error. Keypoint outside mask.",'Annotion ID:',anno_id,'Image ID:',image_id,'Category ID:',category_id,'Keypoint:',j)
                
            # Error edge case - should bes -1,-1
            elif point.within(polygon) == False and point.within(polygon) and vis == 0:
                print('Big Error. Keypoint inside mask.')
            elif vis == 0:
                pass
            else:
                print('Oops. The vis was not 0-1-2',vis) 
            
    except Exception as e:
        print("Error in keypoints",e,image_id)

    print()
























































































































































































































































































































































































































































































































































































































































































































In [29]:
'''
Validate each image appears once.
'''

sets = ['train','val']
folder = sets[0]

filename = "../datasets/pantograph/"+folder+"/region_data.json"
obj = ReadJSON(filename)

# Validate each image appears once.
paths = [img['path'] for img in obj['images']]
print(len(paths))    
print(len(list(set(paths))))

233
233


In [19]:
'''
Validate each image has correct number of annotations
'''

for img in obj['images']:
    
    count = 0
    for anno in obj['annotations']:
        if img['id'] == anno['image_id']:
            count+=1
            
    if count >3:
        print('Too many annotations',img['id'],count)
    if count == 0:
        print('Not enough annotations',img['id'],count) 

In [298]:
'''
Check num annos is correct.
These are correct for now. the assignment of classes is based on boxes, but it should be based on mask or keypoint
'''

for img in obj['images']:
    if len(img['category_ids']) != img['num_annotations']:
        print(img['id'])

### Convert COCO format to model format - UNUSED

In [38]:
# Read in original

sets = ['train','test','val']
folder = sets[2]
filename = "../datasets/pantograph/"+folder+"/via_region_data.json"
obj = ReadJSON(filename)

print(obj.keys())

len(obj)

dict_keys(['padd_to_swin_frame_104182.jpg', 'padd_to_swin_frame_215562.jpg', 'padd_to_swin_frame_237838.jpg', 'padd_to_swin_frame_37354.jpg', 'swin_to_padd_frame_130516.jpg', 'swin_to_padd_frame_220500.jpg', 'swin_to_padd_frame_242996.jpg', 'swin_to_padd_frame_96772.jpg'])


8

In [35]:


collector = {}
for i in obj['images']:
    instance = {
        'fileref':"",
        'size':-1,
        'filename':i['file_name'],
        'base64_img_data':"",
        'file_attributes':{},
        'regions' : []
    }
    
    # Find each annotation
    for j in obj['annotations']:
        if i['id'] == j['image_id']:
#             print('match')
            for mask in j['segmentation']:
                
                X = []
                Y =  []
                for k in range(0,len(mask)):
                    if k%2 == 0:
                        X.append(mask[k])
                    else:
                        Y.append(mask[k])
                
                tmp = {}
                tmp['shape_attributes'] = {
                    'name':'polygon',
                    'all_points_x':X,
                    'all_points_y':Y
                }
                tmp['region_attributes'] = {}
                instance['regions'].append(tmp)
    
    instance['size'] = len(X)*len(Y)
    collector[i['file_name']] = instance
#     collector.append(instance)
#     print(i['file_name'])
# print(X)
# print()
# print(Y)

# for c in collector:
#     print(c)