## Generate Annotations
---
Create a COCO dataset from labeled images.

**Tasks**
1. 

**Assumptions**
1. Image sets are located in datasets/train-test-val/annotations
2. 26 annotations for each image with image name at root
3.  

In [1]:
import os
import json
import glob
from PIL import Image
import time
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Write json to file
def WriteJSON(obj,filename):
    """Serialize ``obj`` as indented, key-sorted JSON and write it to ``filename``.

    The object is fully serialized before the target file is opened, so a
    serialization failure can no longer truncate an existing file. Errors are
    reported on stdout instead of being raised (best-effort write).

    Args:
        obj: Object to serialize. Values the json module cannot encode are
            converted with ``str`` (``default=str``).
        filename: Path of the file to create or overwrite.
    """
    try:
        # Serialize first: opening with 'w' truncates the file immediately,
        # which would wipe previously saved data if dumps() then failed.
        obj_json = json.dumps(obj, sort_keys=True, indent=4,default=str)
        with open(filename, 'w') as outfile:
            outfile.write(obj_json)
    except Exception as e:
        print(e)
        print('File not written.')

# Read and return the parsed JSON from file. On any failure, return an empty list.
def ReadJSON(filename):
    """Load ``filename`` as JSON and return the parsed object.

    Any exception raised while opening or parsing the file is swallowed and
    an empty list is returned in its place.
    """
    try:
        with open(filename, 'r') as infile:
            return json.load(infile)
    except Exception:
        return []

In [3]:
'''
Get all image paths in image folder
'''

# Get the paths of all .jpg files directly inside the given folder
def GetSavedImageRoot(img_folder):
    """Return a list with the path of every ``.jpg`` file directly in ``img_folder``."""
    pattern = img_folder+'/*.jpg'
    return list(glob.glob(pattern))

# Get the paths of all .jpg files in the train, test and val subfolders
def GetSavedImagePaths(img_folder):
    """Return ``.jpg`` paths found in ``img_folder``'s train/test/val subfolders.

    Results are concatenated in the order train, test, val.
    """
    found = []
    for split in ('train','test','val'):
        found.extend(glob.glob(img_folder+'/'+split+'/*.jpg'))
    return found

# Get names without paths for all in train/test/val
def GetSavedFrameNames(img_folder):
    """Return the bare file names of saved frames in train/test/val.

    Only files whose name starts with ``swin_to_padd_`` or ``padd_to_swin_``
    are included; the directory part of each path is removed.

    The previous implementation tested ``path.find(...)`` for truthiness.
    ``str.find`` returns -1 (truthy) when the substring is absent, so the
    first branch ran for nearly every path and ``padd_to_swin_`` frames were
    returned with their full path still attached.

    Args:
        img_folder: Root folder that contains the train/test/val subfolders.

    Returns:
        List of file names (no directory component).
    """
    prefixes = ('swin_to_padd_', 'padd_to_swin_')
    names = []
    for path in GetSavedImagePaths(img_folder):
        name = os.path.basename(path)
        if name.startswith(prefixes):
            names.append(name)
    return names

# Derive the source .jpg path for every .psd file in the annotations folder
def GetAnnotatedImages(img_folder):
    """Return the ``.jpg`` path that corresponds to each ``.psd`` in ``img_folder``.

    For every ``.psd`` match the last four characters are dropped, every
    occurrence of ``annotations/`` is removed from the path, and ``.jpg`` is
    appended.
    """
    jpg_paths = []
    for psd_path in glob.glob(img_folder+'/*.psd'):
        stem = psd_path[:-4]
        jpg_paths.append(stem.replace("annotations/","")+'.jpg')
    return jpg_paths


# Get the paths of all .jpg files in a single dataset subfolder
def GetOrigImagePaths(img_folder,dataset):
    """Return a list of the ``.jpg`` paths inside ``img_folder/dataset``."""
    pattern = "/".join((img_folder, dataset, '*.jpg'))
    return list(glob.glob(pattern))

In [4]:
'''
Boxes

Start at top-left (0,0) and find top-left corner of box.
Then find width/height of detection

Args:
    img: RGB image
    color: RGB tuple with 0-255 channel values, e.g. (255,0,0)
    
Returns:
    "bbox": [x,y,width,height]
'''

def FindBBOX(img,color):
    """Return the bounding box of all pixels in ``img`` that equal ``color``.

    The image is scanned once and the smallest/largest x and y of the matching
    pixels are tracked, so row 0 and column 0 are included and the result is
    correct for regions that are not filled rectangles.

    Args:
        img: Image object exposing ``width``, ``height``, ``size`` and
            ``getpixel((x, y))`` returning an ``(r, g, b)`` triple.
        color: ``(r, g, b)`` tuple to look for.

    Returns:
        ``[x, y, width, height]`` where ``(x, y)`` is the top-left corner and
        width/height are the differences between the extreme coordinates.
        When no pixel matches, ``x`` and ``y`` are -1 (callers use this as the
        "no detection" marker) and width/height are the image size plus one,
        as before.
    """
    min_x = min_y = max_x = max_y = None

    for x in range(img.width):
        for y in range(img.height):
            r,g,b = img.getpixel((x,y))
            if (r,g,b) != color:
                continue
            if min_x is None:
                # First hit in column-major order has the smallest x.
                min_x = max_x = x
                min_y = max_y = y
            else:
                # x never decreases during the scan, so the latest hit is the max.
                max_x = x
                if y < min_y:
                    min_y = y
                if y > max_y:
                    max_y = y

    # Nothing matched: keep the legacy sentinel derived from first=(-1,-1), last=img.size.
    if min_x is None:
        return [-1,-1,img.size[0]+1,img.size[1]+1]

    return [min_x,min_y,max_x-min_x,max_y-min_y]


# Test FindBBOX
# t = 0

# filename = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/val/annotations/swin_to_padd_frame_220500_000'+str(t)+'s_0007_bbox.bmp'
# print(filename)
# img = Image.open(filename)
# # img.show()
# rgb_im = img.convert('RGB')

# colors = [(255,0,0),(0,255,0),(0,0,255)]
# color = colors[t]

# start_time = time.time()
# bbox = FindBBOX(rgb_im,color)
# print("--- %s seconds ---" % (time.time() - start_time))

# print(bbox)

In [5]:
'''
Keypoints

Args:
    img: RGB image
    color: RGB tuple with 0-255 channel values, e.g. (255,0,0)
    
Returns:
    "keypoints": [x1,y1,v1,...,xk,yk,vk],
'''

def FindKeypoint(img,color):
    """Locate the keypoint pixel in ``img``.

    The image is scanned column by column (x outer, y inner) and the first
    pixel that is either ``color`` or black is returned immediately.
    Previously the ``break`` only left the inner loop, so the scan continued
    over the whole image and a later column's match overwrote the first one.

    Args:
        img: Image object exposing ``width``, ``height`` and
            ``getpixel((x, y))`` returning an ``(r, g, b)`` triple.
        color: ``(r, g, b)`` tuple of the category being searched.

    Returns:
        ``(point, pnt_color)``: ``point`` is the ``(x, y)`` of the match and
        ``pnt_color`` its colour (``color`` or ``(0,0,0)``). If nothing
        matches, ``((-1,-1), (255,255,255))`` is returned; white marks the
        keypoint as not labeled.
    """
    for x in range(img.width):
        for y in range(img.height):
            r,g,b = img.getpixel((x,y))
            # Category colour = labeled and visible; black = labeled, not visible.
            if (r,g,b) == color or (r,g,b) == (0,0,0):
                return (x,y),(r,g,b)

    # No marker found: point stays (-1,-1) and white signals "not labeled".
    return (-1,-1),(255,255,255)


# Test FindKeypoints
# t = 0
# # swin_to_padd_frame_220500_0002s_0004_L2
# filename = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/val/annotations/swin_to_padd_frame_220500_000'+str(t)+'s_0004_L2.bmp'
# print(filename)
# img = Image.open(filename)
# rgb_im = img.convert('RGB')

# colors = [(255,0,0),(0,255,0),(0,0,255)]
# color = colors[t]

# start_time = time.time()
# point,color = FindKeypoint(rgb_im,color)
# print("--- %s seconds ---" % (time.time() - start_time))

# print(point,color)

In [6]:
'''
Segmentation
All masks for single detection

Args:
    img: RGB image
    color: RGB tuple with 0-255 channel values, e.g. (255,0,0)
    
Returns:
    "segmentation": RLE or [[x,y,x,y],]
'''

def FindMasks(img,color):
    """Return every mask polygon found for ``color`` in ``img``.

    The matching pixels are collected with ``FindMaskPixels`` and then cut
    into consecutive runs by ``GetSingleMask``; each run becomes one entry of
    the returned list. At least one entry is always returned (it is empty
    when no pixel matches).
    """
    pixels = FindMaskPixels(img,color)

    # The first run always starts at index 0.
    first_mask,offset = GetSingleMask(pixels)
    masks = [first_mask]

    # Keep cutting runs until the offset reaches the final pixel.
    last_index = len(pixels)-1
    while offset < last_index:
        next_mask,consumed = GetSingleMask(pixels[offset:])
        masks.append(next_mask)
        offset += consumed

    return masks



# 2 of 3
# Collect the (x,y) position of every pixel that matches a color
def FindMaskPixels(img,color):
    """Return ``(x, y)`` for each pixel of ``img`` equal to ``color``.

    Pixels are visited column by column (x outer, y inner), so the result is
    ordered by x first and by y within a column.
    """
    matches = []
    for col in range(img.width):
        for row in range(img.height):
            red,green,blue = img.getpixel((col,row))
            if (red,green,blue) != color:
                continue
            matches.append((col,row))
    return matches


# 3 of 3
def GetSingleMask(pixels):
    """Cut the leading run of pixels off ``pixels`` and turn it into a mask.

    The first pixel always starts the run. Each following pixel joins the run
    when, compared with its predecessor in the list, its x is equal or one
    larger OR its y is equal or one larger. The first pixel that fails that
    test ends the run.

    Returns:
        ``(mask, curr_point)``: ``mask`` is the run after ``CleanMask``;
        ``curr_point`` is the index of the pixel that ended the run, or
        ``len(pixels)`` when the run covers the whole list (0 for an empty
        list).
    """
    total = len(pixels)
    run = list(pixels[:1])
    split_at = total

    for idx in range(1,total):
        prev,cur = pixels[idx-1],pixels[idx]
        x_adjacent = cur[0] in (prev[0],prev[0]+1)
        y_adjacent = cur[1] in (prev[1],prev[1]+1)
        if not (x_adjacent or y_adjacent):
            # Gap found: another mask starts at this index.
            split_at = idx
            break
        run.append(cur)

    # Prep mask
    return CleanMask(run),split_at

# Reduce # pixels and format for fill/polygon
def CleanMask(mask):
    """Reduce a list of mask pixels to a flat outline polygon.

    For every distinct x only the smallest and largest y are kept. The
    outline lists the smallest-y points by ascending x, followed by the
    largest-y points by descending x, so the path ends back at the first
    column.

    The y-extents are gathered in one pass over ``mask``. This replaces a
    rescan of the whole mask for every column and repeated mid-list inserts,
    and produces the same output.

    Args:
        mask: Iterable of ``(x, y)`` points, in any order.

    Returns:
        Flat list ``[x0, y0, x1, y1, ...]``; empty when ``mask`` is empty.
    """
    # x -> (smallest y, largest y) seen so far
    extents = {}
    for point in mask:
        x,y = point[0],point[1]
        if x in extents:
            low,high = extents[x]
            if y < low:
                low = y
            if y > high:
                high = y
            extents[x] = (low,high)
        else:
            extents[x] = (y,y)

    columns = sorted(extents)

    polygon = []
    # Min-y edge, left to right
    for x in columns:
        polygon.append(x)
        polygon.append(extents[x][0])
    # Max-y edge, right to left, to get back to the start
    for x in reversed(columns):
        polygon.append(x)
        polygon.append(extents[x][1])

    return polygon



# Test FindMask
# t = 2
# # 'swin_to_padd_frame_220500_0000s_0006_mask'
# filename = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/val/annotations/swin_to_padd_frame_220500_000'+str(t)+'s_0006_mask.bmp'
# print(filename)
# img = Image.open(filename)

# colors = [(255,0,0),(0,255,0),(0,0,255)]
# color = colors[t]

# start_time = time.time()
# masks = FindMasks(img,color)
# print("--- %s seconds ---" % (time.time() - start_time))

# print(len(masks))
# for mask in masks:
#     print(len(mask))
# #     print(mask)

In [7]:
'''
Write JSON data from images

Loop images inside train/test/val

For each image
    0-3 detections
        1-3 bbox
        1+ masks
        0-18 keypoints
    For each detection
        1 bbox 
        1+ masks
        0-6 keypoints
        
Write image data once
Write annotation data each loop
'''

def _build_annotation(anno_id,img_id,cat_id,color,bbox,masks,keypoints):
    """Assemble the annotation record for a single detection."""
    return {
        'color':color,
        "segmentation": masks,
        # Number of (x,y,v) triples, stored as an int.
        # NOTE(review): the COCO format defines num_keypoints as the count of
        # keypoints with v > 0; this counts every triple - confirm intent.
        "num_keypoints": len(keypoints)//3,
        # NOTE(review): placeholder constant carried over unchanged; it is not
        # computed from the mask or the box.
        "area": 5463.6864,
        "iscrowd": 0,
        "keypoints": keypoints,
        "image_id": img_id,
        "bbox": bbox,
        "category_id": cat_id,
        "id": anno_id
    }


def GenerateAnnotations(paths,ANNOTATION_FILE,verbose=False):
    """Build annotation and image records for each image and store them as JSON.

    For every image path, the labeled layer images are read from the sibling
    ``annotations/`` folder as ``<image stem><group><layer>.bmp``. There are
    three groups (front/middle/rear bar, labeled red/green/blue) and eight
    layers per group: one bbox layer, one mask layer and six keypoint layers.

    Changes compared with the previous version:
      * Layer images are opened in a ``with`` block so file handles are closed.
      * The JSON file is read once and written once per image. A failure part
        way through an image no longer leaves annotations without an image
        record.
      * When an already-known image gains a detection that has no annotation
        yet, the annotation is appended instead of being silently dropped.
      * Existing records are replaced at the list position where they were
        found rather than assuming position equals id.

    Args:
        paths: Iterable of image paths using ``/`` as separator.
        ANNOTATION_FILE: Path of the JSON file; it must already contain the
            ``images`` and ``annotations`` lists.
        verbose: Currently unused; progress is always printed.
    """
    # red=front, green=middle, blue=rear
    colors = [(255,0,0),(0,255,0),(0,0,255)]

    # Groups = front_bar-middle_bar-rear_bar = R-G-B
    groups = ['_0000s','_0001s','_0002s']
    layers = ['_0007_bbox','_0006_mask','_0005_L1','_0004_L2','_0003_L3','_0002_R3','_0001_R2','_0000_R1']

    for path in paths:
        print()
        print('Annotating image:',path)

        # Labeled layers live in <image dir>/annotations/<image stem>...
        annotations = "/".join(path.split('/')[:-1])+'/annotations/'
        img_name = path.split('/')[-1]
        base = annotations+img_name[:-4]

        # Load the current data once; all changes for this image are made in memory.
        obj = ReadJSON(ANNOTATION_FILE)

        # Look for an existing record of this image (matched by file name).
        img_index = None
        for i,entry in enumerate(obj['images']):
            if img_name == entry['file_name']:
                img_index = i
                break
        imgExists = img_index is not None
        if imgExists:
            img_id = obj['images'][img_index]['id']
        else:
            # A new image gets the next free id.
            img_id = len(obj['images'])

        # Categories detected in this image
        cat_ids = []
        # Size of the most recently opened layer image
        img_size = None

        for i,group in enumerate(groups):
            # Colors align to groups
            color = colors[i]
            cat_id = -1
            bbox = []
            masks = []
            keypoints = []

            for j,layer in enumerate(layers):
                filename = base+group+layer+'.bmp'

                with Image.open(filename) as img:
                    img_size = img.size

                    if j == 0:
                        # Bounding box layer; x == -1 means nothing was found.
                        bbox = FindBBOX(img,color)
                        if bbox[0] != -1:
                            cat_id = i+1
                            cat_ids.append(cat_id)
                    elif j == 1:
                        # Mask layer
                        masks = FindMasks(img,color)
                    else:
                        # Keypoint layers
                        point,pnt_color = FindKeypoint(img,color)
                        keypoints.append(point[0])
                        keypoints.append(point[1])

                        # Visibility flag:
                        # 2 = marker has the group color (labeled, visible)
                        # 1 = marker is black (labeled, not visible)
                        # 0 = no marker (not labeled)
                        if color == pnt_color:
                            keypoints.append(2)
                        elif pnt_color == (0,0,0):
                            keypoints.append(1)
                        else:
                            keypoints.append(0)

            # All at detection level
            print('bbox',color,bbox)
            print(len(masks),'masks')
            print(len(keypoints),'keypoints')

            # Only record something if there's a detection
            if cat_id == -1:
                print("No detection to write")
                continue

            if imgExists:
                # Find the annotation previously stored for this image/category.
                anno_index = None
                for k,anno in enumerate(obj['annotations']):
                    if img_id == anno['image_id'] and cat_id == anno['category_id']:
                        anno_index = k
                        break

                if anno_index is not None:
                    print("Updating annotation data for existing image")
                    anno_id = obj['annotations'][anno_index]['id']
                    obj['annotations'][anno_index] = _build_annotation(
                        anno_id,img_id,cat_id,color,bbox,masks,keypoints)
                else:
                    # Detection that was not present when the image was first processed.
                    print("Adding annotation data for existing image")
                    obj['annotations'].append(_build_annotation(
                        len(obj['annotations']),img_id,cat_id,color,bbox,masks,keypoints))
            else:
                print("Writing annotation data for new image")
                obj['annotations'].append(_build_annotation(
                    len(obj['annotations']),img_id,cat_id,color,bbox,masks,keypoints))

        # Image record - name and id don't change. path/size/annos can all change
        img_data = {
            'category_ids':cat_ids, # Find from detections
            'file_name':img_name,
            'height':img_size[1],
            'id':img_id,
            'num_annotations':len(cat_ids), # calc
            'path':path,
            'width':img_size[0]
        }
        if imgExists:
            print("Updating image data", img_id)
            obj['images'][img_index] = img_data
        else:
            print('Writing new image data')
            obj['images'].append(img_data)

        # Single write per image keeps image and annotation records in step.
        WriteJSON(obj,ANNOTATION_FILE)

### Lottery

Find images that have been annotated to generate data

In [63]:
'''
Check current number saved images 
'''

# Dataset split to inspect
folder = 'train'

# Set path to root directory for saving images
PATH_TO_IMG_FOLDER = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph'

# Folder that holds the labeled layers for the chosen split
# img_folder = '/home/jupyter/GCP_Test/datasets/pantograph/train/annotations'
img_folder = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/'+folder+'/annotations'

# All original images of the split
saved = GetOrigImagePaths(PATH_TO_IMG_FOLDER,folder)
print('Total saved images:',len(saved))

# Images that already have a .psd in the annotations folder
# img_paths = GetSavedImagePaths(PATH_TO_IMG_FOLDER)
annotated = GetAnnotatedImages(img_folder)
print('Current annotated images:',len(annotated))

# Images still waiting to be labeled, sorted descending so the next one is last
_labeled = set(annotated)
imgs = sorted((candidate for candidate in saved if candidate not in _labeled), reverse=True)
print('Remaining images:',len(imgs))
if imgs:
    print('\nNext image:',imgs[-1])
else:
    print("All images are labeled")

Total saved images: 402
Current annotated images: 232
Remaining images: 170

Next image: /Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_107144.jpg


In [23]:
# Attempt to open an image in Photoshop from the notebook through a shell escape.
# NOTE(review): IPython substitutes Python values into `!` commands only when
# they are written as {expr} or $name, so `imgs[0]` below is presumably passed
# to the shell as literal text - confirm.
# NOTE(review): the recorded output of this cell is "psd: command not found",
# i.e. no `psd` command is available in the shell that `!` starts.
# ! export alias psd="open -a ~/Applications/Adobe Photoshop 2020/Adobe Photoshop 2020.app"

# ! ../../../../Applications/Adobe Photoshop 2020/Adobe Photoshop 2020.app

! psd imgs[0]

/bin/sh: psd: command not found


### Generate Annotations

In [54]:
# Re-run annotation generation for one hand-picked image
# paths = [img_paths[-3]]
paths = [
    "/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/swin_to_padd_frame_67368.jpg",
]
paths

['/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/swin_to_padd_frame_67368.jpg']

In [61]:
'''
Check current number saved images in train/val/test
'''

# Dataset split to process
folder = 'train'

# Set path to root directory for saving images
PATH_TO_IMG_FOLDER = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph'

# img_folder = '/home/jupyter/GCP_Test/datasets/pantograph/train/annotations'
img_folder = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/'+folder+'/annotations'

# Images that already have labeled layers
# img_paths = GetSavedImagePaths(PATH_TO_IMG_FOLDER)
img_paths = GetAnnotatedImages(img_folder)

print('Current annotated images:',len(img_paths))

# Get list of images to generate annotations

# Set path to JSON. The split name is taken from `folder`; it equals the
# parent directory of every entry in img_paths, and unlike img_paths[0] it
# does not fail when no image has been annotated yet.
ANNOTATION_FILE = "../datasets/pantograph/"+folder+"/region_data.json"

# Find images that haven't been annotated
obj = ReadJSON(ANNOTATION_FILE)
print('Images with data',len(obj['images']))

# Build the lookup of already-processed paths once instead of once per candidate.
known_paths = {entry['path'] for entry in obj['images']}
paths = [i for i in img_paths if i not in known_paths]
print('Remaining images:',len(paths))
paths

Current annotated images: 232
Images with data 230
Remaining images: 2


['/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_106090.jpg',
 '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_102221.jpg']

In [62]:
# Generate annotations for the selected paths and report the wall-clock time
start_time = time.time()
GenerateAnnotations(paths,ANNOTATION_FILE,verbose=True)
elapsed = time.time() - start_time

print("--- %s seconds ---" % elapsed)


Annotating image: /Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_106090.jpg
bbox (255, 0, 0) [397, 771, 1226, 153]
1 masks
18 keypoints
Writing annotation data for new image
bbox (0, 255, 0) [305, 850, 1432, 203]
1 masks
18 keypoints
Writing annotation data for new image
bbox (0, 0, 255) [464, 854, 1105, 152]
5 masks
18 keypoints
Writing annotation data for new image
Writing new image data

Annotating image: /Users/jessedecker/projects/rail_segmentation/datasets/pantograph/train/padd_to_swin_frame_102221.jpg
bbox (255, 0, 0) [387, 790, 1247, 144]
1 masks
18 keypoints
Writing annotation data for new image
bbox (0, 255, 0) [294, 854, 1432, 212]
1 masks
18 keypoints
Writing annotation data for new image
bbox (0, 0, 255) [462, 870, 1101, 143]
5 masks
18 keypoints
Writing annotation data for new image
Writing new image data
--- 204.83160305023193 seconds ---
