In [1]:
import json
import pandas as pd
import cv2

In [2]:
# f = open(f"/home/aidan/coco (copy)/annotations/instances_val2017.json", 'r')
f = open(f"/home/aidan/fiftyone/coco/annotations/instances_val2017.json", 'r')

# returns JSON object as
# a dictionary
data = json.load(f)

# Closing file
f.close()

In [3]:
annotations = pd.DataFrame(data['annotations'])

In [4]:
# Removing annotations with different format
annotations = annotations[annotations['iscrowd'] == 0]

In [6]:
# Cross joining the annotations on image level
annotationsMerged = pd.merge(annotations, annotations, on='image_id', suffixes=('_p', '_s'))

In [7]:
# Removing annotations with themselves
annotationsMerged = annotationsMerged[annotationsMerged['id_p']!= annotationsMerged['id_s']]

In [8]:
# Function to determine if bboxes intersect
def is_intersected(row):
    x1_p, y1_p, x2_p, y2_p = row['bbox_p'][0], row['bbox_p'][1], row['bbox_p'][0] + row['bbox_p'][2], row['bbox_p'][1] + row['bbox_p'][3]
    x1_s, y1_s, x2_s, y2_s = row['bbox_s'][0], row['bbox_s'][1], row['bbox_s'][0] + row['bbox_s'][2], row['bbox_s'][1] + row['bbox_s'][3]
    
    
    xLeft, yTop = max(x1_p, x1_s), max(y1_p, y1_s)
    
    xRight, yBottom = min(x2_p, x2_s), min(y2_p, y2_s)
    
    width = xRight - xLeft
    height = yBottom - yTop
    
    if (width < 0) | (height < 0):
        return False
    else:
        return True

In [9]:
# Computing indicator of bboxes that are intersected
annotationsMerged['intersection'] = annotationsMerged.apply(lambda row: is_intersected(row), axis=1)

In [10]:
# Getting only bboxes that are intersected
annotationsIntersected = annotationsMerged[annotationsMerged['intersection']]

In [11]:
annotationsNoIntersection = annotations[~annotations['image_id'].isin(annotationsIntersected['image_id'])]

In [12]:
annotationsIntersected['old_image_id'] = annotationsIntersected['image_id']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotationsIntersected['old_image_id'] = annotationsIntersected['image_id']


In [13]:
annotationsIntersected['image_id'] = annotationsIntersected['image_id'].astype(str) + annotationsIntersected['id_p'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotationsIntersected['image_id'] = annotationsIntersected['image_id'].astype(str) + annotationsIntersected['id_p'].astype(str)


In [14]:
def update_seg(row):
    # Computing extrema of primary bbox
    x1_min, y1_min, x2_max, y2_max = row['bbox_p'][0], row['bbox_p'][1], row['bbox_p'][0] + row['bbox_p'][2], row['bbox_p'][1] + row['bbox_p'][3]

    newSegSecondary = []
    # Iterating through the segs
    for segIndex, seg in enumerate(row['segmentation_s']):
        newSegValues = []
        for index in range(0,len(seg), 2):
            x,y = seg[index], seg[index+1]

            if (x >= x1_min) & (x <= x2_max) & (y >= y1_min) & (y <= y2_max):
                # Normalizing the coords
                newSegValues.append(x - x1_min)
                newSegValues.append(y - y1_min)
            
            if x < x1_min:
                newSegValues.append(0)
            elif x > x2_max:
                newSegValues.append(x2_max)
            else:
                newSegValues.append(x-x1_min)

                
            if y < y1_min:
                newSegValues.append(0)
            elif y > y2_max:
                newSegValues.append(y2_max)
            else:
                newSegValues.append(y-y1_min)

        # Have at least 4 points
        if len(newSegValues) >= 8:
            newSegSecondary.append(newSegValues)
            
    newSegPrimary = []
    # Iterating through the segs
    for segIndex, seg in enumerate(row['segmentation_p']):
        newSegValues = []
        for index in range(0,len(seg), 2):
            x,y = seg[index], seg[index+1]

            # Normalizing the coords
            newSegValues.append(x - x1_min)
            newSegValues.append(y - y1_min)
            
         
            
            assert (x - x1_min) >= 0.0
            assert (y - y1_min) >= 0.0
            
        newSegPrimary.append(newSegValues)
        
    row['segmentation_p'] = newSegPrimary
    row['segmentation_s'] = newSegSecondary
                

    return row

In [15]:
def update_seg_no_intersection(row):
    
    x1_min, y1_min = row['bbox'][0], row['bbox'][1]
            
    newSegPrimary = []
    # Iterating through the segs
    for segIndex, seg in enumerate(row['segmentation']):
        newSegValues = []
        for index in range(0,len(seg), 2):
            x,y = seg[index], seg[index+1]

            # Normalizing the coords
            newSegValues.append(x - x1_min)
            newSegValues.append(y - y1_min)
            
         
            
            assert (x - x1_min) >= 0.0
            assert (y - y1_min) >= 0.0
            
        newSegPrimary.append(newSegValues)
        
    row['segmentation'] = newSegPrimary
                

    return row

In [16]:
annotationsIntersected = annotationsIntersected.apply(lambda row: update_seg(row), axis=1)

In [17]:
annotationsIntersectedSecondary = annotationsIntersected[['segmentation_s', 'area_s', 'iscrowd_s',
       'bbox_s', 'category_id_s', 'id_s', 'image_id']]

In [18]:
annotationsIntersectedSecondary.columns = ['segmentation', 'area', 'iscrowd',
       'bbox', 'category_id', 'id', 'image_id']

In [19]:
annotationsIntersectedPrimary = annotationsIntersected[['segmentation_p', 'area_p', 'iscrowd_p', 'image_id', 'bbox_p',
       'category_id_p', 'id_p', 'old_image_id']]

In [20]:
annotationsIntersectedPrimary.columns = ['segmentation', 'area', 'iscrowd', 'image_id', 'bbox',
       'category_id', 'id', 'old_image_id']

In [21]:
annotationsIntersectedPrimary = annotationsIntersectedPrimary[annotationsIntersectedPrimary[['id']].groupby('id').cumcount() == 0]

In [22]:
annotationsNoIntersection = annotationsNoIntersection.apply(lambda row: update_seg_no_intersection(row), axis=1)

In [26]:
for index, row in annotationsNoIntersection.iterrows():
    image = cv2.imread(f"/home/aidan/fiftyone/coco/val2017/{str(row['image_id']).rjust(12, '0')}.jpg")
    
    x1_min, y1_min, x2_max, y2_max = row['bbox'][0], row['bbox'][1], row['bbox'][0] + row['bbox'][2], row['bbox'][1] + row['bbox'][3]
    
    image = image[int(y1_min):int(y2_max),int(x1_min):int(x2_max),:]
    
    cv2.cv2.imwrite(f"/home/aidan/fiftyone/coco-cropped/val2017/{str(row['image_id']).rjust(12, '0')}.jpg", image)


In [27]:
for index, row in annotationsIntersectedPrimary.iterrows():
    image = cv2.imread(f"/home/aidan/fiftyone/coco/val2017/{str(row['old_image_id']).rjust(12, '0')}.jpg")
    
    x1_min, y1_min, x2_max, y2_max = row['bbox'][0], row['bbox'][1], row['bbox'][0] + row['bbox'][2], row['bbox'][1] + row['bbox'][3]
    
    image = image[int(y1_min):int(y2_max),int(x1_min):int(x2_max),:]
    
    cv2.cv2.imwrite(f"/home/aidan/fiftyone/coco-cropped/val2017/{str(row['image_id']).rjust(12, '0')}.jpg", image)
    
 


In [28]:
newAnnotations = pd.concat([annotationsNoIntersection, annotationsIntersectedPrimary, annotationsIntersectedSecondary])

In [29]:
newAnnotations = newAnnotations.drop('old_image_id', axis=1)

In [30]:
newAnnotations['id'] = range(0, len(newAnnotations))

In [31]:
newAnnotations['image_id'] = newAnnotations['image_id'].astype(int)

In [32]:
data['annotations'] = newAnnotations.to_dict('records')

In [33]:
imageList = pd.DataFrame(newAnnotations['image_id'].apply(lambda image: str(image).rjust(12, '0') + '.jpg'))

In [34]:
imageList['width'] = newAnnotations['bbox'].apply(lambda box: int(box[2]))
imageList['height'] = newAnnotations['bbox'].apply(lambda box: int(box[3]))

In [35]:
imageList = imageList.rename({'image_id': 'file_name'}, axis=1)

In [36]:
imageList['id'] = newAnnotations['image_id']

In [37]:
data['images'] = imageList.to_dict('records')

In [38]:
# Dumping the new label
jsonObject = json.dumps(data)

with open(f"/home/aidan/fiftyone/coco-cropped/annotations/instances_val2017.json", "w") as outfile:
    outfile.write(jsonObject)