#### Creating Sample JSON Files for Testing the Data Pipeline

In [1]:
import json
import os
import numpy as np
import random
from PIL import Image
from glob import glob
from os.path import join, exists, basename

In [7]:
# path = '/data/ground-truth/jsons/v1.0.0.json'
# with open(path) as f:
#     file = json.load(f)

#### Utilities

In [18]:
# np.random.seed(42)
# def generate_random_images(num_images: int, path: str):
#     sizes = []
#     for i in range(num_images):
#         height, width = random.randint(50, 150), random.randint(100, 200)
#         sizes.append((height, width))
#         img = np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)
#         img = Image.fromarray(img)
#         img.save(os.path.join(path, f"{i}.jpeg"))
#     return sizes

def generate_boxes(num_boxes, height, width):
    """Generate random ``[x, y, w, h]`` boxes that lie fully inside an image.

    Args:
        num_boxes: Number of boxes to generate.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        A list of ``num_boxes`` lists ``[x, y, w, h]`` of ints. Every box has
        ``w >= 1`` and ``h >= 1`` and satisfies ``x + w <= width`` and
        ``y + h <= height``, so no zero-area box is ever produced.

    Raises:
        ValueError: If boxes are requested for an image whose height or width
            is smaller than 1 (no non-degenerate box fits in it).
    """
    if num_boxes > 0 and (width < 1 or height < 1):
        raise ValueError(
            f"cannot place boxes in an image of size {width}x{height}"
        )

    boxes = []
    for _ in range(num_boxes):
        # random.randint is inclusive on both ends: keep the top-left corner
        # strictly inside the image so at least one pixel remains for the box.
        x = random.randint(0, width - 1)
        y = random.randint(0, height - 1)
        # Lower bound of 1 rules out degenerate (zero-width/height) boxes.
        w = random.randint(1, width - x)
        h = random.randint(1, height - y)
        boxes.append([x, y, w, h])
    return boxes

#### Images Directory (Remains constant across vision type tasks)

Split-level information and the image directory remain the same across all test tasks.

In [15]:
# Generate Images
# save_path = '/workspace/pest-monitoring-new/tests/helpers/resources/images'
# os.makedirs(save_path, exist_ok = True)
# sizes = generate_random_images(200, save_path)

# # Create Images list
# images = []
# image_paths = glob(join(save_path, '*.jpeg'))
# for i, image_path in enumerate(image_paths): 
#     images.append({
#         'id': eval(basename(image_path).split('.jpeg')[0]),
#         'file_path': image_path,
#         's3_url': None,
#         'width': sizes[i][1],
#         'height': sizes[i][0],
#         'date_captured': '0000:00:00 00:00:00'
#     })

#### Object Detection Test File

In [6]:
from PIL import Image

In [16]:
save_path = '/workspace/pest-monitoring-new/tests/helpers/resources/images'
image_paths = glob(join(save_path, "*.jpeg"))

images = []
for i, image_path in enumerate(image_paths):
    w, h = Image.open(image_path).size
    images.append({
        'id': eval(basename(image_path).split('.jpeg')[0]),
        'file_path': image_path,
        's3_url': None,
        'width': w,
        'height': h,
        'date_captured': '0000:00:00 00:00:00'
    })

# Create Split file
splits = [] 
image_ids = [x['id'] for x in images]
train_len = int(0.6 * len(images))
train_ids = image_ids[:train_len]
train_val_len = int(0.8 * len(images))
val_ids = image_ids[train_len:train_val_len]
test_ids = image_ids[train_val_len:]
for train_id in train_ids:
    splits.append({'image_id' : train_id, 'split' : 'train'})
for val_id in val_ids:
    splits.append({'image_id' : val_id, 'split' : 'val'})
for test_id in test_ids:
    splits.append({'image_id' : test_id, 'split' : 'test'})    

##### Objects of 5 classes

In [19]:
# Metadata block for the 5-class object detection fixture
info = {
    'version': '000.000.000',
    'description': 'Object Detection Test Json (5 object classes)',
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-object-det-file.json',
    'date_created': '0000-00-00'
}

# Box annotations: between 0 and 5 random boxes per image, each tagged with
# a random object class. Annotation ids are consecutive across all images.
n_object_classes = 5
box_annotations = []
for image in images:
    for box in generate_boxes(random.randint(0, 5), image['height'], image['width']):
        box_annotations.append({
            'id': len(box_annotations),
            'image_id': image['id'],
            'category_id': random.randint(0, n_object_classes - 1),
            'bbox': box
        })

# One 'bounding box' category per object class
categories = [
    {
        'id': class_idx,
        'name': f'class_{class_idx}',
        'supercategory': 'bounding box'
    }
    for class_idx in range(n_object_classes)
]

# This fixture carries no image-level annotations
caption_annotations = []

# Positional 60/20/20 train/val/test split over the image ids
image_ids = [entry['id'] for entry in images]
train_len = int(0.6 * len(images))
train_val_len = int(0.8 * len(images))
splits = (
    [{'image_id': image_id, 'split': 'train'} for image_id in image_ids[:train_len]]
    + [{'image_id': image_id, 'split': 'val'} for image_id in image_ids[train_len:train_val_len]]
    + [{'image_id': image_id, 'split': 'test'} for image_id in image_ids[train_val_len:]]
)

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

#### Single Head Classification + Object Detection

In [21]:
# Metadata block for the combined object detection + binary classification fixture
info = {
    'version': '000.000.000',
    'description': 'Test Split containing Object Detection + Image Binary Classification Test Json',
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-object-detection-and-classification.json',
    'date_created': '0000-00-00'
}

# Box annotations: between 0 and 5 random boxes per image, each tagged with
# a random object class in [0, n_object_classes).
n_object_classes = 5
box_annotations = []
for image in images:
    for box in generate_boxes(random.randint(0, 5), image['height'], image['width']):
        box_annotations.append({
            'id': len(box_annotations),
            'image_id': image['id'],
            'category_id': random.randint(0, n_object_classes - 1),
            'bbox': box
        })

# Image-level annotations: one binary label per image for a single
# classification head. The head's category id is placed after the box class
# ids so that it does not collide with box class 0 (same layout as the
# generalized fixture, which offsets head ids by n_object_classes).
n_classes = 2
clf_category_id = n_object_classes
caption_annotations = [
    {
        'id': annotation_id,
        'image_id': image['id'],
        'category_id': clf_category_id,
        'caption': str(random.randint(0, n_classes - 1))
    }
    for annotation_id, image in enumerate(images)
]

# Categories: every id referenced above must be declared here. Previously
# only id 0 existed (as an image-level head), leaving box classes 1-4
# undefined and box class 0 mislabelled.
categories = [
    {
        'id': class_idx,
        'name': f'class_{class_idx}',
        'supercategory': 'bounding box'
    }
    for class_idx in range(n_object_classes)
]
categories.append({
    'id': clf_category_id,
    'name': f'class_{clf_category_id}',  # the single image-level task
    'supercategory': 'Image Level Categorical Label'
})

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

##### Single Head - Classification

#### Image Classification/Regression Dataset

##### Single Head - Binary Classification

In [12]:
# Metadata block for the single-head binary classification fixture
info = {
    'version': '000.000.000',
    'description': 'Test Split containing Image Binary Classification (Only one multi-class label) Test Json',
    'contributor': 'Apoorv Agnihotri',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-image-bin-clf-single-head-file.json',
    'date_created': '0000-00-00'
}

# This fixture carries no box-level annotations
box_annotations = []

# One image-level annotation per image: a random binary label stored as a
# string caption, all attached to category 0 (the only head).
n_classes = 2
caption_annotations = [
    {
        'id': annotation_id,
        'image_id': image['id'],
        'category_id': 0,
        'caption': str(random.randint(0, n_classes - 1))
    }
    for annotation_id, image in enumerate(images)
]

# Single image-level categorical head (task 0)
categories = [{
    'id': 0,
    'name': 'class_0',
    'supercategory': 'Image Level Categorical Label'
}]

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

##### Single Head - Classification

In [13]:
# Metadata block for the single-head multi-class classification fixture
info = {
    'version': '000.000.000',
    'description': 'Test Split containing Image Classification (Only one multi-class label) Test Json',
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-image-clf-single-head-file.json',
    'date_created': '0000-00-00'
}

# This fixture carries no box-level annotations
box_annotations = []

# One image-level annotation per image: a random label in [0, n_classes)
# stored as a string caption, all attached to category 0 (the only head).
n_classes = 5
caption_annotations = [
    {
        'id': annotation_id,
        'image_id': image['id'],
        'category_id': 0,
        'caption': str(random.randint(0, n_classes - 1))
    }
    for annotation_id, image in enumerate(images)
]

# Single image-level categorical head (task 0)
categories = [{
    'id': 0,
    'name': 'class_0',
    'supercategory': 'Image Level Categorical Label'
}]

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

##### Single Head - Regression

In [14]:
# Metadata block for the single-head regression fixture
# NOTE(review): the description still says "multi-class label"; this looks
# copied from the classification cell -- confirm before changing the string.
info = {
    'version': '000.000.000',
    'description': 'Test Split containing Image Regression (Only one multi-class label) Test Json',
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-image-reg-single-head-file.json',
    'date_created': '0000-00-00'
}

# This fixture carries no box-level annotations
box_annotations = []

# One image-level annotation per image: a random integer target in
# [0, n_classes) stored as a string caption, attached to category 0.
n_classes = 5
caption_annotations = [
    {
        'id': annotation_id,
        'image_id': image['id'],
        'category_id': 0,
        'caption': str(random.randint(0, n_classes - 1))
    }
    for annotation_id, image in enumerate(images)
]

# Single image-level regression head (task 0)
categories = [{
    'id': 0,
    'name': 'class_0',
    'supercategory': 'Image Level Regressional Label'
}]

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

##### Multi Head

In [15]:
# Metadata block for the multi-head image-level fixture
info = {
    'version': '000.000.000',
    'description': 'Test Split containing Image Classification (Multi-head multi-class label) Test Json',
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-image-clf-multiple-head-file.json',
    'date_created': '0000-00-00'
}

# This fixture carries no box-level annotations
box_annotations = []

# Head layout: heads 1 and 2 are regression heads, every other head is a
# classification head with n_classes labels.
n_heads = 4
n_classes = 5 # for classification heads
reg_heads = [1, 2]
clf_heads = [0, 3]

# One annotation per (image, head) pair, numbered consecutively. Regression
# heads draw a target in [0, 100]; classification heads a label in [0, n_classes).
image_head_pairs = [
    (image['id'], head)
    for image in images
    for head in range(n_heads)
]
caption_annotations = [
    {
        'id': annotation_id,
        'image_id': image_id,
        'category_id': head,
        'caption': str(random.randint(0, 100 if head in reg_heads else n_classes - 1))
    }
    for annotation_id, (image_id, head) in enumerate(image_head_pairs)
]

# One category per head; the supercategory records the head type
categories = [
    {
        'id': head,
        'name': f'class_{head}',
        'supercategory': (
            'Image Level Regressional Label' if head in reg_heads
            else 'Image Level Categorical Label'
        )
    }
    for head in range(n_heads)
]

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)

#### Generalized Dataset

In [16]:
# Metadata block for the generalized fixture (box annotations + multi-head
# image-level annotations).
# The description is built from adjacent string literals: the previous
# backslash continuation inside a single literal embedded the source
# indentation (four spaces) in the emitted text.
info = {
    'version': '000.000.000',
    'description': (
        'Test Split containing Generalized (Multi-head (4 heads, 2 regression heads and 2 classification) '
        'multi-class label () + Box Annotations (5 classes)) Test Json'
    ),
    'contributor': 'Nikhil Shenoy',
    'url': '/workspace/pest-monitoring-new/tests/helpers/resources/jsons/test-general-file.json',
    'date_created': '0000-00-00'
}

# Box annotations: between 0 and 5 random boxes per image, each tagged with
# a random object class in [0, n_object_classes).
n_object_classes = 5
box_annotations = []
for image in images:
    for box in generate_boxes(random.randint(0, 5), image['height'], image['width']):
        box_annotations.append({
            'id': len(box_annotations),
            'image_id': image['id'],
            'category_id': random.randint(0, n_object_classes - 1),
            'bbox': box
        })

# Head layout: heads 1 and 2 are regression heads, every other head is a
# classification head with n_classes labels.
n_heads = 4
n_classes = 5 # for classification heads
reg_heads = [1, 2]
clf_heads = [0, 3]

# Image-level annotations: one per (image, head) pair, numbered
# consecutively. Head category ids are offset by n_object_classes so they
# never collide with the box class ids. Regression heads draw a target in
# [0, 100]; classification heads a label in [0, n_classes).
caption_annotations = []
for image in images:
    for head in range(n_heads):
        max_value = 100 if head in reg_heads else n_classes - 1
        caption_annotations.append({
            'id': len(caption_annotations),
            'image_id': image['id'],
            'category_id': n_object_classes + head,
            'caption': str(random.randint(0, max_value))
        })

# Categories: first the box classes, then one entry per image-level head
categories = [
    {
        'id': class_idx,
        'name': f'class_{class_idx}',
        'supercategory': 'bounding box'
    }
    for class_idx in range(n_object_classes)
]
for head in range(n_heads):
    categories.append({
        'id': head + n_object_classes,
        'name': f'class_{head + n_object_classes}',
        'supercategory': (
            'Image Level Regressional Label' if head in reg_heads
            else 'Image Level Categorical Label'
        )
    })

file = {
    'info': info,
    'images': images,
    'box_annotations': box_annotations,
    'caption_annotations': caption_annotations,
    'categories': categories,
    'splits': splits
}
with open(info['url'], 'w') as f:
    json.dump(file, f)