# JSON Dataset Visualization

In [1]:
import matplotlib.pyplot as plt
import json
import os
from src.dataset.annotations_coco import COCOAnnotations
from src.dataset.dataset_coco import CocoDataset
from src.dataset.annotations_utils import to_dict
import cv2
import numpy as np
from mpl_toolkits.axes_grid1 import ImageGrid

Set the path to the images directory and the path to the COCO annotations file.

In [None]:
# Fill in both paths before running the cells below.
# Directory that contains the image files referenced by the annotations.
IMAGES_DIRECTORY_PATH = ""
# Path to the annotations file describing those images.
ANNOTATIONS_FILE_PATH = ""

Use the `CocoDataset` class to split the entire dataset into a training subset and a test subset.

In [None]:
print("Dataset Information")

# Build the dataset from the configured paths, then split it into two subsets.
# NOTE(review): the 0.8 / 0.2 arguments are presumably the train / test
# fractions and seed=2023 presumably makes the random split repeatable —
# confirm against CocoDataset.
dataset = CocoDataset(IMAGES_DIRECTORY_PATH, ANNOTATIONS_FILE_PATH, seed=2023)
train_subset, test_subset = dataset.split(0.8, 0.2, random=True)

# Print a heading followed by the preview of each subset, training set first.
for heading, subset in (
    ("Training Set Information", train_subset),
    ("Test Set Information", test_subset),
):
    print(heading)
    subset.preview_dataset()

Show a random sample of images from the training set in a grid, each labelled with its category name.

In [None]:
grid_size = (5, 5)
PREVIEW_SEED = 2023  # set to None to draw a different sample on every run

train_data = train_subset.tree.data

# COCO annotations reference their image through "image_id"; indexing them by
# the annotation's own "id" only works when the two ids happen to coincide.
annotations_by_image_id = to_dict(train_data["annotations"], "image_id")  # image id -> list of dicts
categories_by_id = to_dict(train_data["categories"], "id")  # category id -> list of dicts

# Pick the preview sample through a permutation of indices instead of shuffling
# the image list in place: the subset keeps its order, so re-running this cell
# (or any other cell that reads the subset) gives the same result.
n_cells = grid_size[0] * grid_size[1]
all_images = train_data["images"]
rng = np.random.default_rng(PREVIEW_SEED)
preview_images = [all_images[k] for k in rng.permutation(len(all_images))[:n_cells]]

# Visualize the images in a grid
fig = plt.figure(figsize=(15.0, 15.0))
grid = ImageGrid(fig, 111, nrows_ncols=tuple(grid_size), axes_pad=0.1)

# Hide the axes of every cell up front so that cells left empty (fewer images
# than grid cells) do not show ticks and frames.
for cell_index in range(n_cells):
    grid[cell_index].axis("off")

# Fill the grid cell by cell and write the image category on each image.
for cell_index, image_data in enumerate(preview_images):
    image_path = os.path.join(IMAGES_DIRECTORY_PATH, image_data["file_name"])

    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # OpenCV loads images as BGR while matplotlib expects RGB. Convert before
    # drawing so that both the picture and the (255, 0, 0) label render in
    # their true colours (the label is red).
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Label with the category of the image's first annotation; an image
    # without annotations (or with an unknown category) is shown unlabelled.
    try:
        category_id = annotations_by_image_id[image_data["id"]][0]["category_id"]
        label = categories_by_id[category_id][0]["name"]
    except (KeyError, IndexError):
        label = None

    if label is not None:
        cv2.putText(image, label, (5, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    grid[cell_index].imshow(image)

plt.show()