In [72]:
# Mounting your Google Drive

# from google.colab import drive
# drive.mount('/content/drive')

In [74]:
# Switching to the directory of this notebook

# %cd /content/drive/MyDrive/EYOpenScienceDataChallenge/data/data_gradients_practice

In [75]:
# Ensuring the above process worked as expected

# !pwd

In [76]:
# install data gradients

# ! pip install data-gradients

In [77]:
# Download and extract your training set. The archive should unpack into a single folder (here ./training_data_3) that holds every image together with its labelme-style annotation JSON.

# !tar -xvf ./training_data_3.tar

In [78]:
# Install labelme2coco. It converts the labelme annotations into COCO format, which is the format the DataGradients COCO dataset loader used below reads.

# !pip install -U labelme2coco

In [79]:
# Settings for the labelme -> COCO conversion done with labelme2coco.
# NOTE(review): the convert() call at the bottom of this cell is commented out, so
# running the cell only defines the three settings below. The later cells read
# ./train_coco_format from disk, so the conversion must already have been run once
# for them to work - TODO confirm, or re-enable the call on a fresh checkout.

import labelme2coco

# set directory that contains labelme annotations and image files
labelme_folder = "./training_data_3"

# set export dir (where the COCO-format output is written)
export_dir = "./train_coco_format"

# set train split rate
# presumably the fraction of images assigned to the training split, the remainder
# going to validation - verify against the labelme2coco documentation
train_split_rate = 0.85

# Disabled: the actual conversion. Uncomment to (re)generate the COCO annotations.
# labelme2coco.convert(labelme_folder, export_dir, train_split_rate)

In [80]:
# loading data gradients dependencies

from data_gradients.managers.detection_manager import DetectionAnalysisManager
from data_gradients.datasets.detection.coco_detection_dataset import COCODetectionDataset

In [81]:
# Build one COCO detection dataset per split from the same root directory.
# The generator yields the "train" dataset first and the "val" dataset second,
# so the unpacking below binds them in that order.

train_set, val_set = (
    COCODetectionDataset(root_dir="./train_coco_format/", split=split_name, year="")
    for split_name in ("train", "val")
)

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [82]:
# preparing the analytics manager object
# It is given both splits built above; nothing is analysed until manager.run() is called.

manager = DetectionAnalysisManager(
    report_title="Training Data 3 Draft",
    train_data=train_set,
    val_data=val_set,
    # Class names are taken from the training split only.
    # NOTE(review): assumes the validation split uses the same class list - TODO confirm.
    class_names=train_set.class_names,
)

In [83]:
# labelme2coco only prepares the annotations.
#
# To use DataGradients we also need the images laid out next to those annotations:
# this cell copies every image referenced by the train / val annotation files from
# the original labelme folder into ./train_coco_format/images/train and
# ./train_coco_format/images/val respectively.

import json
import os
import shutil


def _copy_split_images(annotation_path, source_dir, dest_dir):
    """Copy every image listed in a COCO annotation file into ``dest_dir``.

    Args:
        annotation_path: Path to a COCO-style JSON file with a top-level
            ``"images"`` list whose entries each carry a ``"file_name"``.
        source_dir: Directory the images are copied from.
        dest_dir: Directory the images are copied into; created if missing.

    Returns:
        The list of ``file_name`` values, in the order they appear in the
        annotation file.

    Raises:
        FileNotFoundError: If the annotation file or a listed image is missing
            (propagated from ``open`` / ``shutil.copy2``).
        KeyError: If the JSON lacks ``"images"`` or an entry lacks ``"file_name"``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(annotation_path, 'r', encoding='utf-8') as annotation_file:
        coco = json.load(annotation_file)
    file_names = [image_entry['file_name'] for image_entry in coco["images"]]

    # Create the split directory even when the split lists no images.
    os.makedirs(dest_dir, exist_ok=True)

    for file_name in file_names:
        destination_path = os.path.join(dest_dir, file_name)
        # file_name may carry a sub-directory component; make sure its parent
        # exists so copy2 does not fail on a missing directory.
        parent_dir = os.path.dirname(destination_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # copy2 keeps file metadata (timestamps) along with the contents.
        shutil.copy2(os.path.join(source_dir, file_name), destination_path)

    return file_names


source_dir = './training_data_3/'
train_dest_dir = './train_coco_format/images/train'
val_dest_dir = './train_coco_format/images/val'

train_files = _copy_split_images(
    './train_coco_format/annotations/instances_train.json', source_dir, train_dest_dir
)
val_files = _copy_split_images(
    './train_coco_format/annotations/instances_val.json', source_dir, val_dest_dir
)

In [None]:
# that's it - run the analytics manager!
# it's interactive: the run pauses and asks questions that must be answered by typing
# the number of an option. In the recorded output below it asks for the image channel
# format (RGB / BGR / LAB / Other) and then whether the class id or the bounding box
# comes first in each label row.
# The recorded output also shows the log and archive directories being created under
# logs/Training_Data_3_Draft in the working directory.

manager.run()

  - Executing analysis with: 
  - batches_early_stop: None 
  - len(train_data): 39 
  - len(val_data): 8 
  - log directory: /content/drive/MyDrive/EYOpenScienceDataChallenge/data/data_gradients_practice/logs/Training_Data_3_Draft 
  - Archive directory: /content/drive/MyDrive/EYOpenScienceDataChallenge/data/data_gradients_practice/logs/Training_Data_3_Draft/archive_20240226-132302 
  - feature extractor list: {'Image Features': [SummaryStats, ImagesResolution, ImageColorDistribution, ImagesAverageBrightness], 'Object Detection Features': [DetectionClassHeatmap, DetectionBoundingBoxArea, DetectionBoundingBoxPerImageCount, DetectionBoundingBoxSize, DetectionClassFrequency, DetectionClassesPerImageCount, DetectionBoundingBoxIoU, DetectionResizeImpact]}
[34;1m╔[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[0m[34;1m═[

Analyzing... :   0%|          | 0/39 [00:00<?, ?it/s]


--------------------------------------------------------------------------------
[33;1mIn which format are your images loaded ?[0m
--------------------------------------------------------------------------------

[34;1mOptions[0m:
[[34;1m0[0m] | RGB
[[34;1m1[0m] | BGR
[[34;1m2[0m] | LAB
[[34;1m3[0m] | Other

Your selection (Enter the [34;1mcorresponding number[0m) >>> 0
Great! [33;1mYou chose: `RGB`[0m

--------------------------------------------------------------------------------
[33;1m[33;1mWhich comes first[0m in your annotations, the class id or the bounding box?[0m
--------------------------------------------------------------------------------
Here's a sample of how your labels look like:
Each line corresponds to a bounding box.
tensor([[  2.0000, 421.4167, 254.7500,  67.5000,  89.1667],
        [  0.0000, 320.5833, 307.2500,  93.3333, 103.3333],
        [  2.0000, 369.7500, 388.0833,  57.5000,  54.1667]])
[34;1mOptions[0m:
[[34;1m0[0m] | Label comes fi

In [68]:
# Clean-up: delete the `logs` directory in the current working directory.
# WARNING: the recorded output of manager.run() shows its log and archive directories
# living under logs/, so running this cell after manager.run() deletes that output.
# The recorded execution count (68) is lower than the cells above it, i.e. it was last
# run *before* them - skip this cell if you want to keep a freshly generated report.
import shutil

# Pure-Python equivalent of `rm -rf logs` for a directory: no shell needed, works on
# any OS, and ignore_errors=True keeps it silent when `logs` does not exist.
shutil.rmtree("logs", ignore_errors=True)