In [6]:
import os 
import numpy as np
import pandas as pd
import brtdevkit
from brtdevkit.data import Dataset

# The two possible label maps
two_class = {'1': 'crop', '2': 'weed'}
six_class = {'0': 'ground', '1': 'other weed', '2': 'crop', '3': 'amaranth family', 
             '4': 'marestail', '5': 'giant ragweed', '6': 'kochia'}

In [2]:
# Examine the test set
test_dataset = Dataset.retrieve(name="2020-06-30_soy_2751_al_test")
print(test_dataset)

{
  "created_at": "2020-06-30T23:53:56.213000",
  "created_by": "5df82b3a5207f7a8800d6d38",
  "created_by_email": "andrei.polzounov@bluerivert.com",
  "description": "SOYBEANS active learning test dataset with 2751 images. Date range: 2020-05-09 15:23:14.636000 - 2020-06-15 20:12:44.382000. Active Learning. States: ['Kansas', 'Nebraska', 'Indiana']",
  "id": "5efbd094a2d966260361da63",
  "is_deleted": false,
  "kind": "annotation",
  "metadata_s3_bucket": "tartarus.datasets",
  "metadata_s3_key": "prod/annotation/5efbd094a2d966260361da63.jsonl",
  "name": "2020-06-30_soy_2751_al_test",
  "parameters": {},
  "request_s3_location": {
    "s3_bucket": "tartarus.datasets",
    "s3_key": "prod/dataset_request/5df82b3a5207f7a8800d6d38_7fc2352b402f42b99a9a91bbdddbd374.json"
  },
  "state": "ready"
}


In [3]:
# note this dataset does not have a bounding_box label map column
test_df = test_dataset.to_dataframe()
test_df.columns

Index(['id', 'is_deleted', 'collected_on', 'gnss', 'has_human_annotation',
       'uuid', 'artifacts_content_hash', 'angle_to_row',
       'camera_exposure_algo', 'camera_exposure_time', 'camera_focal_length',
       'camera_fstop', 'camera_gain', 'camera_height', 'camera_manufacturer',
       'camera_ppi', 'camera_revision', 'camera_sensor_name',
       'camera_sensor_type', 'camera_serial', 'cloud_cover', 'crop_damage',
       'crop_health', 'crop_height', 'crop_name', 'crop_residue',
       'experiment_id', 'farm', 'furrow_depth_inches', 'gps_course',
       'gps_quality', 'gps_speed', 'group_id', 'grower', 'icp_version',
       'isp_version', 'latitude', 'longitude', 'operating_field_name',
       'operator_name', 'project_name', 'raw_bit_depth', 'robot_name',
       'row_spacing', 'soil_color', 'tillage_practice', 'type',
       'weed_pressure', 'weeds', 'notes', 'artifact_nrg_0__id',
       'artifact_nrg_0_kind', 'artifact_nrg_0_content_hash',
       'artifact_nrg_0_s3_bucket', '

In [7]:
# Check for anomalies in pixelwise label map
anomalies = 0 

for i in range(len(test_df)):
    if test_df.annotation_pixelwise_0_label_map.values[i] != two_class:
        anomalies +=1

print(f'Out of {len(test_df)} images, {anomalies} lack the normal 2-class pixelwise label map images')

Out of 2751 images, 0 lack the normal 2-class pixelwise label map images


In [8]:
# Training Set, found to have all the label map anomalies
train_dataset = Dataset.retrieve(name="2020-06-30_soy_13736_al_train")
print(train_dataset)

{
  "created_at": "2020-06-30T23:53:45.498000",
  "created_by": "5df82b3a5207f7a8800d6d38",
  "created_by_email": "andrei.polzounov@bluerivert.com",
  "description": "SOYBEANS active learning train dataset with 13736 images. Date range: 2020-04-09 17:22:08.187000 - 2020-06-17 19:09:42.621000. Active Learning. States: ['Arkansas', 'Iowa', 'Louisiana', 'Texas', 'Illinois']",
  "id": "5efbd08934742910d8c5af08",
  "is_deleted": false,
  "kind": "annotation",
  "metadata_s3_bucket": "tartarus.datasets",
  "metadata_s3_key": "prod/annotation/5efbd08934742910d8c5af08.jsonl",
  "name": "2020-06-30_soy_13736_al_train",
  "parameters": {},
  "request_s3_location": {
    "s3_bucket": "tartarus.datasets",
    "s3_key": "prod/dataset_request/5df82b3a5207f7a8800d6d38_8930a9d8b3f74b099ceda23c756c822f.json"
  },
  "state": "ready"
}


In [9]:
# Examine columns
df = train_dataset.to_dataframe()
df.columns

Index(['id', 'is_deleted', 'collected_on', 'gnss', 'has_human_annotation',
       'uuid', 'artifacts_content_hash', 'angle_to_row',
       'camera_exposure_algo', 'camera_exposure_time',
       ...
       'annotation_bounding_box_0_kind',
       'annotation_bounding_box_0_nothing_to_annotate',
       'annotation_bounding_box_0_state',
       'annotation_bounding_box_0_label_map',
       'annotation_bounding_box_0_annotation_job_id',
       'annotation_bounding_box_0_vendor_metadata',
       'annotation_bounding_box_0_style',
       'annotation_bounding_box_0_bounding_boxes',
       'annotation_pixelwise_0_request_review',
       'annotation_pixelwise_0_review_requested_by'],
      dtype='object', length=101)

In [10]:
# check train set and examine labels

bb_normal=0
pw_normal = 0
pw_missing = 0
pw_six_class_ids = []
bb_six_class_ids = []
pw_missing_ids = []

for i in range(len(df)):
    if df.annotation_pixelwise_0_label_map.values[i] == two_class:
        pw_normal +=1
    if df.annotation_pixelwise_0_label_map.values[i] == six_class:
        pw_six_class_ids.append(df.id[i])
    if (df.annotation_bounding_box_0_label_map.values[i] != two_class) & (df.annotation_bounding_box_0_label_map.values[i] != six_class):
        bb_normal +=1
    if (df.annotation_pixelwise_0_label_map.values[i] != two_class) & (df.annotation_pixelwise_0_label_map.values[i] != six_class):
        pw_missing +=1
        pw_missing_ids.append(df.id[i])
    if (df.annotation_bounding_box_0_label_map.values[i] == six_class):
        bb_six_class_ids.append(df.id[i])
        
print(f'Out of {len(df)} total images:')
print(f'There are {pw_normal} normal 2-class pixelwise label map images')
print(f'There are {bb_normal} images without any bounding box label map')
print(f'There are {len(pw_six_class_ids)} 6-class pixelwise label map images')
print(f'There are {len(bb_six_class_ids)} images with the 6-class bounding box labels')
print(f'There are {len(pw_missing_ids)} images without any pixelwise label map')

Out of 13736 total images:
There are 13570 normal 2-class pixelwise label map images
There are 13668 images without any bounding box label map
There are 98 6-class pixelwise label map images
There are 68 images with the 6-class bounding box labels
There are 68 images without any pixelwise label map


In [24]:
# Examine label policy
# The normal two-class labels have two different pixelwise label policies

two_class_df = df[df.annotation_pixelwise_0_label_map.values == two_class]
print(two_class_df.groupby(['annotation_pixelwise_0_label_policy']).count()['id'])
print('')

# There is no bounding_box label policy for these images
print('annotation_bounding_box_0_label_policy:')
print(two_class_df.groupby(['annotation_bounding_box_0_label_policy']).count()['id'])

annotation_pixelwise_0_label_policy
5eb48622e154067231d81219    10453
5eb48622e154067231d8121a     3109
Name: id, dtype: int64

annotation_bounding_box_0_label_policy:
Series([], Name: id, dtype: int64)


In [22]:
# Examine label policy
# The six-class pixelwise labels have a different pixelwise label policy

six_class_df = df[df.annotation_pixelwise_0_label_map.values == six_class]
print(six_class_df.groupby(['annotation_pixelwise_0_label_policy']).count()['id'])
print('')

# There is also no bounding_box label policy for these images
print('annotation_bounding_box_0_label_policy:')
print(six_class_df.groupby(['annotation_bounding_box_0_label_policy']).count()['id'])

annotation_pixelwise_0_label_policy
5ee946767c87f8689a1d5926    98
Name: id, dtype: int64

annotation_bounding_box_0_label_policy:
Series([], Name: id, dtype: int64)


In [25]:
# The six-class bounding box annotation labels do not have a pixelwise label policy
bbox_six_class_df = df[df.annotation_bounding_box_0_label_map.values == six_class]
print('annotation_pixelwise_0_label_policy')
print(bbox_six_class_df.groupby(['annotation_pixelwise_0_label_policy']).count()['id'])
print('')

# But they do have a bounding_box label policy
print(bbox_six_class_df.groupby(['annotation_bounding_box_0_label_policy']).count()['id'])

annotation_pixelwise_0_label_policy
Series([], Name: id, dtype: int64)

annotation_bounding_box_0_label_policy
5edf040ddfbb6dad31a2a38c    68
Name: id, dtype: int64


In [45]:
# Display problem image ids to look at in Aletheia
print(f'{pw_six_class_ids[0]} is an example of an image id with the 6-class pixelwise label map')
print(f'{bb_six_class_ids[0]} is an example of an image id with the 6-class bounding box label map')
print(f'{pw_missing_ids[0]} is an example of an image id with no pixelwise label map, though it is labeled in Aletheia')

5ebee1a9371207bf39e2d9fb is an example of an image id with the 6-class pixelwise label map
5ebee1a7371207bf39e2d997 is an example of an image id with the 6-class bounding box label map
5ebee1a7371207bf39e2d997 is an example of an image id with no pixelwise label map
