In [1]:
import json
import pandas as pd

In [2]:
# Load the annotation data
with open('channel_islands_camera_traps.json', 'r') as f:
    lila = json.load(f)

### Preprocess Lila Dataset
1. Non-correct-bbox which is too large(>0.95) or no bounding box:
   - There are 115197 annotations
2. We don't want categories(id) empty(0), human(1), bird(5), other(6):
   - There are 16893 annotations

In [3]:
image_lookup = {img['id']: img for img in lila['images']}
no_bbox_annotations_all = []

for annotation in lila['annotations']:
    image_id = annotation['image_id']
    image = image_lookup.get(image_id)
    if image is not None:
        if 'bbox' in annotation:
            bbox = annotation['bbox']
            if bbox:
                width, height = image['width'], image['height']
                if bbox[2] >= 0.95 * width and bbox[3] >= 0.95 * height:
                    no_bbox_annotations_all.append(annotation)
            else:
                no_bbox_annotations_all.append(annotation)
        else:
            no_bbox_annotations_all.append(annotation)

print(f"Number of annotations without bounding boxes: {len(no_bbox_annotations_all)}")

Number of annotations without bounding boxes: 115197


In [4]:
image_lookup = {img['id']: img for img in lila['images']}
legit_anns = []
legit_images = []
for annotation in lila['annotations']:
    image_id = annotation['image_id']
    image = image_lookup.get(image_id)
    if image is not None:
        if ((bbox := annotation.get('bbox')) is not None) and (annotation.get('category_id') not in {0, 1, 5, 6}):
            width, height = image['width'], image['height']
            if bbox[2] < 0.95 * width or bbox[3] < 0.95 * height:
                # print(annotation)
                legit_anns.append(annotation)
                # print(image)
                legit_images.append(image)


In [5]:
len(legit_anns), len(legit_images)

(132090, 132090)

In [6]:
print(f'There are {len(legit_anns) - len(no_bbox_annotations_all)} in categories(id) empty(0), human(1), bird(5), other(6)' )

There are 16893 in categories(id) empty(0), human(1), bird(5), other(6)


In [7]:
len(lila['annotations']) - 132090

132231

In [8]:
print(len(lila['annotations']))

264321


In [9]:
[{'id': 0, 'name': 'empty'},
  {'id': 1, 'name': 'human'},
  {'id': 2, 'name': 'fox'},
  {'id': 3, 'name': 'skunk'},
  {'id': 4, 'name': 'rodent'},
  {'id': 5, 'name': 'bird'},
  {'id': 6, 'name': 'other'}]

[{'id': 0, 'name': 'empty'},
 {'id': 1, 'name': 'human'},
 {'id': 2, 'name': 'fox'},
 {'id': 3, 'name': 'skunk'},
 {'id': 4, 'name': 'rodent'},
 {'id': 5, 'name': 'bird'},
 {'id': 6, 'name': 'other'}]

In [10]:
df = pd.DataFrame(lila['annotations'])

counts = df.groupby(['category_id']).size().reset_index(name='counts')

# mapping the category_id to the category name
counts['category_name'] = counts['category_id'].map({0: 'empty', 1: 'human', 2: 'fox', 3: 'skunk', 4: 'rodent', 5: 'bird', 6: 'other'})
counts

Unnamed: 0,category_id,counts,category_name
0,0,114949,empty
1,1,5981,human
2,2,48150,fox
3,3,1071,skunk
4,4,82912,rodent
5,5,11099,bird
6,6,159,other


In [11]:
df = pd.DataFrame(legit_anns)

counts = df.groupby(['category_id']).size().reset_index(name='counts')

# mapping the category_id to the category name
counts['category_name'] = counts['category_id'].map({0: 'empty', 1: 'human', 2: 'fox', 3: 'skunk', 4: 'rodent', 5: 'bird', 6: 'other'})
counts

Unnamed: 0,category_id,counts,category_name
0,2,48120,fox
1,3,1071,skunk
2,4,82899,rodent


In [12]:
len(legit_anns), len(legit_images)

(132090, 132090)

In [13]:
for img in legit_images:
    img['original_relative_path'] = img['file_name']

### Animl Dataset

In [14]:
with open('14f460fffd7dc41f889093f6a1a1812a_coco.json', 'r') as f:
    animl = json.load(f)

In [15]:
animl_map = {animl['categories'][i]['id']:animl['categories'][i]['name'] for i in range(len(animl['categories']))}

In [16]:
df = pd.DataFrame(animl['annotations'])

counts = df.groupby(['category_id']).size().reset_index(name='counts')

# mapping the category_id to the category name
counts['category_name'] = counts['category_id'].map(animl_map)
counts

Unnamed: 0,category_id,counts,category_name
0,0,3,empty
1,5,10836,fox
2,8,3,animal
3,9,6791,lizard
4,15,1216,skunk
5,19,16852,rodent
6,20,14110,bird
7,21,2592,scrub jay
8,23,1,mouse


In [17]:
image_lookup = {img['id']: img for img in animl['images']}
legit_anns_animl = []
legit_images_animl = []
for annotation in animl['annotations']:
    image_id = annotation['image_id']
    image = image_lookup.get(image_id)
    if image is not None:
        if ((bbox := annotation.get('bbox')) is not None) and (annotation.get('category_id') not in {0, 8, 23}):
            width, height = image['width'], image['height']
            if bbox[2] < 0.95 * width or bbox[3] < 0.95 * height:
                # print(annotation)
                legit_anns_animl.append(annotation)
                # print(image)
                legit_images_animl.append(image)

In [18]:
len(legit_anns_animl), len(legit_images_animl)

(52304, 52304)

In [19]:
len(animl['annotations']), len(animl['images'])

(52404, 51226)

In [20]:
for annotation in legit_anns:
    if annotation['category_id'] == 2: # fox
        annotation['category_id'] = 5 # fox
    elif annotation['category_id'] == 3: # skunk
        annotation['category_id'] = 15 # skunk
    elif annotation['category_id'] == 4: # rodent
        annotation['category_id'] = 19 # rodent

In [21]:
df = pd.DataFrame(legit_anns)

counts = df.groupby(['category_id']).size().reset_index(name='counts')

# mapping the category_id to the category name
counts['category_name'] = counts['category_id'].map(animl_map)
counts

Unnamed: 0,category_id,counts,category_name
0,5,48120,fox
1,15,1071,skunk
2,19,82899,rodent


In [22]:
coco_data = {}
coco_data['images'] = legit_images + legit_images_animl
coco_data['annotations'] = legit_anns + legit_anns_animl
coco_data['categories'] = animl['categories']
coco_data['info'] = animl['info']

In [23]:
assert len(coco_data['images']) == len(legit_images) + len(legit_images_animl)

In [24]:
len(legit_images) + len(legit_images_animl)

184394

In [25]:
len(coco_data['images'])

184394

In [27]:
#with open('classifier-training/mdcache/v5.0b/mirav2_dataset.json', 'w') as f:
#    json.dump(coco_data, f)

In [28]:
#with open('classifier-training/mdcache/v5.0b/mirav2_dataset.json', 'r') as f:
#    d = json.load(f)

In [None]:
python animl-ml/classification/utils/cct_to_md.py \
  --input_filename ~/classifier-training/mdcache/v5.0b/mirav2_dataset.json \
  --output_filename ~/classifier-training/mdcache/v5.0b/mirav2_dataset_md.json

In [None]:
python animl-ml/classification/utils/crop_detections.py \
    ~/classifier-training/mdcache/v5.0b/mirav2_dataset_md.json \
    ~/crops/mirav2_dataset \
    --images-dir ~/images/mirav2_dataset \
    --threshold 0 --square-crops \
    --threads 50 \
    --logdir $BASE_LOGDIR
"""remember to change the dataset_name"""

There are only 1817 images failed to download or crop.

In [None]:
python ~/animl-ml/classification/utils/md_to_queried_images.py \
  --input_filename ~/classifier-training/mdcache/v5.0b/mirav2_dataset_md.json \
  --dataset mirav2_dataset \
  --output_filename $BASE_LOGDIR/queried_images.json

In [None]:
python CameraTraps/classification/create_classification_dataset.py \
    $BASE_LOGDIR \
    --mode csv splits \
    --queried-images-json $BASE_LOGDIR/queried_images.json \
    --cropped-images-dir ~/crops/ \
    --detector-output-cache-dir ~/classifier-training/mdcache --detector-version 5.0b \
    --threshold 0 \
    --min-locs 3 \
    --val-frac 0.2 \
    --test-frac 0.2 \
    --method random

In [None]:
python CameraTraps/classification/train_classifier.py \
    $BASE_LOGDIR \
    ~/crops \
    --model-name efficientnet-b3 --pretrained \
    --label-weighted \
    --epochs 50 --batch-size 80 --lr 3e-5 \ # I set batch-size 80 because 
    --weight-decay 1e-6 \
    --num-workers 4 \ 
    --logdir $BASE_LOGDIR --log-extreme-examples 3

In [None]:
49330 + 240459

In [None]:
tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0

In [None]:
python animl-ml/classification/utils/cct_to_md.py \
  --input_filename ~/classifier-training/mdcache/v5.0b/mirav2_rat.json \
  --output_filename ~/classifier-training/mdcache/v5.0b/mirav2_rat_md.json

### Island Conservation Dataset

In [29]:
# Load the annotation data
with open('images/island/metadata/island_conservation.json', 'r') as f:
    island = json.load(f)

In [30]:
island_map = {island['categories'][i]['id']:island['categories'][i]['name'] for i in range(len(island['categories']))}

In [31]:
df = pd.DataFrame(island['annotations'])

counts = df.groupby(['category_id']).size().reset_index(name='counts')

# mapping the category_id to the category name
counts['category_name'] = counts['category_id'].map(island_map)
counts

Unnamed: 0,category_id,counts,category_name
0,0,77670,empty
1,1,150,cow
2,2,845,donkey
3,3,6284,iguana
4,4,967,raven
5,5,4737,cat
6,6,160,dog
7,7,16338,rat
8,8,6237,human
9,9,967,unknown


In [32]:
image_lookup = {img['id']: img for img in island['images']}
legit_anns_island = []
legit_images_island = []
for annotation in island['annotations']:
    image_id = annotation['image_id']
    image = image_lookup.get(image_id)
    if image is not None:
        if ((bbox := annotation.get('bbox')) is not None) and (annotation.get('category_id') in {7}):
            width, height = image['width'], image['height']
            if bbox[2] < 0.95 * width or bbox[3] < 0.95 * height:
                # print(annotation)
                legit_anns_island.append(annotation)
                # print(image)
                legit_images_island.append(image)


In [33]:
len(legit_anns_island), len(legit_images_island)

(16338, 16338)

There are 123,000 camera trap images from 123 camera locations from 7 islands in 6 countries

In [56]:
from collections import Counter
paths = []
countries = []
for img in legit_images_island:
    path = img['file_name'].split('/')[0:2]
    countries.append(img['file_name'].split('/')[0])
    paths.append('/'.join(path))
count_camera = Counter(paths)
count_countries = Counter(countries)

In [75]:
count_countries = count_countries.items()

In [85]:
data = dict(count_countries)
df = pd.DataFrame.from_dict(data,orient='index', columns=['count'])
print(f'There are in total 16338 rats.')
df

There are in total 16338 rats.


Unnamed: 0,count
dominicanrepublic,207
ecuador1,5
chile,24
puertorico,52
palau,2681
ecuador2,45
micronesia,13324


In [60]:
animl['images'][0]

{'id': '3422ea65783a5775971eec489105eebe',
 'file_name': 'p_000020.jpg',
 'original_relative_path': 'X811459F/p_000020_3422ea65783a5775971eec489105eebe.jpg',
 'datetime': '2021-04-03T04:44:20.000Z',
 'location': 'Walnut drainage',
 'width': 2048,
 'height': 1536}

In [61]:
for img in legit_images_island:
    path = img['file_name'].split('/')[0:2]
    path = '/'.join(path)
    img['location'] = path

In [65]:
for img in legit_images_island:
    img['original_relative_path'] = img['file_name']

In [86]:
legit_images_island[0]

{'id': 'dominicanrepublic_camara116_cam11618mayo2017b_dominicanrepublic_cam11618mayo2017b_20170126_033638_img_0090',
 'file_name': 'dominicanrepublic/camara116/cam11618mayo2017b/dominicanrepublic_cam11618mayo2017b_20170126_033638_img_0090.jpg',
 'width': 1920,
 'height': 1080,
 'location': 'dominicanrepublic/camara116',
 'original_relative_path': 'dominicanrepublic/camara116/cam11618mayo2017b/dominicanrepublic_cam11618mayo2017b_20170126_033638_img_0090.jpg'}

In [87]:
for annotation in legit_anns_island:
    annotation['category_id'] = 22 # rat # change the id as same as animl's categories_id   

In [88]:
legit_anns_island[0]

{'id': '6e80a662-df2b-11ea-820f-000d3a74c7de',
 'image_id': 'dominicanrepublic_camara116_cam11618mayo2017b_dominicanrepublic_cam11618mayo2017b_20170126_033638_img_0090',
 'category_id': 22,
 'bbox': [982.0799999999999, 620.028, 294.912, 74.952]}

In [89]:
coco_data = {}
coco_data['images'] = legit_images + legit_images_animl + legit_images_island
coco_data['annotations'] = legit_anns + legit_anns_animl + legit_anns_island
coco_data['categories'] = animl['categories']
coco_data['info'] = animl['info']

In [90]:
assert len(coco_data['images']) == len(legit_images) + len(legit_images_animl) + len(legit_images_island)

In [91]:
assert len(coco_data['annotations']) == len(legit_anns) + len(legit_anns_animl) + len(legit_anns_island)

In [92]:
with open('mirav2_rat.json', 'w') as f:
    json.dump(coco_data, f)

In [94]:
with open('mirav2_rat.json', 'r') as f:
    mirav2_rat = json.load(f)

In [None]:
python animl-ml/classification/utils/cct_to_md.py \
  --input_filename ~/classifier-training/mdcache/v5.0b/mirav2_rat.json \
  --output_filename ~/classifier-training/mdcache/v5.0b/mirav2_rat_md.json

In [96]:
legit_anns_island

[{'id': '6e80a662-df2b-11ea-820f-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara116_cam11618mayo2017b_dominicanrepublic_cam11618mayo2017b_20170126_033638_img_0090',
  'category_id': 22,
  'bbox': [982.0799999999999, 620.028, 294.912, 74.952]},
 {'id': '6f73518c-df2b-11ea-96fa-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara107_cam10701agosto2017_dominicanrepublic_cam10701agosto2017_20161227_041818_img_0047',
  'category_id': 22,
  'bbox': [1127.04, 628.02, 273.024, 126.036]},
 {'id': '765fbb18-df2b-11ea-8df1-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara107_cam10717mayo2017_dominicanrepublic_cam10717mayo2017_20170401_035602_img_0039',
  'category_id': 22,
  'bbox': [521.088, 560.952, 364.032, 108.0]},
 {'id': '76d67240-df2b-11ea-85b9-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara107_cam10701agosto2017_dominicanrepublic_cam10701agosto2017_20170125_032728_img_0113',
  'category_id': 22,
  'bbox': [769.9200000000001, 540.972, 294.912, 131.976]},
 {'id': '77ad4

In [None]:
with open('check.json','w') as f:
    json.write(legit_anns_island)