In [52]:
import os
import json
import pandas as pd
import numpy as np
from glob import glob
from pycocotools.coco import COCO

In [42]:
def getDF(n_boxes, fns, date_semis='220428'):
    """Build the per-frame observation table from frame names and bird counts.

    Every name in ``fns`` is split on ``'_'`` and read positionally: the
    second character of the first token is the camera number, the second
    token fills ``sous-semis``, the third fills ``methode``, the
    second-to-last token is the date (``%y%m%d``) and the last token is the
    time (``%Hh%M``). For example ``'C1_AA_ST_220429_06h00'`` yields camera
    1, ``'AA'``, ``'ST'``, 2022-04-29 and 06:00.

    Parameters
    ----------
    n_boxes : sequence of int
        Count for each frame, aligned with ``fns``.
    fns : sequence of str
        Frame names (no directory, no extension).
    date_semis : str, optional
        Reference date in ``%y%m%d`` format; ``J_T`` is the number of days
        elapsed since that date. Defaults to ``'220428'``.

    Returns
    -------
    pandas.DataFrame
        One row per frame with columns ``date``, ``heure``, ``methode``,
        ``abondance``, ``sous-semis``, ``J_T``, ``bois``, ``arrosage`` and
        ``n°cam``. ``abondance`` is stored as integers. Camera numbers
        missing from the lookup tables give NaN in ``bois``/``arrosage``.

    Raises
    ------
    ValueError
        If ``n_boxes`` and ``fns`` do not have the same length.
    """
    if len(n_boxes) != len(fns):
        raise ValueError(
            'n_boxes and fns must have the same length (got {} and {})'.format(
                len(n_boxes), len(fns)))

    # Camera number -> value written to the 'bois' column.
    dict_bois = {1: 'oui', 2: 'non', 3: 'non', 4: 'oui',
                 5: 'oui', 6: 'non', 7: 'oui', 8: 'non'}

    # Camera number -> value written to the 'arrosage' column.
    dict_arro = {1: 'non', 2: 'oui', 3: 'oui', 4: 'non',
                 5: 'non', 6: 'oui', 7: 'non', 8: 'oui'}

    tokens = [f.split('_') for f in fns]

    # Only the second character of the first token is read, so camera
    # numbers are limited to a single digit.
    cams = pd.Series([int(t[0][1]) for t in tokens], dtype='int64')
    dates = pd.to_datetime(
        pd.Series([t[-2] for t in tokens], dtype='object'), format='%y%m%d')
    heures = pd.to_datetime(
        pd.Series([t[-1] for t in tokens], dtype='object'), format='%Hh%M').dt.time
    reference_day = pd.to_datetime(date_semis, format='%y%m%d')

    # Build the frame column by column so each column keeps its own dtype;
    # stacking everything into a single array would turn the counts into strings.
    df = pd.DataFrame({
        'date': dates,
        'heure': heures,
        'methode': pd.Series([t[2] for t in tokens], dtype='object'),
        'abondance': np.asarray(n_boxes, dtype=int),
        'sous-semis': pd.Series([t[1] for t in tokens], dtype='object'),
        'J_T': (dates - reference_day).dt.days,
        'bois': cams.map(dict_bois),
        'arrosage': cams.map(dict_arro),
        'n°cam': cams,
    })

    # Rows whose method is 'SN' carry 'SN' in the 'sous-semis' column as well.
    df.loc[df['methode'] == 'SN', 'sous-semis'] = 'SN'

    return df

# Get annotations

In [45]:
# Location of the COCO annotation file, relative to the working directory.
root = os.getcwd() + '/../../data/birds/annotations'
pathin = root + '/coco_annotations.json'

coco = COCO(pathin)
img_ids = sorted(coco.imgs)

# Retrieve, for every annotated image, its file stem (no directory, no
# extension) and the number of annotations attached to it.
img_fns = []
n_boxes = []
for img_id in img_ids:
    file_name = coco.imgs[img_id]['file_name']
    img_fns.append(file_name.split('/')[-1].split('.')[0])
    n_boxes.append(len(coco.getAnnIds(imgIds=img_id)))

# Map each file stem to its annotation count.
dict_fns = dict(zip(img_fns, n_boxes))

# Initialize annotations

In [83]:
# Collect the stem (no directory, no extension) of every extracted frame.
# glob() returns matches in arbitrary order, so the stems are sorted to keep
# the row order reproducible; os.path.basename also copes with the backslash
# separators glob produces on Windows.
full_fns = sorted(
    os.path.basename(p).split('.')[0]
    for p in glob(root + '/../images/22*/*.png')
)

# Every frame starts with a count of 0 (integer dtype even when no frame is found).
empty_boxes = np.zeros(len(full_fns), dtype=int)

# Boolean mask: True where the frame also appears in the annotation file.
annotated_fns = set(img_fns)
cond = np.array([f in annotated_fns for f in full_fns], dtype=bool)

In [84]:
# Pick up the annotated count of each frame, in the order of full_fns, and
# write those counts over the zeros at the positions flagged by cond.
annotated_counts = (dict_fns[name] for name in full_fns if name in img_fns)
new_boxes = list(annotated_counts)
empty_boxes[cond] = new_boxes

# Assemble the final per-frame table.
df_out = getDF(empty_boxes, full_fns)

In [85]:
# Preview the first rows of the assembled table.
df_out.head()

Unnamed: 0,date,heure,methode,abondance,sous-semis,J_T,bois,arrosage,n°cam
0,2022-04-29,06:00:00,ST,0,AA,1,oui,non,1
1,2022-04-29,06:01:00,ST,0,AA,1,oui,non,1
2,2022-04-29,06:02:00,ST,0,AA,1,oui,non,1
3,2022-04-29,06:03:00,ST,0,AA,1,oui,non,1
4,2022-04-29,06:04:00,ST,0,AA,1,oui,non,1


In [82]:
# Write the table to a CSV file in the current working directory, without the index column.
df_out.to_csv('output_cam_annotated.csv', index = False)

# Data Exploration

In [None]:
# Load the raw annotation JSON so this section runs on a fresh kernel;
# `data` is not defined by any earlier cell.
with open(pathin, encoding='utf-8') as f:
    data = json.load(f)

# One row per annotation. Building the frame column by column keeps the ids
# as integers; stacking them with the float areas would coerce them to float.
df = pd.DataFrame({
    'img_id': [a['image_id'] for a in data['annotations']],
    'anno_id': [a['id'] for a in data['annotations']],
    'box_size': [a['area'] for a in data['annotations']],
})

# Image id -> file name, used to print readable names for selected images.
dict_img = {i['id']:i['file_name'] for i in data['images']}

In [None]:
# Number of annotations per image (only images present in df appear here).
per_image = df.groupby('img_id').count()
count_anno = per_image.reset_index()

# How many images share each annotation count.
freq_df = (
    count_anno[['img_id', 'anno_id']]
    .groupby('anno_id')
    .count()
    .reset_index()
    .rename(columns={'anno_id': 'Nb birds', 'img_id': 'Nb images'})
)
freq_df

In [None]:
# Ids of the images carrying more than 20 annotations.
cond = count_anno['anno_id'].gt(20)
count_anno.loc[cond, 'img_id']

In [None]:
# Print the file name of every image selected by cond.
selected_ids = count_anno.loc[cond, 'img_id'].to_numpy()
for image_id in selected_ids:
    print(dict_img[image_id])

In [None]:
# Overall totals: annotation rows, and images that have at least one annotation.
n_annotations = len(df)
n_images = len(count_anno)
print('Number of annotations :', n_annotations)
print('Number of images:', n_images)