In [1]:
# !pip install python-docx

In [2]:
import glob
import os
import random
import math
import ast
import docx
import scipy

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from datetime import datetime
from scipy.spatial.distance import cdist

# comparison ArUco and CVAT
by : Arjen van Putten, Utrecht University, e-mail: a.vanputten1@uu.nl
Here we examine the results of ArUco tracking by comparing it to manual annotations.


### Dataset
The data used for the analysis was collected during phase 1 of the Imagen project. This data is of maximum 10 hens in small floor pens with options for hiding and perching. The data is collected within weeks after placing the ArUco markers on the hens, so the quality of the tags should be high. We do expect lower tracking due to feathers covering the ArUco markers.



In [3]:
workdir = 'C:/Users/Putte027/OneDrive - Universiteit Utrecht/Documents/data/CVAT_vs_ArUco/final/'

In [4]:
df = pd.read_csv(workdir + 'df.csv')
df.sample(50)

Unnamed: 0,Unnamed: 0.1,index,frame,ID_x,top_left,top_right,bottom_right,bottom_left,path_and_filename_x,video,...,frame_CVAT,corresponding_annotated_frame,x_BB,y_BB,cor_BB_x,cor_BB_y,dist_BB_ArUco,delete,cvat_data,visible_inter
245220,245220,239787,497.0,9.0,"(593.0, 658.0)","(621.0, 664.0)","(615.0, 692.0)","(587.0, 687.0)",ArUco/03_20220108075951_part2_1_ArUco_tracks.csv,03_20220108075951_part2_1,...,,24,,,,,,,,1.0
623394,623394,591782,1010.0,15.0,"(781.0, 718.0)","(773.0, 746.0)","(743.0, 736.0)","(750.0, 709.0)",ArUco/11_20220108105951_part1_4_ArUco_tracks.csv,11_20220108105951_part1_4,...,,50,,,,,,,,1.0
104055,104055,90648,2342.0,9.0,"(728.0, 485.0)","(740.0, 512.0)","(713.0, 526.0)","(704.0, 498.0)",ArUco/01_20220108115951_part1_6_ArUco_tracks.csv,01_20220108115951_part1_6,...,,117,,,,,,,,1.0
59687,59687,56706,490.0,5.0,"(1269.0, 481.0)","(1261.0, 509.0)","(1238.0, 500.0)","(1245.0, 472.0)",ArUco/01_20220108115951_part1_4_ArUco_tracks.csv,01_20220108115951_part1_4,...,,24,,,,,,,,1.0
129026,129026,132974,2366.0,3.0,"(640.0, 707.0)","(666.0, 692.0)","(681.0, 717.0)","(656.0, 733.0)",ArUco/01_20220108115951_part1_8_ArUco_tracks.csv,01_20220108115951_part1_8,...,,118,,,,,,,,1.0
554282,554282,532873,228.0,12.0,"(802.0, 589.0)","(819.0, 563.0)","(844.0, 579.0)","(828.0, 605.0)",ArUco/07_20220108115953_part2_6_ArUco_tracks.csv,07_20220108115953_part2_6,...,,11,,,,,,,,1.0
512359,512359,479461,199.0,12.0,"(1280.0, 424.0)","(1292.0, 388.0)","(1327.0, 400.0)","(1315.0, 435.0)",ArUco/07_20220108115953_part2_3_ArUco_tracks.csv,07_20220108115953_part2_3,...,,9,,,,,,,,1.0
470760,470760,445356,866.0,16.0,"(891.0, 354.0)","(884.0, 324.0)","(913.0, 315.0)","(920.0, 345.0)",ArUco/06_20211217112157_part1_9_ArUco_tracks.csv,06_20211217112157_part1_9,...,,43,,,,,,,,1.0
25309,25309,649460,1640.0,,,,,,,01_20220108115951_part1_2,...,82.0,82,1495.7,420.3,,,,,True,1.0
156721,156721,152244,1486.0,11.0,"(1527.0, 611.0)","(1499.0, 615.0)","(1492.0, 586.0)","(1520.0, 582.0)",ArUco/01_20220110073318_3_ArUco_tracks.csv,01_20220110073318_3,...,,74,,,,,,,,1.0


# Part 4

# 4.1 Examine the ArUco tracking quality



### 4.1.A general findings

# Table 1

We start off with some easy findings to get from this massive data frame. Mostly we focus on the numbers of frames in which we measure something to give a better understanding of the data. Then we zoom in step by step.

we labelled animals with u when we could not read there marker during a 2 minute video segment, this u was then added as a bool True in the column 'marker_obstructed'. We want to know the overall percentage of unreadable animals
we also want to see it over multiple videos.

In [5]:
nr_videos = df.video.nunique()
nr_videos_IB = df.loc[df['breed']=='IB'].video.nunique()
nr_videos_DW = df.loc[df['breed']=='DW'].video.nunique()
print('total nr videos: ', nr_videos)
print('total nr videos IB: ', nr_videos_IB, \
      ' which is', round(nr_videos_IB/nr_videos*100,1), '%')
print('total nr videos DW: ', nr_videos_DW, \
      ' which is', round(nr_videos_DW/nr_videos*100,1), '%')

total nr videos:  65
total nr videos IB:  40  which is 61.5 %
total nr videos DW:  25  which is 38.5 %


In [6]:
nr_original_videos = df.original_video.nunique()
nr_original_videos_DW = df.loc[df['breed']=='DW'].original_video.nunique()
nr_original_videos_IB = df.loc[df['breed']=='IB'].original_video.nunique()

print('total nr original videos: ', nr_original_videos)
print('total nr original videos IB: ', nr_original_videos_IB, \
      ' which is', round(nr_original_videos_IB/nr_original_videos*100,1), '%')
print('total nr original videos DW: ', nr_original_videos_DW, \
      ' which is', round(nr_original_videos_DW/nr_original_videos*100,1), '%')

total nr original videos:  13
total nr original videos IB:  8  which is 61.5 %
total nr original videos DW:  5  which is 38.5 %


In [7]:
nr_pens = df.pen.nunique()
nr_pens_IB = df.loc[df['breed']=='IB'].pen.nunique()
nr_pens_DW = df.loc[df['breed']=='DW'].pen.nunique()
print('nr pens: ', nr_pens)
print('nr pens IB: ', nr_pens_IB,\
      ' which is', round(nr_pens_IB/nr_pens*100,1), '%')
print('nr pens DW: ', nr_pens_DW, \
      ' which is', round(nr_pens_DW/nr_pens*100,1), '%')

nr pens:  11
nr pens IB:  6  which is 54.5 %
nr pens DW:  5  which is 45.5 %


In [8]:
sum_frames_with_annotated_data = df.loc[(df['bb_top']>=0.0),\
                                        ['video', 'frame']\
                                       ].groupby(['video']\
                                                ).nunique().sum().values[0].astype(int)
sum_frames_with_annotated_data_IB = df.loc[(df['bb_top']>=0.0)&(df['breed']=='IB'),\
                                           ['video', 'frame']\
                                          ].groupby(['video']\
                                                    ).nunique().sum().values[0].astype(int)
sum_frames_with_annotated_data_DW = df.loc[(df['bb_top']>=0.0)&(df['breed']=='DW'),\
                                           ['video', 'frame']\
                                          ].groupby(['video']\
                                                    ).nunique().sum().values[0].astype(int)
print('nr frames with annotations: ', sum_frames_with_annotated_data)
print('nr frames with annotations IB: ', sum_frames_with_annotated_data_IB, \
      ' which is', round(sum_frames_with_annotated_data_IB/sum_frames_with_annotated_data*100,1), '%')
print('nr frames with annotations DW: ', sum_frames_with_annotated_data_DW, \
      ' which is', round(sum_frames_with_annotated_data_DW/sum_frames_with_annotated_data*100,1), '%')

nr frames with annotations:  7813
nr frames with annotations IB:  4807  which is 61.5 %
nr frames with annotations DW:  3006  which is 38.5 %


In [9]:
nr_annotations = df.x_BB.count()
nr_annotations_IB = df.loc[df['breed']=='IB'].x_BB.count()
nr_annotations_DW = df.loc[df['breed']=='DW'].x_BB.count()
print('nr annotations: ', nr_annotations)
print('nr annotations IB: ', nr_annotations_IB, ' which is', round(nr_annotations_IB/nr_annotations*100,1), '%')
print('nr annotations DW: ', nr_annotations_DW, ' which is', round(nr_annotations_DW/nr_annotations*100,1), '%')

nr annotations:  71488
nr annotations IB:  41533  which is 58.1 %
nr annotations DW:  29955  which is 41.9 %


In [10]:
sum_frames_with_marker_data = df.loc[(df['tl_x']>=0.0),\
                                        ['video', 'frame']\
                                       ].groupby(['video']\
                                                ).nunique().sum().values[0].astype(int)
sum_frames_with_marker_data_IB = df.loc[(df['tl_x']>=0.0)&(df['breed']=='IB'),\
                                           ['video', 'frame']\
                                          ].groupby(['video']\
                                                    ).nunique().sum().values[0].astype(int)
sum_frames_with_marker_data_DW = df.loc[(df['tl_x']>=0.0)&(df['breed']=='DW'),\
                                           ['video', 'frame']\
                                          ].groupby(['video']\
                                                    ).nunique().sum().values[0].astype(int)
print('nr frames with marker tracks: ', sum_frames_with_marker_data)
print('nr frames with marker tracks IB: ', sum_frames_with_marker_data_IB, \
      ' which is', round(sum_frames_with_marker_data_IB/sum_frames_with_marker_data*100,1), '%')
print('nr frames with marker tracks DW: ', sum_frames_with_marker_data_DW, \
      ' which is', round(sum_frames_with_marker_data_DW/sum_frames_with_marker_data*100,1), '%')

nr frames with marker tracks:  139893
nr frames with marker tracks IB:  81653  which is 58.4 %
nr frames with marker tracks DW:  58240  which is 41.6 %


In [11]:
nr_markers = df.tl_x.count()
nr_markers_IB = df.loc[df['breed']=='IB'].tl_x.count()
nr_markers_DW = df.loc[df['breed']=='DW'].tl_x.count()
print('nr markers: ', nr_markers)
print('nr markers IB: ', nr_markers_IB, ' which is', round(nr_markers_IB/nr_markers*100,1), '%')
print('nr markers DW: ', nr_markers_DW, ' which is', round(nr_markers_DW/nr_markers*100,1), '%')

nr markers:  648772
nr markers IB:  252977  which is 39.0 %
nr markers DW:  395795  which is 61.0 %


In [12]:
sum_frames_with_data = df.loc[:,['video', 'frame']\
                             ].groupby(['video']\
                                       ).nunique().sum().values[0].astype(int)
sum_frames_with_data_IB = df.loc[(df['breed']=='IB'),\
                                 ['video', 'frame']\
                                ].groupby(['video']\
                                          ).nunique().sum().values[0].astype(int)
sum_frames_with_data_DW = df.loc[(df['breed']=='DW'),\
                                 ['video', 'frame']\
                                ].groupby(['video']\
                                          ).nunique().sum().values[0].astype(int)
print('nr frames with any positions: ', sum_frames_with_data)
print('nr frames with any positions IB: ', sum_frames_with_data_IB, \
      ' which is', round(sum_frames_with_data_IB/sum_frames_with_data*100,1), '%')
print('nr frames with any positions DW: ', sum_frames_with_data_DW, \
      ' which is', round(sum_frames_with_data_DW/sum_frames_with_data*100,1), '%')

nr frames with any positions:  140703
nr frames with any positions IB:  82367  which is 58.5 %
nr frames with any positions DW:  58336  which is 41.5 %


In [13]:
sum_frames_with_marker_and_annotations = df.loc[(df['tl_x']>=0.0)&(df['bb_top']>=0.0),\
                                                ['video', 'frame']\
                                                ].groupby(['video']\
                                                          ).nunique().sum().values[0].astype(int)
sum_frames_with_marker_and_annotations_IB = df.loc[(df['tl_x']>=0.0)&(df['bb_top']>=0.0)&(df['breed']=='IB'),\
                                                ['video', 'frame']\
                                                ].groupby(['video']\
                                                          ).nunique().sum().values[0].astype(int)
sum_frames_with_marker_and_annotations_DW = df.loc[(df['tl_x']>=0.0)&(df['bb_top']>=0.0)&(df['breed']=='DW'),\
                                                ['video', 'frame']\
                                                ].groupby(['video']\
                                                          ).nunique().sum().values[0].astype(int)

print('nr frames with both positions: ', sum_frames_with_marker_and_annotations)
print('nr frames with both positions IB: ', sum_frames_with_marker_and_annotations_IB, \
      ' which is', round(sum_frames_with_marker_and_annotations_IB/sum_frames_with_marker_and_annotations*100,1), '%')
print('nr frames with both positions DW: ', sum_frames_with_marker_and_annotations_DW, \
      ' which is', round(sum_frames_with_marker_and_annotations_DW/sum_frames_with_marker_and_annotations*100,1), '%')


nr frames with both positions:  7003
nr frames with both positions IB:  4093  which is 58.4 %
nr frames with both positions DW:  2910  which is 41.6 %


## 4.2 Table 2 expected vs found

#### naming of animals requires calculation in 2 steps:
since names of animals are not unique, we sum data per video.

We could also have done it per bird per video, but this way we could (now excluded) also check for any strong deviations in the complete df for videos. As expected we did not see any, but this way we could check.


### 4.2.A column to distinguish between unidentified animals per video
since te same code is used for all unidentified birds, we link the manual annotation number.

In [14]:
df['combiID'] = df['ID_CVAT'].astype(str) + '_' + df['ID_ArUco'].astype(str)
df.head()

Unnamed: 0,Unnamed: 0.1,index,frame,ID_x,top_left,top_right,bottom_right,bottom_left,path_and_filename_x,video,...,corresponding_annotated_frame,x_BB,y_BB,cor_BB_x,cor_BB_y,dist_BB_ArUco,delete,cvat_data,visible_inter,combiID
0,0,0,1.0,12.0,"(292.0, 849.0)","(289.0, 821.0)","(308.0, 818.0)","(311.0, 847.0)",ArUco/01_20220108115951_part1_1_ArUco_tracks.csv,01_20220108115951_part1_1,...,0,,,,,,,,1.0,1.0_12.0
1,1,1,2.0,12.0,"(292.0, 849.0)","(289.0, 821.0)","(307.0, 818.0)","(311.0, 846.0)",ArUco/01_20220108115951_part1_1_ArUco_tracks.csv,01_20220108115951_part1_1,...,0,,,,,,,,1.0,1.0_12.0
2,2,2,3.0,12.0,"(293.0, 849.0)","(289.0, 821.0)","(308.0, 818.0)","(311.0, 846.0)",ArUco/01_20220108115951_part1_1_ArUco_tracks.csv,01_20220108115951_part1_1,...,0,,,,,,,,1.0,1.0_12.0
3,3,3,4.0,12.0,"(293.0, 849.0)","(290.0, 821.0)","(308.0, 818.0)","(312.0, 845.0)",ArUco/01_20220108115951_part1_1_ArUco_tracks.csv,01_20220108115951_part1_1,...,0,,,,,,,,1.0,1.0_12.0
4,4,4,5.0,12.0,"(294.0, 848.0)","(290.0, 820.0)","(309.0, 817.0)","(312.0, 845.0)",ArUco/01_20220108115951_part1_1_ArUco_tracks.csv,01_20220108115951_part1_1,...,0,,,,,,,,1.0,1.0_12.0


### 4.2.B.1 tracking quality per video - groupby element animals_p_vid

In [15]:
# groupby data per video:
animals_p_vid = df[['video', 'nr_animals', 'breed', 'pen']\
                  ].groupby('video').first().reset_index()
animals_p_vid.sample(5)

Unnamed: 0,video,nr_animals,breed,pen
35,06_20211217112157_part1_3,9,IB,2
18,02_20220111065955_part2_WC,10,DW,19
37,06_20211217112157_part1_5,9,IB,2
27,05_20220108114710_part_2,9,IB,10
57,11_20220108105951_part1_4,10,IB,14


### 4.2.B.2 add to groupby element animals_p_vid
in the next part we add columns with aggregated data per video.
Important: I checked every added column. For these, all videos contain data and thus adding information to a groupby element this way is possible.


In [16]:
# get the number of unique ArUco Ids per video - unidentified animals not included
animals_p_vid['nr_ArUco'] = df.loc[(df['ID_ArUco']!=9999), ['video', 'ID_ArUco']\
                                ].groupby(['video']).nunique().reset_index()['ID_ArUco']
# get the unique number of frames per video with data
animals_p_vid['frames'] = df.loc[(df['ID_ArUco']!=9999), ['video', 'frame']\
                                ].groupby(['video']).nunique().reset_index()['frame']
# get the unique number of annotated frames with  per video
animals_p_vid['annotated_frames'] = df.loc[(df['ID_ArUco']!=9999), ['video', 'frame_tracked']\
                                          ].groupby(['video']).nunique().reset_index()['frame_tracked']
# get the number of annotated animals per video
animals_p_vid['annotated_ID'] = df.loc[:, ['video', 'combiID']\
                                ].groupby(['video']).nunique().reset_index()['combiID']
# get the number of hidden animals from the available columns
animals_p_vid['hidden'] = animals_p_vid['nr_animals'] - animals_p_vid['annotated_ID']
# get the number of visible and identified animals
animals_p_vid['annotated_and_readable_ID'] = df.loc[(df['ID_ArUco']!=9999), ['video', 'combiID']\
                                                    ].groupby(['video']).nunique().reset_index()['combiID']
# get the number of unidentified animals per video
# since we have all information already, we choose the 2nd calculation option:
# animals_p_vid['unreadable'] = df.loc[(df['ID_ArUco']==9999), ['video', 'ID_CVAT']\
#                                 ].groupby(['video']).nunique().reset_index()['ID_CVAT']
animals_p_vid['unreadable2'] = animals_p_vid['annotated_ID'] - animals_p_vid['nr_ArUco']
animals_p_vid.sample(5) # let's have a look

Unnamed: 0,video,nr_animals,breed,pen,nr_ArUco,frames,annotated_frames,annotated_ID,hidden,annotated_and_readable_ID,unreadable2
35,06_20211217112157_part1_3,9,IB,2,9,2392,120,9,0,9,0
47,07_20220108115953_part2_6,10,DW,12,9,2400,120,10,0,9,1
33,06_20211217112157_part1_10,9,IB,2,7,2396,120,9,0,7,2
58,11_20220108105951_part1_5,10,IB,14,7,2397,120,10,0,7,3
24,03_20220108075951_part2_5,11,DW,17,10,2366,119,10,1,10,0


### 4.2.B.3 add to groupby element animals_p_vid
in the next part we add columns with aggregated data per video.
Important: Here I noticed the aggregated column does not match the index. therefore we make a new frame and then merge it back.

In [17]:
# get the total number of tracked frames:
frames_gb = df[['frame', 'video']].groupby('video').count().reset_index()
animals_p_vid = animals_p_vid.merge(frames_gb, on='video') # merge to groupby
# get the number of frames (tracked ArUco) on which the animals were visible
frames_gb2 = df.loc[df['visible_inter']==1.0,['frame', 'video']\
                    ].groupby('video').count().reset_index()
frames_gb2['frames_visible'] = frames_gb2['frame'] # rename for name 'frame' is used
animals_p_vid = animals_p_vid.merge(frames_gb2[['frames_visible','video']],\
                                    on='video', how='outer') # merge to groupby
# get the number of frames (tracked ArUco) on which the animals were NOT visible
frames_gb3 = df.loc[df['visible_inter']!=1.0,['frame', 'video']\
                    ].groupby('video').count().reset_index()
frames_gb3['frames_not_visible'] = frames_gb3['frame'] # rename for name 'frame' is used
animals_p_vid = animals_p_vid.merge(frames_gb3[['frames_not_visible','video']\
                                              ], on='video', how='outer') # merge to groupby
# get the number of annotated frames (manual CVAT bounding boxes) marked as visible
frames_gb4 = df.loc[df['visible_inter']==1.0,['frame_tracked', 'video']\
                    ].groupby('video').count().reset_index()
frames_gb4['annotations_visible'] = frames_gb4['frame_tracked'] # rename for name 'frame_tracked' doesn't explain 
animals_p_vid = animals_p_vid.merge(frames_gb4[['annotations_visible','video']\
                                              ], on='video', how='outer')# merge to groupby
# get the number of annotated frames (manual CVAT bounding boxes) marked as NOT visible
frames_gb5 = df.loc[df['visible_inter']!=1.0,['frame_tracked', 'video']\
                    ].groupby('video').count().reset_index()
frames_gb5['annotations_occluded'] = frames_gb5['frame_tracked'] # rename for name 'frame_tracked' doesn't explain 
animals_p_vid = animals_p_vid.merge(frames_gb5[['annotations_occluded','video']\
                                              ], on='video', how='outer') # merge to groupby
animals_p_vid.sample(5) # always have a look

Unnamed: 0,video,nr_animals,breed,pen,nr_ArUco,frames,annotated_frames,annotated_ID,hidden,annotated_and_readable_ID,unreadable2,frame,frames_visible,frames_not_visible,annotations_visible,annotations_occluded
26,05_20220108114710_part_1,9,IB,10,7,2401,121,9,0,7,2,6620,6607,13.0,1076,13.0
27,05_20220108114710_part_2,9,IB,10,5,2063,120,9,0,5,4,3757,3688,69.0,1011,69.0
19,02_20220111112402_part2,11,IB,3,7,2455,123,9,2,7,2,7009,6999,10.0,1090,10.0
55,11_20220108105951_part1_2,10,IB,14,9,2394,120,10,0,9,1,11215,11014,201.0,1097,91.0
3,01_20220108115951_part1_4,11,DW,9,10,2402,121,10,1,10,0,18287,18192,95.0,1120,90.0


### 4.2.B.4 add to groupby element animals_p_vid
For the final columns to be made, we use the existing columns

In [18]:
# since there is no data on hidden animals, but it should be there, we calculate it.
# completely hidden animals should have contributed a nr of frames:
animals_p_vid['hidden_missing_annotations'] = animals_p_vid['hidden'] * animals_p_vid['annotated_frames']
# get the maximum number of possible annotations:
animals_p_vid['expected_annotations_max'] = animals_p_vid['annotated_frames'] * animals_p_vid['nr_animals']
# get the number ot annotations for animals that are visible at least once in a video, but they hide completely.
animals_p_vid['partly_hidden_missing_annotations'] = animals_p_vid['expected_annotations_max'] - animals_p_vid['annotations_visible'] - animals_p_vid['annotations_occluded'] - animals_p_vid['hidden_missing_annotations']
animals_p_vid.sample(5) # let's have a look

Unnamed: 0,video,nr_animals,breed,pen,nr_ArUco,frames,annotated_frames,annotated_ID,hidden,annotated_and_readable_ID,unreadable2,frame,frames_visible,frames_not_visible,annotations_visible,annotations_occluded,hidden_missing_annotations,expected_annotations_max,partly_hidden_missing_annotations
47,07_20220108115953_part2_6,10,DW,12,9,2400,120,10,0,9,1,14726,14599,127.0,1073,127.0,0,1200,0.0
54,11_20220108105951_part1_1,10,IB,14,8,2400,120,9,1,8,1,11762,10911,851.0,960,117.0,120,1200,3.0
44,07_20220108115953_part2_3,10,DW,12,9,2402,121,10,0,9,1,15592,15456,136.0,1087,123.0,0,1210,0.0
14,02_20220111065954_part2_BD,10,IB,3,4,1270,121,10,0,4,6,2377,2173,204.0,1012,198.0,0,1210,0.0
43,07_20220108115953_part2_2,10,DW,12,9,887,120,10,0,9,1,6299,6095,204.0,996,204.0,0,1200,0.0


### 4.2.B.5 - results Table 2
based on the aggregated information per video, we calculate the results regarding the visibility of animals.

In [19]:
max_annotations_expected_total = animals_p_vid.expected_annotations_max.sum()
max_annotations_expected_IB = animals_p_vid[animals_p_vid['breed']=='IB'].expected_annotations_max.sum()
max_annotations_expected_DW = animals_p_vid[animals_p_vid['breed']=='DW'].expected_annotations_max.sum()
print('max nr annotations are total: ', max_annotations_expected_total,\
      ' IB: ', max_annotations_expected_IB, ' DW: ', max_annotations_expected_DW)

hidden_total = animals_p_vid.hidden_missing_annotations.sum() + animals_p_vid.partly_hidden_missing_annotations.sum()
hidden_IB = animals_p_vid[animals_p_vid['breed']=='IB'].hidden_missing_annotations.sum() + \
animals_p_vid[animals_p_vid['breed']=='IB'].partly_hidden_missing_annotations.sum()
hidden_DW = animals_p_vid[animals_p_vid['breed']=='DW'].hidden_missing_annotations.sum() + \
animals_p_vid[animals_p_vid['breed']=='DW'].partly_hidden_missing_annotations.sum()
print('max nr missing annotations are total: ', hidden_total,\
      ' IB: ', hidden_IB, ' DW: ', hidden_DW)

perc_hidden_total = round((hidden_total / max_annotations_expected_total)*100,1)
perc_hidden_IB = round((hidden_IB / max_annotations_expected_IB)*100,1)
perc_hidden_DW = round((hidden_DW / max_annotations_expected_DW)*100,1)

print('max % missing annotations are total: ', perc_hidden_total, '%',\
      ' IB: ', perc_hidden_IB, '%', ' DW: ', perc_hidden_DW, '%')

occluded_total = animals_p_vid.annotations_occluded.sum()
occluded_IB = animals_p_vid[animals_p_vid['breed']=='IB'].annotations_occluded.sum()
occluded_DW = animals_p_vid[animals_p_vid['breed']=='DW'].annotations_occluded.sum()
print('max nr missing annotations are total: ', occluded_total,\
      ' IB: ', occluded_IB, ' DW: ', occluded_DW)

perc_occluded_total = round((occluded_total / max_annotations_expected_total)*100,1)
perc_occluded_IB = round((occluded_IB / max_annotations_expected_IB)*100,1)
perc_occluded_DW = round((occluded_DW / max_annotations_expected_DW)*100,1)
print('max % missing annotations are total: ', perc_occluded_total, '%',\
      ' IB: ', perc_occluded_IB, '%', ' DW: ', perc_occluded_DW, '%')

visible_total = animals_p_vid.annotations_visible.sum()
visible_IB = animals_p_vid[animals_p_vid['breed']=='IB'].annotations_visible.sum()
visible_DW = animals_p_vid[animals_p_vid['breed']=='DW'].annotations_visible.sum()
print('max nr occluded annotations are total: ', visible_total,\
      ' IB: ', visible_IB, ' DW: ', visible_DW)

perc_visible_total = round((visible_total / max_annotations_expected_total)*100,1)
perc_visible_IB = round((visible_IB / max_annotations_expected_IB)*100,1)
perc_visible_DW = round((visible_DW / max_annotations_expected_DW)*100,1)
print('max % visible annotations are total: ', perc_visible_total, '%',\
      ' IB: ', perc_visible_IB, '%', ' DW: ', perc_visible_DW, '%')


max nr annotations are total:  76572  IB:  44832  DW:  31740
max nr missing annotations are total:  5084.0  IB:  3299.0  DW:  1785.0
max % missing annotations are total:  6.6 %  IB:  7.4 %  DW:  5.6 %
max nr missing annotations are total:  8216.0  IB:  4309.0  DW:  3907.0
max % missing annotations are total:  10.7 %  IB:  9.6 %  DW:  12.3 %
max nr occluded annotations are total:  63272  IB:  37224  DW:  26048
max % visible annotations are total:  82.6 %  IB:  83.0 %  DW:  82.1 %


## 4.3 - Returned markers of Table 3
calculated directly from the original data frame, we calculate these numbers on the percentages of found markers.

### 4.3.A Results Table 3 - Expected markers found

In [20]:
# nr ArUco found visible animals.
# tl_x is used for it only occurs in the ArUco data
nr_ArUco_markers_found_on_visible = df.loc[(df['visible_inter']==1.0) & (df['tl_x']>=0), ['tl_x']].count()[0]
nr_ArUco_markers_found_on_visible_IB = df.loc[(df['visible_inter']==1.0) & (df['tl_x']>=0) & (df['breed']=='IB'),\
                                              ['tl_x']].count()[0]
nr_ArUco_markers_found_on_visible_DW = df.loc[(df['visible_inter']==1.0) & (df['tl_x']>=0) & (df['breed']=='DW'),\
                                              ['tl_x']].count()[0]
# now per hybrid and for the total let's look at visibility:
nr_visible_annotations = df.loc[(df['visible']==1.0), ['x_BB']].count()[0]
nr_visible_annotations_IB = df.loc[(df['visible']==1.0) & (df['breed']=='IB'), ['x_BB']].count()[0]
nr_visible_annotations_DW = df.loc[(df['visible']==1.0) & (df['breed']=='DW'), ['x_BB']].count()[0]

fps = 20 # required for the calculation

expected_markers_found = round((nr_ArUco_markers_found_on_visible/(nr_visible_annotations*fps))*100,1)
expected_markers_found_IB = round((nr_ArUco_markers_found_on_visible_IB/(nr_visible_annotations_IB*fps))*100,1)
expected_markers_found_DW = round((nr_ArUco_markers_found_on_visible_DW/(nr_visible_annotations_DW*fps))*100,1)

print('% expected_markers_found', expected_markers_found, '%')
print('% expected_markers_found_IB', expected_markers_found_IB, '%')
print('% expected_markers_found_DW', expected_markers_found_DW, '%')

% expected_markers_found 51.0 %
% expected_markers_found_IB 33.8 %
% expected_markers_found_DW 75.6 %


### 4.3.B Results Table 3 - ArUco found compared to maximum
This will be an important fraction for in any normal situation this is what we have.

In [21]:
# per video nr animals and nr frames
gb_for_max = df.loc[:,['video', 'nr_animals', 'breed']].groupby(['video']).first().reset_index()
gb_for_max['frames'] = df.loc[:,['video', 'frame_tracked']].groupby(['video']).nunique().reset_index()['frame_tracked']
gb_for_max['max_annotations'] = gb_for_max['nr_animals']*gb_for_max['frames']

maximum_nr_frames = gb_for_max.max_annotations.sum() * fps
maximum_nr_frames_IB = gb_for_max[gb_for_max['breed']=='IB'].max_annotations.sum() * fps
maximum_nr_frames_DW = gb_for_max[gb_for_max['breed']=='DW'].max_annotations.sum() * fps

# print('maximum_nr_frames', maximum_nr_frames)
# print('maximum_nr_frames_IB', maximum_nr_frames_IB)
# print('maximum_nr_frames_DW', maximum_nr_frames_DW)

print('ArUco found compared to maximum: ',\
      round(nr_ArUco_markers_found_on_visible/(maximum_nr_frames)*100,1), '%')
print('ArUco found compared to maximum IB: ',\
      round(nr_ArUco_markers_found_on_visible_IB/(maximum_nr_frames_IB)*100,1), '%')
print('ArUco found compared to maximum DW: ',\
      round(nr_ArUco_markers_found_on_visible_DW/(maximum_nr_frames_DW)*100,1), '%')

ArUco found compared to maximum:  42.2 %
ArUco found compared to maximum IB:  28.1 %
ArUco found compared to maximum DW:  62.1 %


### 4.3.C Results Table 3 - Identity linked within 2 minute video
This will be an important fraction for in any normal situation this is what we have.

In [24]:
# since some animals will have no information, we add information to the groupby element via groupby-merge
#first the number observed animals
nr_observed = df.loc[:,['video', 'ID_CVAT']].groupby(['video']).nunique().reset_index()
nr_observed['nr_observed_animals'] = nr_observed['ID_CVAT'] # rename
gb_for_max = gb_for_max.merge(nr_observed[['video','nr_observed_animals']], how='outer')
# number of animals identified per video
nr_ArUco_pv = df.loc[df['ID_ArUco']<=40,['video', 'ID_CVAT']].groupby(['video']).nunique().reset_index()
nr_ArUco_pv['nr_IDs_identified'] = nr_ArUco_pv['ID_CVAT']
gb_for_max = gb_for_max.merge(nr_ArUco_pv[['video','nr_IDs_identified']], how='outer')
# number of animals not identified
nr_ArUco_pv2 = df.loc[df['ID_ArUco']==9999,['video', 'ID_CVAT']].groupby(['video']).nunique().reset_index()
nr_ArUco_pv2['nr_IDs_not_identified'] = nr_ArUco_pv2['ID_CVAT']
gb_for_max = gb_for_max.merge(nr_ArUco_pv2[['video','nr_IDs_not_identified']], how='outer')

# small test to see if it all worked:
gb_for_max['test'] = gb_for_max['nr_observed_animals'] - gb_for_max['nr_IDs_identified'] - gb_for_max['nr_IDs_not_identified']
print('small test to see if calculations are correct: should say zero: ', gb_for_max.test.sum())
print('okay great - at least for me')


small test to see if calculations are correct: should say zero:  0.0
okay great - at least for me


In [25]:
perc_ID_linked_pv = round((gb_for_max.nr_IDs_identified.sum() / gb_for_max.nr_observed_animals.sum())*100,1)
perc_ID_linked_pv_IB = round((gb_for_max[gb_for_max['breed']=='IB'].nr_IDs_identified.sum() / \
                              gb_for_max[gb_for_max['breed']=='IB'].nr_observed_animals.sum())*100,1)
perc_ID_linked_pv_DW = round((gb_for_max[gb_for_max['breed']=='DW'].nr_IDs_identified.sum() / \
                              gb_for_max[gb_for_max['breed']=='DW'].nr_observed_animals.sum())*100,1)
print('perc_ID_linked_pv', perc_ID_linked_pv, '%')
print('perc_ID_linked_pv_IB', perc_ID_linked_pv_IB, '%')
print('perc_ID_linked_pv_DW', perc_ID_linked_pv_DW, '%')

perc_ID_linked_pv 75.2 %
perc_ID_linked_pv_IB 61.5 %
perc_ID_linked_pv_DW 94.0 %
