In [1]:
%%javascript
// Replace the output area's scroll check with one that ignores its `lines`
// argument and always returns false.
// NOTE(review): presumably this keeps long cell outputs from being collapsed
// into a scrollable box; confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
%load_ext autoreload
%autoreload 2

In [19]:
import sys
sys.path.append('..')

import os
import numpy as np
from analysis import info_datasets, unique_datasets_list, add_paths

# Both roots sit next to each other under the same parent directory.
root_dataset, root_dataframe = (
    os.path.join('/mnt', 'data', 'turtles', 'datasets', leaf)
    for leaf in ('datasets', 'dataframes')
)

# De-duplicate the imported dataset descriptions, then attach the two roots.
info_datasets = add_paths(
    unique_datasets_list(info_datasets),
    root_dataset,
    root_dataframe,
)

# Each entry is indexed positionally: [0] provides `from_file`, [2] and [3]
# are its positional arguments and [1] is a mapping of keyword arguments.
datasets = []
for info_dataset in info_datasets:
    loader = info_dataset[0].from_file
    dataset = loader(info_dataset[2], info_dataset[3], **info_dataset[1])
    datasets.append(dataset)

In [28]:
# NOTE(review): this cell reads no notebook state and nothing is assigned from
# it; it looks like a leftover scratch cell -- confirm it can be removed.
[x**2 for x in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [42]:
import pandas as pd

def number_individuals(df):
    """Count the distinct values of ``df['identity']``, not counting 'unknown'.

    Args:
        df: DataFrame with an ``identity`` column.

    Returns:
        Number of unique identity values, reduced by one when the label
        'unknown' is among them.
    """
    labels = list(df['identity'].unique())
    return len(labels) - 1 if 'unknown' in labels else len(labels)

def number_small(df, ratio=0.1):
    """Count identities whose image count is small relative to the largest one.

    Rows labelled 'unknown' are ignored. An identity is counted when its
    number of rows is at most ``ratio`` times the row count of the most
    frequent identity.

    Args:
        df: DataFrame with an ``identity`` column.
        ratio: Fraction of the largest per-identity count used as threshold.

    Returns:
        The number of such identities as a plain ``int``; 0 when ``df`` has
        no rows with a known identity.
    """
    df_red = df[df['identity'] != 'unknown']
    counts = df_red['identity'].value_counts().to_numpy()
    # np.max raises ValueError on an empty array, so handle the
    # "no known identities" case explicitly.
    if counts.size == 0:
        return 0
    return int(np.sum(counts <= ratio * np.max(counts)))

def average_photo_number(df):
    r"""Format the mean and spread of images per identity as a LaTeX snippet.

    Rows labelled 'unknown' are ignored. The mean and the standard deviation
    (``np.std`` with its default arguments) of the per-identity row counts
    are each rounded to one decimal.

    Args:
        df: DataFrame with an ``identity`` column.

    Returns:
        A string of the form ``$<mean>\pm<std>$``.
    """
    df_red = df[df['identity'] != 'unknown']
    counts = df_red['identity'].value_counts().to_numpy()
    mean = np.round(np.mean(counts), 1)
    std = np.round(np.std(counts), 1)
    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    return '$' + str(mean) + r'\pm' + str(std) + '$'

def get_checkmark(condition):
    r"""Map a truthy/falsy value to the LaTeX macro name used in the table.

    Args:
        condition: Any value; only its truthiness is used.

    Returns:
        ``\cmark`` when ``condition`` is truthy, ``\xmark`` otherwise.
    """
    return r'\cmark' if condition else r'\xmark'
    
def create_dict(i):
    r"""Build one row of the summary table from a loaded dataset object.

    Args:
        i: Object exposing ``cite``, ``year``, ``span``, ``wild``,
            ``unique_pattern``, ``full_frame`` and a DataFrame ``df`` with
            an ``identity`` column.

    Returns:
        dict keyed by column name. ``name`` is the object's class name
        followed by a ``\cite{...}`` command; ``images`` counts the rows
        whose identity is not 'unknown'; the three boolean attributes are
        rendered through ``get_checkmark``.
    """
    known = i.df[i.df['identity'] != 'unknown']
    return {
        # Raw string: '\c' is not a valid escape sequence in a normal literal.
        'name': i.__class__.__name__ + r' \cite{' + i.cite + '}',
        #'source': '\cite{' + i.cite + '}',
        'year': i.year,
        'images': len(known),
        'individuals': number_individuals(i.df),
        'individuals small': number_small(i.df),
        'span': i.span,
        'wild': get_checkmark(i.wild),
        'pattern': get_checkmark(i.unique_pattern),
        'full frame': get_checkmark(i.full_frame),
    }

# One summary row per loaded dataset.
df = pd.DataFrame(list(map(create_dict, datasets)))

# escape=False leaves the LaTeX commands stored in the cells untouched.
latex = df.to_latex(
    index=False,
    escape=False,
    float_format="%.0f",
)
print(latex)

\begin{tabular}{lrrrrllll}
\toprule
                                            name &  year &   images &  individuals &  individuals small &        span &    wild & pattern & full frame \\
\midrule
            AAUZebraFishID \cite{bruslund2020re} &  2020 &     6672 &            6 &                  0 &       1 day &  \xmark &  \xmark &     \cmark \\
        AerialCattle2017 \cite{andrew2017visual} &  2017 &    46340 &           23 &                  4 &       1 day &  \xmark &  \cmark &     \xmark \\
                          ATRW \cite{li2019atrw} &  2019 &     5415 &          182 &                121 &       short &  \xmark &  \cmark &     \cmark \\
                        BelugaID \cite{belugaid} &  2022 &     5902 &          788 &                635 &   2.1 years &  \cmark &  \xmark &     \xmark \\
        BirdIndividualID \cite{ferreira2020deep} &  2019 &    51934 &           50 &                  0 &     15 days &  \xmark &  \xmark &     \cmark \\
               CTai \cite{freyt

In [40]:
# Show the `wild` attribute of every entry in `datasets`, in list order.
[dataset.wild for dataset in datasets]

[False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 True]

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Every year in [year_min, year_max] gets its own tick; earlier years are
# pooled into a single 'older' bin. Years after year_max are not counted.
year_min = 2016
year_max = 2022

years = np.array([dataset.year for dataset in datasets])
counts = [np.count_nonzero(years < year_min)]
for year in range(year_min, year_max + 1):
    counts.append(np.count_nonzero(years == year))
y_max = max(counts)

# Tick labels: 'older' followed by each year as text.
xlabel = ['older'] + [str(tick_year) for tick_year in range(year_min, year_max + 1)]

fig, ax = plt.subplots()
ax.plot(counts)
ax.set_xticks(range(len(xlabel)))
ax.set_xticklabels(xlabel)
# One y tick per integer count from 0 up to the largest bin.
ax.set_yticks(range(0, y_max + 1))
ax.set_yticklabels(range(0, y_max + 1))
fig.savefig('counts.jpg', bbox_inches='tight')