In [None]:
import sys
sys.path.append('..')

from wildlife_datasets import datasets, loader

# Locations of the raw datasets and of the cached dataframes.
# NOTE(review): these are absolute, machine-specific paths; adjust them for your environment.
root_dataset = '/mnt/data/turtles/datasets/datasets'
root_dataframe = '/mnt/data/turtles/datasets/dataframes'

# Keep every dataset class except those whose class name contains 'Segmented'
# and the class named 'SeaTurtleIDHeads'.
dataset_names = [
    dataset_class
    for dataset_class in datasets.dataset_names
    if 'Segmented' not in dataset_class.__name__
    and dataset_class.__name__ != 'SeaTurtleIDHeads'
]

# NOTE: from here on `datasets` is the list of loaded dataset objects and no
# longer the `wildlife_datasets.datasets` module imported at the top of this cell.
datasets = loader.load_datasets(dataset_names, root_dataset, root_dataframe)

In [None]:
import numpy as np
import pandas as pd
import unicodedata

def number_individuals(df):
    """Count the distinct identities in ``df``, not counting the label 'unknown'.

    Args:
        df: DataFrame with an 'identity' column.

    Returns:
        Number of unique values in ``df['identity']``, reduced by one when
        'unknown' is among them.
    """
    labels = list(df['identity'].unique())
    return len(labels) - int('unknown' in labels)

def number_images(df):
    """Count the rows of ``df`` whose identity is not the label 'unknown'.

    Args:
        df: DataFrame with an 'identity' column.

    Returns:
        The count as a plain Python ``int``.
    """
    # Vectorised reduction instead of Python's built-in sum(), which would
    # iterate over the Series element by element.
    is_known = df['identity'] != 'unknown'
    return int(is_known.sum())
    
def average_photo_number(df):
    """Format the mean and standard deviation of images per identity for LaTeX.

    Rows whose identity is 'unknown' are ignored. The remaining rows are
    counted per identity, and the mean and (population) standard deviation of
    those counts are rounded to one decimal place.

    Args:
        df: DataFrame with an 'identity' column.

    Returns:
        A string of the form ``'$<mean>\\pm<std>$'``.
    """
    known = df[df['identity'] != 'unknown']
    counts = known['identity'].value_counts().to_numpy()
    mean = np.round(np.mean(counts), 1)
    std = np.round(np.std(counts), 1)
    # The backslash is escaped explicitly: a bare '\p' is an invalid escape sequence.
    return '$' + str(mean) + '\\pm' + str(std) + '$'

def create_dict(i):
    """Build the summary-table row describing one dataset.

    Args:
        i: Dataset object exposing ``metadata`` (read with the keys 'year',
            'span', 'from_video', 'pose', 'wild' and 'animals') and ``df``
            (a DataFrame with an 'identity' column).

    Returns:
        Dict mapping column label to value, in display order.
    """
    meta, frame = i.metadata, i.df

    row = {}
    row['year'] = meta['year']
    row['$n_{\\rm img}$'] = number_images(frame)
    row['$n_{\\rm ind}$'] = number_individuals(frame)
    row['span'] = meta['span']
    row['source'] = 'video' if meta['from_video'] else 'photo'
    row['pose'] = meta['pose']
    # True when the dataframe has a 'date' column.
    row['timestamp'] = 'date' in frame.columns
    row['wild'] = meta['wild']
    # Currently disabled column:
    #row['pattern'] = meta['unique_pattern']
    row['more species'] = len(meta['animals']) > 1
    return row

# One row per loaded dataset, indexed by the dataset's class name.
names = [type(dataset).__name__ for dataset in datasets]
rows = [create_dict(dataset) for dataset in datasets]
df = pd.DataFrame(rows, index=names)

# Display copy: booleans are shown as a check mark / cross; `df` itself keeps the raw values.
df_print = df.replace(True, "\u2714").replace(False, "\u2716")
df_print

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Years from year_min to year_max (inclusive) each get their own bucket;
# everything before year_min is pooled into a single 'older' bucket.
# NOTE(review): years greater than year_max are not counted in any bucket.
year_min = 2016
year_max = 2022
year_range = range(year_min, year_max + 1)

years = np.array([dataset.metadata['year'] for dataset in datasets])
counts = [np.sum(years < year_min)] + [np.sum(years == year) for year in year_range]
y_max = max(counts)

xlabel = np.array(['older'] + [str(year) for year in year_range])
plt.plot(counts)
# Trailing semicolons suppress the repr of the returned tick objects.
plt.xticks(ticks=range(len(xlabel)), labels=xlabel);
plt.yticks(ticks=range(0, y_max + 1), labels=range(0, y_max + 1));
#plt.savefig('counts.jpg', bbox_inches = 'tight')

In [None]:
def rotate_text(c, angle=90):
    """Wrap ``c`` in a LaTeX ``\\rotatebox`` command, escaping underscores.

    Args:
        c: Text to rotate; every '_' is replaced by '\\_'.
        angle: Rotation angle inserted into the ``\\rotatebox`` command.

    Returns:
        The string ``'\\rotatebox{<angle>}{<escaped c>}'``.
    """
    escaped = c.replace('_', '\\_')
    return '\\rotatebox{' + str(angle) + '}{' + escaped + '}'

def convert_latex(df, datasets, add_citations=True):
    """Render a summary table as LaTeX source.

    A 'name' column holding the index labels of ``df`` is inserted as the
    first column. Headers of boolean columns are rotated with ``rotate_text``,
    and the values True / False are replaced by ``\\cmark`` / ``\\xmark``.
    The input frame is not modified.

    Args:
        df: DataFrame whose index holds the dataset names.
        datasets: Dataset objects in the same order as the rows of ``df``.
            Only ``metadata['cite']`` is read, and only when ``add_citations``
            is True.
        add_citations: When True, append `` \\cite{<key>}`` to every name.

    Returns:
        The LaTeX table as a string (no index column, no escaping, floats
        formatted with "%.0f").
    """
    table = df.copy()
    if add_citations:
        # Raw string: a bare '\c' is an invalid escape sequence.
        cites = [r' \cite{' + dataset.metadata['cite'] + '}' for dataset in datasets]
        names = table.index + cites
    else:
        names = table.index
    table.insert(loc=0, column='name', value=names)
    # The header is built before the replacement below so that boolean columns
    # can still be recognised by their dtype.
    header = [rotate_text(c) if table[c].dtypes.name == 'bool' else c for c in table.columns]
    table = table.replace(True, r'\cmark').replace(False, r'\xmark')
    return table.to_latex(index=False, escape=False, float_format="%.0f", header=header)

# Print the LaTeX source of the table in `df`, with a \cite command appended to each dataset name.
print(convert_latex(df, datasets))

In [None]:
def is_splittable(i):
    """Tell whether dataset ``i`` is marked as splittable.

    Args:
        i: Dataset object exposing a ``metadata`` mapping with a 'pose' key.

    Returns:
        True when ``i.metadata['pose']`` equals 'double', otherwise False.
    """
    # The pose check is currently the only criterion.
    return i.metadata['pose'] == 'double'

def create_dict(i):
    """Build the row of 0/1 difficulty indicators for one dataset.

    Args:
        i: Dataset object exposing ``metadata`` (read with the keys 'span',
            'from_video', 'pose', 'wild' and 'clear_photos') and ``df``
            (a DataFrame with an 'identity' column).

    Returns:
        Dict mapping column label to an integer indicator, in display order.
    """
    meta, frame = i.metadata, i.df

    row = {}
    # Size thresholds: at least 1000 images / at least 100 individuals.
    row['$n_{\\rm img}$'] = int(number_images(frame) >= 1000)
    row['$n_{\\rm ind}$'] = int(number_individuals(frame) >= 100)
    # Set when the span description contains 'year' or 'long'.
    row['span'] = int(any(token in meta['span'] for token in ('year', 'long')))
    row['source'] = int(not meta['from_video'])
    row['pose'] = int(meta['pose'] != 'single')
    row['time'] = int('date' in frame.columns)
    row['wild'] = int(meta['wild'])
    row['tough photos'] = int(not meta['clear_photos'])
    row['splittable'] = int(is_splittable(i))
    return row

# One row of indicators per loaded dataset, indexed by the dataset's class name.
# NOTE: this rebinds `df`, replacing the table built in the earlier cell.
names = [type(dataset).__name__ for dataset in datasets]
df = pd.DataFrame([create_dict(dataset) for dataset in datasets], index=names)
# The difficulty score is the row-wise sum of all indicator columns.
df = df.assign(difficulty=df.sum(axis=1).to_numpy())
df

In [None]:
# Print the LaTeX source of the table in `df`, without citation commands in the name column.
print(convert_latex(df, datasets, add_citations=False))