In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd

## Download module
- Downloading is done with a context manager that uses the download directory as the cwd.
    - A lot of repetitive code is reused.
    - Functions can then be easily used in code / Jupyter notebooks without any path issues.
- Download scripts are accessible from the `downloads` namespace using dot notation.


In [2]:
from wildlife_datasets import downloads

# The 'downloads' module exposes every download script as an attribute.
# This bare attribute access only illustrates the dot notation; it is not the
# last expression in the cell, so nothing is displayed for it.
downloads.friesian_cattle_2015

# Any download script can then be called from code as follows.
# NOTE(review): the argument appears to be the directory the data is
# downloaded into — confirm against get_data.
downloads.test.get_data('data/Test')

10.txt.utf-8: 4.46MB [00:01, 2.72MB/s]                            


## Dataset module

In [3]:
from wildlife_datasets import datasets

# A download script is also reachable from within the dataset class,
# through its `download` attribute.
datasets.Test.download.get_data('data/Test')

10.txt.utf-8: 4.46MB [00:01, 2.67MB/s]                            


### Metadata
- Metadata are created as a CSV table:
- Metadata for each class are stored as a dictionary. Missing values should be handled via ***get('item', None)***

In [4]:
# Licence name and URL shared by the two cattle datasets below.
NCGL_NAME = 'Non-Commercial Government Licence for public sector information'
NCGL_URL = 'https://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/'

# One record per dataset. Keys become CSV columns in order of first appearance,
# so the key order inside each record is significant.
table = [
    {
        'name': 'AerialCattle2017',
        'licenses': NCGL_NAME,
        'licenses_url': NCGL_URL,
        'url': 'https://data.bris.ac.uk/data/dataset/3owflku95bxsx24643cybxu3qh',
        'cite': 'andrew2017visual',
        'animals': ['Friesian cattle'],
        'real_animals': True,
        'year': 2017,
        'reported_n_total': 46340,
        'reported_n_identified': 46340,
        'reported_n_photos': 46340,
        'reported_n_individuals': 23,
        'wild': False,
        'clear_photos': True,
        'pose': 'single',  # from the top
        'unique_pattern': True,
        'from_video': True,
        'full_frame': False,
    },
    {
        'name': 'FriesianCattle2015',
        'licenses': NCGL_NAME,
        'licenses_url': NCGL_URL,
        'url': 'https://data.bris.ac.uk/data/dataset/wurzq71kfm561ljahbwjhx9n3',
        'cite': 'andrew2016automatic',
        'animals': ['Friesian cattle'],
        'real_animals': True,
        'year': 2016,
        'reported_n_total': 83 + 294,  # train + test
        'reported_n_identified': 83 + 294,  # train + test
        'reported_n_photos': 83 + 294,  # train + test
        'reported_n_individuals': 40,
        'wild': False,
        'clear_photos': True,
        'pose': 'single',  # from the top
        'unique_pattern': True,
        'from_video': True,
        'full_frame': False,
        'span': '1 day',
        'comments': (
            'reported_n_total: 83 train + 294 test. '
            'reported_n_identified: 83 train + 294 test. '
            'reported_n_photos: 83 train + 294 test. '
        ),
    },
    # Minimal record: every field other than the name is left missing.
    {'name': 'Test'},
]

# Build the frame keyed by dataset name and write it out as the metadata CSV.
df = pd.DataFrame(table).set_index('name')
df.to_csv('wildlife_datasets/datasets/metadata.csv')

In [6]:
# This table backs the `datasets.metadata` object; index it by dataset name
# to get that dataset's metadata as a dictionary.
# NOTE(review): in the displayed result the `animals` list comes back as a
# string and the integer fields as floats (e.g. 2016.0) — presumably an
# artifact of the CSV round trip; confirm in the metadata loader.
datasets.metadata['FriesianCattle2015']

{'licenses': 'Non-Commercial Government Licence for public sector information',
 'licenses_url': 'https://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/',
 'url': 'https://data.bris.ac.uk/data/dataset/wurzq71kfm561ljahbwjhx9n3',
 'cite': 'andrew2016automatic',
 'animals': "['Friesian cattle']",
 'real_animals': True,
 'year': 2016.0,
 'reported_n_total': 377.0,
 'reported_n_identified': 377.0,
 'reported_n_photos': 377.0,
 'reported_n_individuals': 40.0,
 'wild': False,
 'clear_photos': True,
 'pose': 'single',
 'unique_pattern': True,
 'from_video': True,
 'full_frame': False,
 'span': '1 day',
 'comments': 'reported_n_total: 83 train + 294 test. reported_n_identified: 83 train + 294 test. reported_n_photos: 83 train + 294 test. '}

In [7]:
# The metadata is also accessible as a class attribute of the dataset class.
datasets.FriesianCattle2015.metadata

{'licenses': 'Non-Commercial Government Licence for public sector information',
 'licenses_url': 'https://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/',
 'url': 'https://data.bris.ac.uk/data/dataset/wurzq71kfm561ljahbwjhx9n3',
 'cite': 'andrew2016automatic',
 'animals': "['Friesian cattle']",
 'real_animals': True,
 'year': 2016.0,
 'reported_n_total': 377.0,
 'reported_n_identified': 377.0,
 'reported_n_photos': 377.0,
 'reported_n_individuals': 40.0,
 'wild': False,
 'clear_photos': True,
 'pose': 'single',
 'unique_pattern': True,
 'from_video': True,
 'full_frame': False,
 'span': '1 day',
 'comments': 'reported_n_total: 83 train + 294 test. reported_n_identified: 83 train + 294 test. reported_n_photos: 83 train + 294 test. '}