# Test-EBird-Dec2020

# Description
Examine bulk data from eBird for December 2020

https://ebird.org/science/use-ebird-data/download-ebird-data-products  
https://ebird.org/data/download  


# Environment

## Library Imports

In [None]:
import warnings

# Debugging aid: replace the filter below with
#     warnings.simplefilter('always')   # or 'error' to turn warnings into exceptions
# to see every warning instead of suppressing some of them.

# Suppress RuntimeWarning instances raised from modules whose name matches 'geopandas'.
warnings.filterwarnings(
    action="ignore",
    module='geopandas',
    category=RuntimeWarning,
)

In [None]:
import sys
sys.path.append('common')
sys.path.append('textextractor')
sys.path.append('taxonomy')

import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import geopandas as gpd

from shapely import geometry
from shapely.geometry import Point

## Local Imports

In [None]:
import common_jupyter

# https://medium.com/@rrfd/cookiecutter-data-science-organize-your-projects-atom-and-jupyter-2be7862f487e
from common_paths import *

from local_translation_context import LocalTranslationContext
from taxonomy import Taxonomy
from ebird_extras import EBirdExtra
from parameters import Parameters

from ebird_basic_dataset import use_basic_dataset

from count_day_tasks import summarize_checklists, create_full_circle_summary, get_participants, \
    subids_for_pete_dunten, add_bob_hirt, get_personal_checklist_details

from datetime_manipulation import create_count_week

from locations_map import create_coverage_map, create_potential_duplicates_map
from utilities_kml import build_geodata, build_location_data, update_geo_data_with_clustering, build_location_meta
from ebird_visits import transform_visits, visits_in_circle
from checklist_manipulation import create_checklist_meta, write_checklist_meta, find_location_near_duplicates

# Code

In [None]:
def visits_in_circle(ebirders, geo_data, circle_code, visits):
    """Return the visits located inside a count circle, optionally limited to given observers.

    Note: this definition shadows the ``visits_in_circle`` imported from
    ``ebird_visits`` earlier in the file.

    Args:
        ebirders: Collection of observer names matched against ``visits.Name``,
            or ``None`` to skip the participant filter.
        geo_data: Frame with ``CircleCode``, ``type`` and ``geometry`` columns;
            the first row with ``type == 'circle'`` for ``circle_code`` supplies
            the circle geometry.
        circle_code: Value compared against ``geo_data['CircleCode']``.
        visits: Frame with ``geometry`` (objects exposing ``within``), ``Name``
            and ``locId`` columns.

    Returns:
        The matching rows of ``visits``, sorted by ``locId``.

    Raises:
        IndexError: If ``geo_data`` has no circle row for ``circle_code``.
    """
    # Bracket access so the lookup can never resolve to an attribute of the
    # frame class that happens to share the column's name.
    is_circle_row = (geo_data['CircleCode'] == circle_code) & (geo_data['type'] == 'circle')
    circle_candidates = geo_data[is_circle_row].geometry.values
    if len(circle_candidates) == 0:
        # Same exception type as indexing an empty array, but with context.
        raise IndexError(f'No circle geometry found in geo_data for circle code {circle_code!r}')
    circle_geometry = circle_candidates[0]

    # Note that by construction, visits only contains data for dates we care about
    # so we don't need to filter for that.
    #
    # A boolean ndarray (not a plain list) is used so that an empty `visits`
    # is still treated as a row mask rather than an empty column selection,
    # and so the combination with the participant filter is purely positional.
    mask = np.array([pt.within(circle_geometry) for pt in visits.geometry.values], dtype=bool)
    if ebirders is not None:
        mask &= visits.Name.isin(ebirders).to_numpy()

    return visits[mask].sort_values(by=['locId'])


# Initializations

In [None]:
# Initializations
# Builds the parameter, translation, eBird and taxonomy objects plus the
# convenience globals (circle code, dates, region codes) used by later cells.
print(f'Start : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
print('Initializing...')

# NOTE(review): create_project_paths, local_parameters_path, system_parameters_path
# and cache_path are not defined in this file; presumably they come from the
# `from common_paths import *` star import -- confirm.
create_project_paths()

# Overrides - Credentials
# See Samples/eBirdCredentials.yml for an example
# Hard-coded absolute location of the credentials file; adjust per machine.
my_credentials_storage_path = Path('/Volumes/TSecure3/other/')
eBirdCredential_path = my_credentials_storage_path / 'eBirdCredentials.yml'

# -----------------------------------------------------------------------------------------
# Override - This will find the correct parameter file out of many in Local folder
# -----------------------------------------------------------------------------------------
circle_prefix = 'CACR-2020-'

# Parameters
parameters = Parameters(local_parameters_path, system_parameters_path, circle_prefix, False)

local_translation_context = LocalTranslationContext(local_parameters_path, system_parameters_path)
local_translation_context.reload() # DEBUG; allows test/edit cycle without restarting kernel (singleton)

# Singletons
# Country falls back to 'US' when 'NationalCode' is absent from the parameters.
country = parameters.parameters.get('NationalCode', 'US')
ebird_extra = EBirdExtra(eBirdCredential_path, cache_path, country)
taxonomy = Taxonomy(cache_path, ebird_extra)

# Convenient Parameters
# 'CircleAbbrev' falls back to the placeholder 'XXXX'; 'CountDate' and
# 'eBirdRegion' are read with [] and so must be present. The count week
# start/end both default to the count date when not configured.
circle_code = parameters.parameters.get('CircleAbbrev', 'XXXX')
date_of_count = parameters.parameters['CountDate']
count_week_start = parameters.parameters.get('CountWeekStart', date_of_count)
count_week_end = parameters.parameters.get('CountWeekEnd', date_of_count)
# 'eBirdRegion' is a comma-separated string; split it and trim whitespace around each code.
region_codes = [xs.strip() for xs in parameters.parameters['eBirdRegion'].split(',')]

# Will drop any dates in the future
count_week = create_count_week(count_week_start, count_week_end)

print('Initialization complete')

# Main

In [None]:
if __name__ == '__main__':
    # Fetch the visits for the count date, enrich them with the bulk (basic)
    # dataset, then keep only participant visits that fall inside the circle.

    # geo_data supplies the circle geometry looked up by visits_in_circle.
    geo_data = build_geodata(parameters)

    # May need bootstrapping
    participants = get_participants(circle_prefix)

    # Only the count day is queried here; `count_week` holds the wider window
    # if the whole count week is wanted instead.
    xdates = [date_of_count]
    visits = ebird_extra.get_visits_for_dates(region_codes, xdates)
    print(f'Checklists filed in count circle: {visits.shape[0]}')
    visits = transform_visits(visits)

    # Add bulk data extras here
    visits = use_basic_dataset(visits, xdates, region_codes)
    print(f'Checklists after basic dataset: {visits.shape[0]}')

    visits_of_interest = visits_in_circle(participants, geo_data, circle_code, visits)
    # A bare expression nested inside this `if` is never displayed, so the
    # shapes are printed explicitly.
    print(f'Visits of interest shape: {visits_of_interest.shape}, all visits shape: {visits.shape}')

In [None]:
# NOTE(review): this always raises ZeroDivisionError. Presumably a deliberate
# stop so that running all cells halts before the Experiments section -- confirm.
1/0

# Experiments