# SCVAS-Count

# Description

Count day tasks for all four SCVAS count circles  
- SAN JOSE CBC (CASJ)
- PALO ALTO CBC (CAPA)
- CALERO-MORGAN HILL (CACR)
- MOUNT HAMILTON (CAMH)

Prior to count day, run Service-Parse so that we have single- and double-column versions of the official checklist
for the circle. Confirm that the annotations are correct. Service-Parse looks for files named, e.g.,
`CASJ-2020-<otherstuff>` that are CSV, Excel or PDF.

We don't have a solution this year for obtaining the list of eBird names. To work around this, create the
filers_matrix first, then make the participants list from that.

# Environment

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
# Debugging aids (leave commented out for normal runs):
# warnings.simplefilter('always')  # report every warning
# warnings.simplefilter('error')   # turn warnings into exceptions so execution breaks


# Ignore RuntimeWarning raised from modules whose name starts with 'geopandas';
# the warning observed in a previous run is quoted below for reference.
warnings.filterwarnings("ignore", category=RuntimeWarning, module='geopandas')
# /Users/john/.pyenv/versions/py386/lib/python3.8/site-packages/geopandas/geodataframe.py:422: 
# RuntimeWarning: Sequential read of iterator was interrupted. Resetting iterator. This can negatively 
# impact the performance. for feature in features_lst:

In [None]:
# Warning observed from xlsxwriter in a previous run (quoted verbatim, including the "in in"):
# /Users/john/.pyenv/versions/py387/lib/python3.8/site-packages/xlsxwriter/worksheet.py:2590: UserWarning: Must have at least one data row in in add_table()
#   warn("Must have at least one data row in in add_table()")
#
# Promote that specific UserWarning to an exception. The `message` argument of
# filterwarnings() is a regular expression matched against the start of the
# warning text, so the literal text is escaped: otherwise the trailing "()" is
# an empty regex group rather than a pair of parentheses.
import re

warnings.filterwarnings(
    "error",
    re.escape("Must have at least one data row in in add_table()"),
    category=UserWarning,
    module='xlsxwriter',
)



## Library Imports

In [None]:
import sys
sys.path.append('common')
sys.path.append('textextractor')
sys.path.append('taxonomy')

import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import geopandas as gpd

## Local Imports

In [None]:
import common_jupyter

# https://medium.com/@rrfd/cookiecutter-data-science-organize-your-projects-atom-and-jupyter-2be7862f487e
from common_paths import *

from local_translation_context import LocalTranslationContext
from taxonomy import Taxonomy
from ebird_extras import EBirdExtra
from parameters import Parameters

from count_day_tasks import summarize_checklists, create_full_circle_summary, get_participants, \
    get_personal_checklist_details, check_prerequisites, additional_count_checklists, process_additional_subids

from ebird_basic_dataset import use_basic_dataset

from datetime_manipulation import create_count_week
from checklist_manipulation import create_checklist_meta

from write_final_checklist import write_final_checklist_spreadsheet, excel_columns, \
    sheet_info_for_party_efforts, sheet_info_for_party_details, sheet_info_for_rarities, sheet_info_for_filers
from autoparty import sheet_info_for_autoparty, generate_autoparty
from locations_map import create_coverage_map, create_potential_duplicates_map
from utilities_kml import build_geodata, build_location_data, update_geo_data_with_clustering, \
    build_location_meta, add_pseudo_location_data
from ebird_visits import transform_visits, visits_in_circle
from utilities_clustering import generate_cluster_table, plot_elbow_curve
from filers_matrix import create_filers_matrix
from checklist_manipulation import create_checklist_meta, write_checklist_meta, find_location_near_duplicates
from checklist_manipulation import construct_team_details, construct_team_efforts

In [None]:
from IPython import get_ipython

# Handle on the running interactive shell; reused for every magic below.
ipython = get_ipython()

# Load the autoreload extension only if this session has not loaded it yet.
if 'autoreload' not in ipython.extension_manager.loaded:
    ipython.run_line_magic('load_ext', 'autoreload')

# Equivalent of the `%autoreload 2` line magic.
ipython.run_line_magic('autoreload', '2')

# Code

# Initialization

In [None]:
# Initializations: credentials, parameters, singletons and count dates.
print(f'Start : {datetime.now():%Y-%m-%d %H:%M:%S}')
print('Initializing...')

# Overrides - Credentials
# Samples/eBirdCredentials.yml shows an example of the expected file.
my_credentials_storage_path = Path('/Volumes/TSecure3/other/')
eBirdCredential_path = my_credentials_storage_path.joinpath('eBirdCredentials.yml')

# -----------------------------------------------------------------------------------------
# Override - selects the matching parameter file among the many in the Local folder
# -----------------------------------------------------------------------------------------
circle_prefix = 'CAMD-2022-'

# Parameters
parameters = Parameters(local_parameters_path, system_parameters_path, circle_prefix, False)

local_translation_context = LocalTranslationContext(local_parameters_path, system_parameters_path)
# DEBUG: the context is a singleton; reloading allows a test/edit cycle
# without restarting the kernel.
local_translation_context.reload()

# Singletons
country = parameters.parameters.get('NationalCode', 'US')
ebird_extra = EBirdExtra(eBirdCredential_path, cache_path, country)
taxonomy = Taxonomy(cache_path, ebird_extra)

# Convenient Parameters
circle_code = parameters.parameters.get('CircleAbbrev', 'XXXX')
date_of_count = parameters.parameters['CountDate']
# The count week falls back to the single count date when no bounds are configured.
count_week_start = parameters.parameters.get('CountWeekStart', date_of_count)
count_week_end = parameters.parameters.get('CountWeekEnd', date_of_count)
# 'eBirdRegion' is a comma-separated string; trim whitespace around each code.
region_codes = list(map(str.strip, parameters.parameters['eBirdRegion'].split(',')))

# Will drop any dates in the future
count_week = create_count_week(count_week_start, count_week_end)

print('Initialization complete')

# Main

In [None]:
if __name__ == '__main__':
    # Collect the eBird visits for the count date(s), restrict them to the
    # circle, cluster them, and build the filers matrix for manual review.
    create_project_paths()

    count_day_only = True  # set to False to process whole count week

    if not check_prerequisites(circle_prefix):
        # A bare `raise` here has no active exception to re-raise and only
        # produces an opaque RuntimeError; raise the same exception type with
        # a message that says what actually went wrong.
        raise RuntimeError(f'Prerequisite check failed for circle prefix {circle_prefix!r}')

    geo_data = build_geodata(parameters)

    # May need bootstrapping
    participants = get_participants(circle_prefix)

    # Either just the count day or every date in the count week.
    xdates = [date_of_count] if count_day_only else count_week
    visits = ebird_extra.get_visits_for_dates(region_codes, xdates)
    print(f'Checklists filed in count circle: {visits.shape[0]}')
    visits = transform_visits(visits)

    # Add bulk data extras here
    visits = use_basic_dataset(visits, xdates, region_codes)
    print(f'Checklists after basic dataset: {visits.shape[0]}')

    ######## EMCT only!
#     if participants is not None:
#         mask = visits.Name.isin(participants)
#     visits_of_interest = visits[mask].sort_values(by=['locId'])

    # Default to all visits; this value only survives if the call below raises.
    visits_of_interest = visits
    visits_of_interest = visits_in_circle(participants, geo_data, circle_code, visits)

    # Defaults that only survive if generate_cluster_table raises.
    cluster_table, centers_df = None, None
    geo_data, cluster_table, centers_df = generate_cluster_table(visits_of_interest, geo_data, parameters, True)

    hotspots, center_pt = ebird_extra.get_hotspots(region_codes)
    location_data = build_location_data(hotspots, visits)
    location_data = add_pseudo_location_data(location_data, parameters)

    circle_matrix, unique_circle_filers = create_filers_matrix(circle_prefix, visits_of_interest, location_data)

    # Show the filers so the eBirder list can be adjusted by hand if needed.
    print('\n',', '.join(unique_circle_filers))
    print('\n***** ADJUST EBIRDERS IF NECESSARY *****\n')

In [None]:
%%time

# Fetch the detailed personal checklists for the visits of interest.
# Subids in e.g. CACR-2022-AdditionalSubIds.txt are one per line (not comma separated)
# Start from an empty frame so later cells still have a DataFrame when there are no visits.
personal_checklists = pd.DataFrame()
additional_subids = process_additional_subids(circle_prefix, date_of_count)
# # additional_subids = ['S124229095'] # hack
if additional_subids:
    # additional_subids is indexed by the count date here; report how many
    # extra submission ids there are for that date, then list them.
    print(f'additional_subids: {len(additional_subids[date_of_count])}')
    print(f'additional_subids: {additional_subids[date_of_count]}')

# additional_subids = None # Hack
# Only query checklist details when there is at least one visit of interest.
if not visits_of_interest.empty:
    personal_checklists = get_personal_checklist_details(visits_of_interest,
                                       xdates, additional_subids,
                                       ebird_extra, taxonomy)


In [None]:
# Snapshot the checklists before additional_count_checklists() runs so the
# before/after shapes can be compared in the printout.
pre_csv_personal_checklists = personal_checklists.copy()
# None is passed in the first position (the original inline note reads "circle_prefix").
personal_checklists = additional_count_checklists(None, xdates, taxonomy, personal_checklists)
print(pre_csv_personal_checklists.shape, personal_checklists.shape)

# Create some meta data
checklist_meta, near_duplicates = create_checklist_meta(personal_checklists, visits_of_interest, location_data)

location_meta = build_location_meta(geo_data, personal_checklists, location_data, parameters, cluster_table)

# Map the visits of interest, falling back to every visit when that set is empty.
mm = create_coverage_map(
    visits if visits_of_interest.empty else visits_of_interest,
    parameters,
    geo_data,
    centers_df,
    near_duplicates,
)

In [None]:
visits_of_interest.shape

In [None]:
visits.shape

In [None]:
visits

In [None]:
list(set(visits.Name))

In [None]:
# EMCT
# personal_checklists.head()

In [None]:
# personal_checklists.shape

In [None]:
# set(personal_checklists.Name)

In [None]:
# Service-Parse writes to outputs_path; the template used here is the
# '<circle_prefix>Single.xlsx' workbook in that folder.
template_path = outputs_path / f'{circle_prefix}Single.xlsx'

# Summarize the personal checklists against the template; the returned frame
# is kept as rarities_df and later feeds the rarities sheet.
rarities_df = summarize_checklists(personal_checklists, taxonomy, template_path,
                         parameters, checklist_meta, geo_data, location_data, location_meta)

# Reminder to review the generated output by hand before continuing.
print('\n***** ADJUST SECTOR CHECKLISTS IF NECESSARY *****\n')

In [None]:
%%time

# Extra sheets passed to create_full_circle_summary(): party efforts, party
# details, rarities, the filers matrix and the autoparty sheet.
additional_sheets = [
    sheet_info_for_party_efforts(construct_team_efforts(checklist_meta)),
    sheet_info_for_party_details(construct_team_details(checklist_meta, location_data)),
    sheet_info_for_rarities(rarities_df),
    sheet_info_for_filers(circle_matrix),
    sheet_info_for_autoparty(generate_autoparty(checklist_meta, location_data))
]

# Build the full-circle summary from the template plus the extra sheets above.
summary = create_full_circle_summary(template_path, taxonomy, 
                                     local_translation_context, parameters, additional_sheets)

In [None]:
print(f'Done  : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

In [None]:
# NOTE(review): presumably a deliberate ZeroDivisionError so that "Run All"
# stops here, before the debug/experiment cells below — confirm.
1/0

Debug

In [None]:
# Scratch cell for inspecting summary and checklist metadata.
# NOTE(review): summary_base is not defined in any cell visible above — confirm its origin.
summary = summary_base.copy()
# team_cols = set()
summary_common_names = list(summary.CommonName.values)

# Keep only rows whose `sharing` value is not 'secondary', then sort in place
# (rows with missing sort keys come first).
checklist_meta = checklist_meta.copy()[checklist_meta.sharing != 'secondary']
checklist_meta.sort_values(by=['location_group', 'locId', 'obsDt', 'groupId', 'Name'],
                           na_position='first', inplace=True)

# Restrict the metadata to the submission ids present in personal_checklists.
sector_subids = list(personal_checklists.subId.values)
sector_checklist_meta = checklist_meta[checklist_meta.subId.isin(sector_subids)]


# Experiments

In [None]:
gsummary

In [None]:
# Experiment: load the ABA checklist CSV from the taxonomy reference folder.
taxonomy_reference_path = base_path / 'taxonomy' / 'reference'
taxonomy_path = taxonomy_reference_path / 'ABA_Checklist-8.0.7.csv'
# No header row is read from the file; column names are assigned by hand below.
xheader = None
# Skip the first three lines, read every column as a string, and replace
# missing values with empty strings.
tx = pd.read_csv(taxonomy_path, dtype=str, header=xheader, low_memory=False, skiprows=3).fillna('')
tx.columns = ['Group', 'CommonName', 'NomCommun', 'ScientificName', 'Code4', 'v5']
tx

In [None]:
tx[tx.CommonName!=''].reset_index(drop=True)

In [None]:
tx.iloc[0]

In [None]:
nom de l'espèce
NomCommun

In [None]:
circle_prefix

In [None]:
# List the stems of the CSV files in inputs_count_path. When circle_prefixZ
# is set, only stems starting with that prefix are shown; None shows them all.
circle_prefixZ = None
for fpath in inputs_count_path.glob('*.csv'):
    if not circle_prefixZ or fpath.stem.startswith(circle_prefixZ):
        name = fpath.stem
        print(name)

In [None]:
pre_csv_personal_checklists = personal_checklists.copy()
personal_checklists = additional_count_checklists(None, xdates, taxonomy, pre_csv_personal_checklists) #circle_prefix



In [None]:
set(checklist_meta.Name)

In [None]:
mask.any()

In [None]:
participants

In [None]:
visits.shape

In [None]:
visits_of_interest.empty

In [None]:
inputs_count_path

In [None]:
additional_subids

In [None]:
circle_prefix

In [None]:
# set(visits.Name) #'Saldivar' in 

In [None]:
# set(personal_checklists.Name)

In [None]:
name = 'Annette Teng'
name = 'Michelle Nelson'
# set(personal_checklists[personal_checklists.Name==name].subId)

In [None]:
# set(personal_checklists[personal_checklists.Name=='Amy Sanchez'].subId)

In [None]:
# additional_count_checklists(None, xdates, taxonomy, personal_checklists)

In [None]:
date_of_count

In [None]:
unique_circle_filers

In [None]:
!pwd

In [None]:
!open  /Users/john/xdevelopment/birding/automatingcbc

# Experiment

In [None]:
vis = ebird_extra.get_visits_for_dates(region_codes, xdates)

In [None]:
vis

In [None]:
# curl --location -g --request GET 'https://api.ebird.org/v2/product/lists/{{regionCode}}/{{y}}/{{m}}/{{d}}'
curl --location -g --request GET 'https://api.ebird.org/v2/product/lists/US-CA-085/2022/10/08'

In [None]:
cf = ebird_extra.get_checklist_feed_for_region_on_date('US-CA-085', '2022-10-08')
# xdate is e.g. '2020-12-26'

In [None]:
cf.shape

In [None]:
cf.head()

In [None]:
# Global Big Day—14 May 2022
cf2 = ebird_extra.get_checklist_feed_for_region_on_date('US-CA-085', '2022-05-14')
cf2.shape


In [None]:
cf2.memory_usage(deep=True).sum()/1024

In [None]:
rs = ebird_extra.get_regional_statistics_on_a_date('US-CA-085', 2022, 10, 8)
rs

In [None]:
ebird_extra.get_regional_statistics_on_a_date('US-CA', 2022, 10, 8)

In [None]:
sl = ebird_extra.get_species_list_at_a_location('US-CA-085')
sl

In [None]:
# https://ebird.org/region/US-CA-085?yr=all&m=

# Debug

In [None]:
cdict

In [None]:
# Debug: fetch each checklist by submission id and collect the non-empty ones
# as DataFrames.
detailed_checklists = []
for subid in subids:
    cdict = ebird_extra.get_checklist(subid)
    # NOTE(review): there is no None guard here (the original had one commented
    # out); if get_checklist can return None the next line raises — confirm.
    # print(subid, cdict)

    # Birdathon iOS version 1.4.1 adds the subAux field, which breaks turning
    # the response into a dataframe directly; drop it, and subAuxAi, if present.
    cdict.pop('subAux', None)
    cdict.pop('subAuxAi', None)

    checklist = pd.DataFrame(cdict)
    # Not every checklist carries groupId; add it when absent because it is
    # needed later for detecting duplicate (e.g. shared) checklists.
    if 'groupId' not in checklist.columns:
        checklist['groupId'] = None
    if checklist.empty:
        continue
    detailed_checklists.append(checklist)



In [None]:
detailed_checklists

In [None]:
cdict

In [None]:
visits.shape