# SCVAS-Count

# Description

Count day tasks for all four SCVAS count circles:
- SAN JOSE CBC (CASJ)
- PALO ALTO CBC (CAPA)
- CALERO-MORGAN HILL (CACR)
- MOUNT HAMILTON (CAMH)

Prior to count day, run Service-Parse so that we have single- and double-column versions of the official checklist
for the circle. Confirm that the annotations are correct. Service-Parse looks for CSV, Excel or PDF files with names
such as `CASJ-2020-<otherstuff>`.

We don't have a solution this year for obtaining the list of eBird names. To work around this, create the
filers_matrix first, then make the participants list from that.

# Environment

In [None]:
import warnings
# Debugging aid: uncomment to show every warning ('always'), or change the
# action to 'error' to raise on the first warning instead.
# warnings.simplefilter('always')

# Ignore RuntimeWarning raised from modules whose name matches 'geopandas'.
warnings.filterwarnings("ignore", category=RuntimeWarning, module='geopandas')
# Example of the warning being silenced:
# /Users/john/.pyenv/versions/py386/lib/python3.8/site-packages/geopandas/geodataframe.py:422:
# RuntimeWarning: Sequential read of iterator was interrupted. Resetting iterator. This can negatively
# impact the performance. for feature in features_lst:

## Library Imports

In [None]:
import sys
sys.path.append('common')
sys.path.append('textextractor')
sys.path.append('taxonomy')

import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import geopandas as gpd

## Local Imports

In [None]:
import common_jupyter

# https://medium.com/@rrfd/cookiecutter-data-science-organize-your-projects-atom-and-jupyter-2be7862f487e
from common_paths import *

from local_translation_context import LocalTranslationContext
from taxonomy import Taxonomy
from ebird_extras import EBirdExtra
from parameters import Parameters

from count_day_tasks import summarize_checklists, create_full_circle_summary, get_participants, \
    subids_for_pete_dunten, add_bob_hirt, get_personal_checklist_details

from datetime_manipulation import create_count_week
from checklist_manipulation import create_checklist_meta

from write_final_checklist import write_final_checklist_spreadsheet, excel_columns, \
    sheet_info_for_party_efforts, sheet_info_for_party_details, sheet_info_for_rarities, sheet_info_for_filers
from autoparty import sheet_info_for_autoparty, generate_autoparty
from locations_map import create_coverage_map, create_potential_duplicates_map
from utilities_kml import build_geodata, build_location_data, update_geo_data_with_clustering, build_location_meta
from ebird_visits import transform_visits, visits_in_circle
from utilities_clustering import generate_cluster_table, plot_elbow_curve
from filers_matrix import create_filers_matrix
from checklist_manipulation import create_checklist_meta, write_checklist_meta, find_location_near_duplicates
from checklist_manipulation import construct_team_details, construct_team_efforts

# Code

# Initialization

In [None]:
# Session bootstrap: credentials, circle parameters, shared singletons and
# the convenience values that the later cells read as globals.
print(f'Start : {datetime.now():%Y-%m-%d %H:%M:%S}')
print('Initializing...')

# Credentials override.
# Samples/eBirdCredentials.yml shows the expected file layout.
my_credentials_storage_path = Path('/Volumes/TSecure3/other/')
eBirdCredential_path = my_credentials_storage_path / 'eBirdCredentials.yml'

# Circle override: this prefix selects the matching parameter file among the
# many kept in the Local folder.
circle_prefix = 'CAPA-2020-'

# Parameters for the selected circle.
parameters = Parameters(local_parameters_path, system_parameters_path, circle_prefix, False)

local_translation_context = LocalTranslationContext(local_parameters_path, system_parameters_path)
# DEBUG: the context is a singleton, so reload it to allow a test/edit cycle
# without restarting the kernel.
local_translation_context.reload()

# Singletons shared by the rest of the notebook.
country = parameters.parameters.get('NationalCode', 'US')
ebird_extra = EBirdExtra(eBirdCredential_path, cache_path, country)
taxonomy = Taxonomy(cache_path, ebird_extra)

# Frequently used parameter values. The count week falls back to the count
# date itself when no explicit start/end is configured.
circle_code = parameters.parameters.get('CircleAbbrev', 'XXXX')
date_of_count = parameters.parameters['CountDate']
count_week_start = parameters.parameters.get('CountWeekStart', date_of_count)
count_week_end = parameters.parameters.get('CountWeekEnd', date_of_count)
# 'eBirdRegion' is a comma-separated string; strip whitespace around each code.
region_codes = list(map(str.strip, parameters.parameters['eBirdRegion'].split(',')))

# Will drop any dates in the future
count_week = create_count_week(count_week_start, count_week_end)

print('Initialization complete')

# Main

In [None]:
if __name__ == '__main__':
    create_project_paths()

    # Flip to False to process the whole count week instead of count day only.
    count_day_only = True

    geo_data = build_geodata(parameters)

    # The participants list may need bootstrapping.
    participants = get_participants(circle_prefix)

    # Dates to query: just the count date, or every date in the count week.
    if count_day_only:
        xdates = [date_of_count]
    else:
        xdates = count_week

    visits = ebird_extra.get_visits_for_dates(region_codes, xdates)
    # NOTE(review): this count is printed before visits_in_circle() filters
    # the visits, so it may cover the whole eBird region(s) rather than only
    # the count circle - confirm the wording.
    print(f'Checklists filed in count circle: {visits.shape[0]}')
    visits = transform_visits(visits)

    visits_of_interest = visits_in_circle(participants, geo_data, circle_code, visits)

    # Reset before recomputing; presumably so a failed run does not leave
    # stale values from an earlier execution in the kernel - confirm.
    cluster_table = None
    centers_df = None
    geo_data, cluster_table, centers_df = generate_cluster_table(
        visits_of_interest, geo_data, parameters, True)

    hotspots, center_pt = ebird_extra.get_hotspots(region_codes)
    location_data = build_location_data(hotspots, visits)

    circle_matrix, unique_circle_filers = create_filers_matrix(
        circle_prefix, visits_of_interest, location_data)

    # Show the filers so the eBirder list can be adjusted by hand if needed.
    print('\n',', '.join(unique_circle_filers))
    print('\n***** ADJUST EBIRDERS IF NECESSARY *****\n')

In [None]:
%%time

# Extra submission IDs are only collected for the CACR circle.
if circle_code == 'CACR':
    additional_subids = subids_for_pete_dunten(parameters)
else:
    additional_subids = None

personal_checklists = get_personal_checklist_details(
    visits_of_interest, xdates, additional_subids, ebird_extra, taxonomy)

# CACR also gets an extra contribution merged into the personal checklists.
if circle_code == 'CACR':
    personal_checklists = add_bob_hirt(xdates, taxonomy, personal_checklists)

# Metadata derived from the checklists and from the locations they cover.
checklist_meta, near_duplicates = create_checklist_meta(
    personal_checklists, visits_of_interest, location_data)

location_meta = build_location_meta(
    geo_data, personal_checklists, location_data, cluster_table)

# Coverage map for the circle; near_duplicates is passed along to it.
mm = create_coverage_map(
    visits_of_interest, parameters, geo_data, centers_df, near_duplicates)

In [None]:
# Service-Parse writes to outputs_path
# The template is the '<circle_prefix>Single.xlsx' file found there
# (presumably the single-column checklist produced by Service-Parse - confirm).
template_path = outputs_path / f'{circle_prefix}Single.xlsx'

# Summarize the personal checklists against the template; the returned
# rarities_df feeds the rarities sheet of the final spreadsheet.
rarities_df = summarize_checklists(personal_checklists, taxonomy, template_path,
                         parameters, checklist_meta, geo_data, location_data, location_meta)

# Manual checkpoint: review the sector checklists before building the summary.
print('\n***** ADJUST SECTOR CHECKLISTS IF NECESSARY *****\n')

In [None]:
%%time

# Extra sheets passed to create_full_circle_summary, in this order:
# party efforts, party details, rarities, filers matrix, autoparty.
additional_sheets = [
    sheet_info_for_party_efforts(construct_team_efforts(checklist_meta)),
    sheet_info_for_party_details(construct_team_details(checklist_meta, location_data)),
    sheet_info_for_rarities(rarities_df),
    sheet_info_for_filers(circle_matrix),
    sheet_info_for_autoparty(generate_autoparty(checklist_meta, location_data))
]

# Build the full-circle summary from the template plus the sheets above.
summary = create_full_circle_summary(template_path, taxonomy, 
                                     local_translation_context, parameters, additional_sheets)

In [None]:
# Closing timestamp, in the same layout as the 'Start' line.
print(f'Done  : {datetime.now():%Y-%m-%d %H:%M:%S}')

In [None]:
# Raises ZeroDivisionError.
# NOTE(review): presumably a deliberate error so that "Run All" stops here and
# does not continue into the Experiments section below - confirm.
1/0

# Experiments