# Description
Calculate the proportion of checklists with at least one Barn Swallow observation for each grid cell and year.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Show the working directory the notebook starts in.
os.getcwd()

In [None]:
# Make the data directory the working directory so that the relative paths
# used further down ('output/', 'eBird/ebd_output/') resolve against it.
# NOTE(review): this absolute path is machine-specific.
path = '/Users/alvastrand/Documents/OU/Research/data/'
os.chdir(path)

In [None]:
# Confirm that the working directory is now the data directory.
os.getcwd()

In [None]:
# Show up to 100 columns/rows when displaying DataFrames.
# Use the fully qualified option names: the short forms 'max_columns' and
# 'max_rows' are matched by pattern and raise OptionError as soon as more
# than one option matches (e.g. 'styler.render.max_columns').
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [None]:
# Parameters selecting which eBird extract to process. They are passed to
# name_of_function, which concatenates them into the input/output file names.
start_date = '0101'  # presumably MMDD (1 January) - confirm
end_date = '0731'  # presumably MMDD (31 July) - confirm
month = 'Apr'  # month part of the 'rel<month>-<year>' file-name suffix
year = '2020'  # year part of the 'rel<month>-<year>' file-name suffix
countries_states = 'US_states_east_Mississippi'  # region tag used in the file names

In [None]:
# Load the species table; the processing loop further down reads its
# 'species_code' column.
subdir = 'output/'
filename = 'obligate_aerial_insectivores_ebird_species_codes.csv'
df = pd.read_csv(f'{subdir}{filename}')

# Number of rows in the species table
print(df.shape[0])

In [None]:
# Preview the first two rows of the species table.
df.head(2)

In [None]:
def _build_ebd_filename(countries_states, species, start_date, end_date, suffix, month, year):
    # Assemble 'ebd_<region>_<species>_<start>_<end>_<suffix>_rel<month>-<year>.csv'.
    return ('ebd_' + countries_states + '_' + species + '_' + start_date + '_' + end_date +
            '_' + suffix + '_rel' + month + '-' + year + '.csv')


def name_of_function(species, start_date, end_date, month, year, *args):
    """Compute, per grid cell and date, the proportion of checklists reporting a species.

    Reads the zero-filled checklist file for `species` from 'eBird/ebd_output/',
    aggregates it by ('grid_cell', 'observation_date'), writes the aggregated
    table to the same directory (same file name with '_proportions' inserted
    before the 'rel' suffix) and prints diagnostic counts along the way.

    The input file must contain the columns 'checklist_id', 'grid_cell',
    'observation_date' and 'species_observed'.

    Parameters
    ----------
    species : str
        Species code used in the file names (e.g. 'barswa').
    start_date, end_date : str
        Date-range parts of the file names.
    month, year : str
        Parts of the 'rel<month>-<year>' file-name suffix.
    *args
        args[0] is the countries/states tag of the file names. It must be
        supplied: no file-name convention without it is defined here.

    Returns
    -------
    tuple
        (df, df_cnt, df_sum, df_prop_greater_than, df_prop_greater_than_cnt,
        df_nb_prop, df_grp), where
        - df: the input checklists plus 'species_observed_binary_values';
        - df_cnt: number of checklists per grid cell and date;
        - df_sum: number of checklists with the species per grid cell and date;
        - df_prop_greater_than: rows of df_grp with a proportion > 0;
        - df_prop_greater_than_cnt: per grid cell and year, the number of
          dates with a proportion > 0 ('nb_prop');
        - df_nb_prop: how many grid cell/year combinations ('count') share
          each 'nb_prop' value;
        - df_grp: counts and proportion per grid cell and date (also written
          to disk).

    Raises
    ------
    ValueError
        If no countries/states tag is passed.
    """
    if not args:
        # Without the tag the file name cannot be built; the function used to
        # fail later with an UnboundLocalError on `filename`.
        raise ValueError(
            'name_of_function requires the countries/states tag as an extra '
            'positional argument')

    countries_states = args[0]
    subdir = 'eBird/ebd_output/'
    group_keys = ['grid_cell', 'observation_date']

    filename = _build_ebd_filename(countries_states, species, start_date, end_date,
                                   'complete_zerofilled_grid_cells', month, year)
    print(filename)

    df = pd.read_csv(subdir + filename)

    print(df['species_observed'].value_counts())
    # Example (Barn Swallow run):
    # False: 6,657,547 checklists that do not have an observation of the species.
    # True: 802,259 checklists that have at least one observation of the species.

    # 1.0 where the species was observed, 0.0 otherwise, so that a sum counts
    # the checklists with the species.
    df['species_observed_binary_values'] = np.where(df['species_observed'].eq(True), 1.0, 0.0)

    print(df.shape)
    # Example: (7,459,806, 36)

    # Calculate the total number of checklists for each grid cell and date
    df_cnt = df.groupby(group_keys)[['checklist_id']].count()
    df_cnt = df_cnt.rename(columns={'checklist_id': 'nb_checklists'})
    print(df_cnt.shape)
    # Example: (272,264, 1)

    # Calculate the number of checklists with at least one observation of the
    # species for each grid cell and date
    df_sum = df.groupby(group_keys)[['species_observed_binary_values']].sum()
    df_sum = df_sum.rename(columns={'species_observed_binary_values': 'nb_checklists_species'})
    print(df_sum.shape)
    # Example: (272,264, 1)

    df_grp = df_cnt.merge(df_sum, left_index=True, right_index=True)
    print(df_grp.shape)
    # Example: (272,264, 2)

    # Calculate the proportion of checklists with at least one observation of
    # the species for each grid cell and date
    df_grp['prop'] = df_grp['nb_checklists_species'] / df_grp['nb_checklists']
    df_grp = df_grp.reset_index()
    print(df_grp.shape)
    # Example: (272,264, 5)
    # Number of combinations of grid cells and dates that have a proportion equal to 0
    print(len(df_grp[df_grp['prop'] == 0]))
    # Example: 195,710
    # Number of combinations of grid cells and dates that have a proportion greater than 0
    print(len(df_grp[df_grp['prop'] > 0]))
    # Example: 76,554
    df_grp['observation_date_dt'] = pd.to_datetime(df_grp['observation_date'])
    # The year is taken as the first four characters of the date string.
    df_grp['year'] = df_grp['observation_date'].astype(str).str[:4]
    print(df_grp.shape)
    # Example: (272,264, 7)

    df_prop_greater_than = df_grp[df_grp['prop'] > 0]
    # Number of grid cells that have at least one proportion that is greater than 0
    print(len(df_prop_greater_than['grid_cell'].unique()))
    # Example: 94
    print(df_prop_greater_than.shape)
    # Example: (76,554, 7)

    # Calculate the number of proportions greater than 0 for each grid cell and year
    df_prop_greater_than_cnt = (
        df_prop_greater_than.groupby(['grid_cell', 'year'])[['prop']]
        .count()
        .reset_index()
        .rename(columns={'prop': 'nb_prop'})
    )
    print(df_prop_greater_than_cnt.shape)
    # Example: (918, 3)
    # 918 combinations of grid cells and years have at least one proportion that is greater than 0.

    # Frequency table of 'nb_prop'. The column names are set explicitly because
    # the names produced by value_counts() differ between pandas versions.
    df_nb_prop = (
        df_prop_greater_than_cnt['nb_prop']
        .value_counts()
        .rename_axis('nb_prop')
        .reset_index(name='count')
    )

    # Smallest number of proportions greater than 0 encountered for a given grid cell and year
    print(df_nb_prop['nb_prop'].min())
    # Example: 1
    # Largest number of proportions greater than 0 encountered for a given grid cell and year
    print(df_nb_prop['nb_prop'].max())
    # Example: 151

    filename = _build_ebd_filename(countries_states, species, start_date, end_date,
                                   'complete_zerofilled_grid_cells_proportions', month, year)
    print(filename)

    df_grp.to_csv(subdir + filename, index=False)

    return df, df_cnt, df_sum, df_prop_greater_than, df_prop_greater_than_cnt, df_nb_prop, df_grp

In [None]:
# species = 'barswa'

# df_grid_cells, df_cnt, df_sum, df_prop_greater_than, df_prop_greater_than_cnt, df_nb_prop, \
# df_grp = name_of_function(species, start_date, end_date, month, year, countries_states)

In [None]:
# df_grid_cells.head(2)

In [None]:
# df_cnt.head(2)

In [None]:
# df_sum.head(2)

In [None]:
# df_prop_greater_than.head(2)

In [None]:
# df_prop_greater_than_cnt.head(2)

# 1964? 1976?

In [None]:
# df_nb_prop.head(2)

In [None]:
# df_grp.head(2)

In [None]:
# Run the proportion calculation for every species code in the table except
# the ones in the skip set, counting how many species were processed.
skipped_species = {'souwpw1', 'bucnig', 'compoo', 'whtswi', 'blkswi', 'barswa'}
species_cnt = 0

for i, species in enumerate(df['species_code']):

    print(i)
    print(species)

    if species in skipped_species:
        continue

    (df_grid_cells, df_cnt, df_sum, df_prop_greater_than,
     df_prop_greater_than_cnt, df_nb_prop, df_grp) = name_of_function(
        species, start_date, end_date, month, year, countries_states)

    species_cnt += 1

In [None]:
# Number of species actually processed (i.e. not skipped) by the loop above.
print(species_cnt)