# Description:
Create a grid and assign eBird checklists to cells in the grid.

In [None]:
import os
import pandas as pd
import numpy as np
import math

In [None]:
# Show up to 100 columns and 100 rows when a DataFrame is displayed.
# The fully qualified option names are used because the short patterns
# 'max_columns' / 'max_rows' also match the 'styler.render.*' options in
# newer pandas releases, which makes set_option raise OptionError.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [None]:
# Show the current working directory before it is changed.
os.getcwd()

In [None]:
# Root data directory. NOTE: this is a hard-coded absolute path; the relative
# paths used in the rest of the notebook are resolved against it.
path = '/Users/alvastrand/Documents/OU/Research/data/'
# Change current working directory
os.chdir(path)

In [None]:
# Show the working directory again, after the os.chdir() call.
os.getcwd()

In [None]:
# Values embedded in the input/output filenames built by the functions below.
# start_date / end_date look like MMDD strings -- TODO confirm.
start_date = '0101'
end_date = '0731'
# month and year follow 'rel' in the filenames (e.g. 'relApr-2020');
# presumably the eBird data release -- TODO confirm.
month = 'Apr'
year = '2020'
# Region label that comes right after the 'ebd_' prefix in the filenames.
countries_states = 'US_states_east_Mississippi'

In [None]:
# Load the species table; its 'species_code' column drives the loop at the
# end of the notebook.
subdir = 'output/'
filename = 'obligate_aerial_insectivores_ebird_species_codes.csv'
df = pd.read_csv(f'{subdir}{filename}')

# Number of rows read
print(df.shape[0])

In [None]:
# Preview the first two rows of the species table.
df.head(2)

In [None]:
# Grid extent: the coordinate extremes rounded outward to whole degrees.
# The eastern edge gets one extra degree on top of the rounding.
# (An alternative eastern extreme, -66.9502, was also noted; it rounds the
# same way.)
min_lat, max_lat = math.floor(23.88817), math.ceil(48.2235)
min_lon, max_lon = math.floor(-92.88799), math.ceil(-66.94957) + 1
print(min_lat, max_lat, min_lon, max_lon, sep='\n')
# 23
# 49
# -93
# -65

# Start (lower) values of the 2-degree cells along each axis
list_lat = range(min_lat, max_lat, 2)
list_lon = range(min_lon, max_lon, 2)

# Latitude starts and how many there are
print(list_lat, len(list_lat), sep='\n')
# range(23, 49, 2)
# 13

# Longitude starts and how many there are
print(list_lon, len(list_lon), sep='\n')
# range(-93, -65, 2)
# 14

In [None]:
def get_grid_cell_ids(start_date, end_date, month, year, *args, lats=None, lons=None):
    """Build the table of grid cells and save it as a CSV file.

    Cells are numbered from 0, looping over latitudes on the outside and
    longitudes on the inside. A cell runs from its own start value to the
    next start value on that axis; the last cell on each axis ends 2 degrees
    after its start.

    Parameters
    ----------
    start_date, end_date, month, year : str
        Components of the output filename.
    *args
        ``args[0]`` is the region label (``countries_states``) that goes
        into the output filename. It is required.
    lats, lons : sequence of numbers, optional
        Start (lower) values of the cells on each axis. When omitted, the
        notebook-level ``list_lat`` / ``list_lon`` are used.

    Returns
    -------
    pandas.DataFrame
        One row per cell with the columns ``grid_cell``, ``min_lat``,
        ``max_lat``, ``min_lon`` and ``max_lon``. The same table is written
        to ``eBird/ebd_output/``.

    Raises
    ------
    TypeError
        If the region label is missing (the output filename cannot be
        built without it).
    """
    # Fail early with a clear message instead of hitting an unbound
    # `filename` only after the whole table has been built.
    if not args:
        raise TypeError('get_grid_cell_ids() requires the countries_states '
                        'label as an extra positional argument')
    countries_states = args[0]

    lat_starts = list(list_lat if lats is None else lats)
    lon_starts = list(list_lon if lons is None else lons)

    # Upper bound of every cell: the next start value, or start + 2 for the
    # last cell. The [-1:] slice keeps this valid for an empty axis.
    lat_stops = lat_starts[1:] + [start + 2 for start in lat_starts[-1:]]
    lon_stops = lon_starts[1:] + [start + 2 for start in lon_starts[-1:]]

    bounds = [
        (lat_lo, lat_hi, lon_lo, lon_hi)
        for lat_lo, lat_hi in zip(lat_starts, lat_stops)
        for lon_lo, lon_hi in zip(lon_starts, lon_stops)
    ]
    df_ids = pd.DataFrame(
        [(cell_id, *cell) for cell_id, cell in enumerate(bounds)],
        columns=['grid_cell', 'min_lat', 'max_lat', 'min_lon', 'max_lon'],
    )

    print(df_ids.shape)

    subdir = 'eBird/ebd_output/'
    filename = (f'ebd_{countries_states}_{start_date}_{end_date}'
                f'_complete_zerofilled_grid_cell_ids_rel{month}-{year}.csv')
    print(filename)

    df_ids.to_csv(subdir + filename, index=False)

    return df_ids

In [None]:
def name_of_function(species, start_date, end_date, month, year, *args, lats=None, lons=None):
    """Assign every row of one species file to a grid cell and save the result.

    Reads the species CSV from ``eBird/ebd_output/``, adds an integer
    ``grid_cell`` column based on the ``latitude`` and ``longitude`` columns,
    and writes the table back under a ``..._grid_cells_...`` filename.

    A row belongs to a cell when its coordinate is strictly greater than the
    cell's start value and less than or equal to the cell's end value. A cell
    ends at the next start value on that axis; the last cell ends 2 degrees
    after its start. Cell IDs start at 0 and increase with longitude first,
    then latitude: ``ID = lat_index * number_of_lon_cells + lon_index``.

    Parameters
    ----------
    species : str
        Species code used in the input and output filenames.
    start_date, end_date, month, year : str
        Components of the input and output filenames.
    *args
        ``args[0]`` is the region label (``countries_states``) that goes
        into the filenames. It is required.
    lats, lons : sequence of numbers, optional
        Start (lower) values of the cells on each axis, in ascending order.
        When omitted, the notebook-level ``list_lat`` / ``list_lon`` are used.

    Returns
    -------
    pandas.DataFrame
        The input table with the added ``grid_cell`` column.

    Raises
    ------
    TypeError
        If the region label is missing (the filenames cannot be built).
    ValueError
        If any row lies outside the grid (including rows with a missing
        coordinate), so it cannot be given an integer cell ID.
    """
    # Fail early with a clear message instead of hitting an unbound
    # `filename` at the read_csv call.
    if not args:
        raise TypeError('name_of_function() requires the countries_states '
                        'label as an extra positional argument')
    countries_states = args[0]

    subdir = 'eBird/ebd_output/'
    stem = f'ebd_{countries_states}_{species}_{start_date}_{end_date}_complete_zerofilled'
    release = f'rel{month}-{year}.csv'

    filename = f'{stem}_{release}'
    print(filename)

    df = pd.read_csv(subdir + filename)
    print(df.shape)

    lat_starts = list(list_lat if lats is None else lats)
    lon_starts = list(list_lon if lons is None else lons)

    # Cell edges on each axis: every start value plus the end of the last
    # cell. The [-1:] slice keeps this valid for an empty axis.
    lat_edges = np.array(lat_starts + [s + 2 for s in lat_starts[-1:]], dtype=float)
    lon_edges = np.array(lon_starts + [s + 2 for s in lon_starts[-1:]], dtype=float)

    # With side='left', a value v with edges[k] < v <= edges[k + 1] gets
    # insertion point k + 1, so subtracting 1 gives the cell index k.
    # Values at or below the first edge give -1; values above the last edge
    # (and NaN) give an index past the last cell.
    lat_idx = np.searchsorted(lat_edges, df['latitude'].to_numpy(dtype=float), side='left') - 1
    lon_idx = np.searchsorted(lon_edges, df['longitude'].to_numpy(dtype=float), side='left') - 1

    inside = ((lat_idx >= 0) & (lat_idx < len(lat_starts))
              & (lon_idx >= 0) & (lon_idx < len(lon_starts)))
    n_outside = int((~inside).sum())

    # Report unassigned rows BEFORE building the integer column; checking
    # afterwards could never report anything.
    print(n_outside > 0)
    if n_outside:
        raise ValueError(f'{n_outside} of {len(df)} rows in {filename} fall outside '
                         'the grid and cannot be assigned a grid cell')

    df['grid_cell'] = (lat_idx * len(lon_starts) + lon_idx).astype(int)
    print(df.shape)

    print(df['grid_cell'].min())
    print(df['grid_cell'].max())
    # Get number of unique grid cells
    print(df['grid_cell'].nunique())

    filename = f'{stem}_grid_cells_{release}'
    print(filename)

    df.to_csv(subdir + filename, index=False)

    return df

In [None]:
# df_ids = get_grid_cell_ids(start_date, end_date, month, year, countries_states)

In [None]:
# df_ids.head(2)

In [None]:
# species = 'barswa'

# df_grid_cells = name_of_function(species, start_date, end_date, month, year, countries_states)

In [None]:
# df_grid_cells.head(2)

In [None]:
# Number of species files actually processed (skipped species do not count)
species_cnt = 0

# Go through the species table row by row and add grid cells to each
# species' checklist file.
for i, species in enumerate(df['species_code']):

    print(i)
    print(species)

    # Species codes that are deliberately left out of this run
    if species in ('souwpw1', 'bucnig', 'compoo', 'whtswi', "blkswi", 'barswa'):
        continue

    df_grid_cells = name_of_function(species, start_date, end_date, month, year, countries_states)

    species_cnt += 1

In [None]:
# Number of species processed by the loop above (skipped species are not counted)
print(species_cnt)