## Code Setup

In [None]:
import os
import pandas as pd
import scgt
import sys
# Put the "birdmaps" directory (relative to the current working directory) on
# the import path; presumably this is where the project-local modules imported
# later in the notebook live -- verify against the repository layout.
birdmaps_dir = os.path.join(os.getcwd(), "birdmaps")
sys.path.append(birdmaps_dir)

In [None]:
# Execution mode flag: True means everything is assumed to run locally.
IS_LOCAL = True
# Whether to perform the run itself. Keep this False locally, except in the
# rare cases where the run is needed for testing.
DO_RUN = False

# Main data directory. DATA_PATH is the name the rest of the notebook uses;
# here it simply aliases the local path.
# (Alternative dataset, kept for reference:
#  os.path.join(os.getcwd(), "data/CA-EcoScape-Paper"))
DATA_PATH = LOCAL_PATH = "data/CA-Final"

In [None]:
import ebird_db, bird_runs
from scgt import GeoTiff

## Bird Run Definition

In [None]:
# Factory for bird-run descriptors, rooted at the main data directory.
bird_run = bird_runs.BirdRun(DATA_PATH)

# One entry per run: (first positional arg, second positional arg,
# hop_distance, num_spreads). The first two look like a short bird code and
# its common name -- confirm against BirdRun.get_bird_run. Steller's Jay
# appears twice on purpose, with two different hop/spread settings.
_RUN_SPECS = [
    ("acowoo", "Acorn Woodpecker", 2, 20),
    ("stejay", "Steller's Jay", 2, 3),
    ("stejay", "Steller's Jay", 1, 6),
]

# All runs share the same run name, simulation count and validation flag;
# the list preserves the order of _RUN_SPECS.
birds = [
    bird_run.get_bird_run(
        bird_code, bird_name,
        do_validation=True, run_name="Paper10000",
        hop_distance=hop, num_spreads=spreads,
        num_simulations=10000)
    for bird_code, bird_name, hop, spreads in _RUN_SPECS
]


For each bird, we compute a Pandas dataframe with one row for each square in which
checklists were recorded.  To do so, we read the csv produced by `GenerateValidationData.ipynb`
into a Pandas dataframe and, for each row of the dataframe, we add information about the
amounts of habitat and repopulation.

This is a time-consuming operation, as we need to access the repopulation
file for each of the squares.  You only need to run this once for each bird run;
after that, you can analyze the resulting data as many times as you like.

In [None]:
# Parameters identifying which observations file to read for each bird.
max_distance = 2
date_range = ("2012-01-01", "2018-12-31")
num_sample_squares = 20000 # Sampling number for the squares.

validation = ebird_db.Validation()
for bird in birds:
    # Only runs flagged for validation are processed.
    if not bird.do_validation:
        continue

    repop = GeoTiff.from_file(bird.repopulation_fn)
    hab = GeoTiff.from_file(bird.habitat_fn)

    obs_fn = bird_run.get_observations_all_fn(
        bird.obs_path, max_distance=max_distance,
        date_range="-".join(date_range),
        num_squares=num_sample_squares)

    # This reads information on each square: how many checklists, birds, etc.
    # read_csv opens and closes the file by path itself, so no separate
    # open() handle is needed.
    df = pd.read_csv(obs_fn)

    # Augments the dataframe with the values for each square of repopulation and habitat.
    # The return value is not used: the call is expected to add its columns
    # (including "avg_repop", read just below) to df in place.
    validation.get_repop_ratios(df, repop, hab, tile_scale=3)
    # Computes a repopulation range: avg_repop truncated (toward zero) to one
    # decimal place. Done on the whole column at once rather than row by row.
    df["RepopRange"] = (df["avg_repop"] * 10).astype("int64") / 10
    # Computes birds and sightings per checklist.
    df["ObsRatio"] = df["NumBirdChecklists"] / df["NumChecklists"]
    df["BirdRatio"] = df["NumBirds"] / df["NumChecklists"]

    # Writes the resulting dataset.
    df.to_csv(bird.obs_csv_path)
    print("Done with", bird.nickname, bird.hop_distance, bird.num_spreads)