## Validation for Connectivity as Patch Size

This notebook generates, from the observation data, the dataframes that are used for Omniscape validation.

In [6]:
import os
import pandas as pd
from birdmaps import ebird_db
from ecoscape_utilities.bird_runs import BirdRun
from scgt import GeoTiff

In [7]:
# Data folder handed to BirdRun below.  The path is relative, so it is
# resolved against the working directory the notebook is run from.
DATA_PATH = "data/CA-Final"

In [8]:
# Shared BirdRun helper built on DATA_PATH; the cells below use it to look up
# the per-bird runs, create output folders, and locate the observation files.
bird_run = BirdRun(DATA_PATH)

def create_bird_runs(target):
    """Builds the list of bird runs for the output target `target`.

    One run per species is obtained from the module-level `bird_run` helper,
    with `target` forwarded as `run_name`.  The folders that hold each run's
    repopulation and gradient files are then created if they are missing.

    Returns the runs in the order: Acorn Woodpecker, Steller's Jay.
    """
    # (nickname, display name) of every species to process.
    species = (
        ("acowoo", "Acorn Woodpecker"),
        ("stejay", "Steller's Jay"),
    )

    runs = [
        bird_run.get_bird_run(nickname, name, run_name=target)
        for nickname, name in species
    ]

    # Creates the output folders, if missing.
    for run in runs:
        for out_fn in (run.repopulation_fn, run.gradient_fn):
            bird_run.createdir_for_file(out_fn)

    return runs


For each bird, we build a pandas dataframe with data for each square where checklists
were recorded.  To do so, we read the CSV produced by `GenerateValidationData.ipynb` into
a pandas dataframe and, for each row of the dataframe, we add information about the
amounts of habitat and repopulation.

This is a time-consuming operation, as we need to access the repopulation
file for each of the squares.  You only need to run this once for each bird run;
after that, you can analyze the resulting data as much as you like.

In [9]:
# Bird runs for the "patch_sizes_torch" output target; create_bird_runs also
# creates the output folders for each run if they are missing.
birds = create_bird_runs("patch_sizes_torch")

In [10]:
# Parameters identifying the observation file to read; all three are passed to
# bird_run.get_observations_all_fn below.
# NOTE(review): presumably these must match the values used when the file was
# produced by GenerateValidationData.ipynb -- confirm.
max_distance = 2
date_range = ("2012-01-01", "2018-12-31")
num_sample_squares = 20000 # Sampling number for the squares.

# For every bird run: load its rasters, augment the observation data with the
# per-square repopulation and habitat values, and save the result as a CSV.
for bird in birds:
    # Repopulation and habitat GeoTiffs of this bird run.
    repop = GeoTiff.from_file(bird.repopulation_fn)
    hab = GeoTiff.from_file(bird.habitat_fn)
    # Name of the observation file; the date range is passed as one string,
    # "2012-01-01-2018-12-31" (start and end joined by "-").
    obs_fn = bird_run.get_observations_all_fn(
        bird.obs_path, max_distance=max_distance,
        date_range="-".join(date_range),
        num_squares=num_sample_squares)
    
    validation = ebird_db.Validation(obs_fn, bird.habitat_fn)

    # Augments the dataframe with the values for each square of repopulation and habitat.
    # NOTE(review): the meaning of tile_scale=3 and div_by_255=False is defined
    # by ebird_db.Validation.get_repop_ratios and is not visible here -- confirm.
    df = validation.get_repop_ratios(repop, hab, tile_scale=3, div_by_255=False)
    # Computes birds and sightings per checklist.
    # ObsRatio = NumBirdChecklists / NumChecklists; BirdRatio = NumBirds / NumChecklists.
    df["ObsRatio"] = df["NumBirdChecklists"] / df["NumChecklists"]
    df["BirdRatio"] = df["NumBirds"] / df["NumChecklists"]

    # Writes the resulting dataset.
    # With the default to_csv options the dataframe index is written as the
    # first column of the file.
    df.to_csv(bird.obs_csv_path)
    print("Done with", bird.nickname)

Done with acowoo
Done with stejay
