In [None]:
#@title Imports. { vertical-output: true }
from pathlib import Path
from chirp.projects.agile2.agile_modeling_state import agile2_config, agile2_state, download_embeddings, Helpers

# Configuration

Here we set some configuration for names and local filepaths and initialize our agile modeling workflow.

Your Ecosounds "auth_token" can be found by logging in to https://www.ecosounds.org, then clicking on your profile picture in the top left. You can copy your auth token from this profile page. 

In [None]:
# Build the workflow configuration. The `#@param` / `#@markdown` annotations
# render these values as an editable form in Colab. `db_path` and
# `embeddings_folder` may be left empty: a later cell fills in Google Drive
# defaults for any that are still blank.
config = agile2_config(
  #@markdown Location of database containing audio embeddings.
  # db_path="/phil/perch_projects/ci/agile/hoplite_test/db/db.sqlite", #@param {type:'string'}
  db_path="", #@param {type:'string'}
  search_dataset_name="", #@param {type:'string'}
  #@markdown Identifier (eg, name) to attach to labels produced during validation.
  annotator_id="", #@param {type:'string'}
  # Ecosounds credentials and API host. Enter the token in the form only and
  # avoid saving/sharing the notebook with a real token filled in.
  baw_config= {
    'auth_token': "",  #@param {type:'string'}
    'domain': 'api.ecosounds.org'
  },
  embeddings_folder="", #@param {type:'string'}
)

# NOTE(review): the return value is discarded, so this presumably updates
# `config` in place from the JSON file -- confirm. The path is relative to the
# current working directory and is unlikely to exist in a fresh Colab session;
# verify how `from_json` behaves when the file is missing.
config.from_json("../../../local/esa/agile_config.json")

# Create the workflow state object that the later cells call into.
agile = agile2_state(config)

# Linking to Google Drive

We need somewhere to read and write files. The Colab environment this notebook runs in does not persist between sessions, so we mount Google Drive to get persistent storage.

In [None]:
# Mount Google Drive when running inside Colab.
# Only a failed import means "not in Colab"; a failure while mounting
# (e.g. declined authorisation) is a real error and is allowed to surface
# instead of being reported as "colab not available".
try:
    from google.colab import drive
except ImportError:
    # Running outside Colab (e.g. a local Jupyter server): nothing to mount.
    print("colab not available")
else:
    drive.mount('/content/drive', force_remount=True)

In [None]:
from pathlib import Path

# Root of the mounted Google Drive. Once Drive is mounted you can browse to
# this directory from the "Files" panel in Colab (the folder icon in the far
# left menu).
base_folder = '/content/drive/My Drive/'

# This is the location on google drive that this tutorial will use to save data.
working_folder = base_folder + 'esa2024_data/'

# Fill in a Drive-based default for every path that was left unset (None or
# empty string) in the configuration cell.
# NOTE(review): `agile` was constructed from `config` before these defaults
# are applied -- confirm that agile2_state reads the paths lazily.
if not config.db_path:
  config.db_path = working_folder + 'hoplite_db/db.sqlite'

if not config.embeddings_folder:
  config.embeddings_folder = working_folder + 'embeddings/'

if not config.labeled_examples_folder:
  config.labeled_examples_folder = working_folder + 'labeled_examples/'

# parents=True so this also works on the very first run, when the working
# folder itself does not exist yet; exist_ok=True keeps the cell re-runnable.
Path(config.labeled_examples_folder).mkdir(parents=True, exist_ok=True)

In [None]:
# Download the 'powerful_owl_subset' audio embeddings into
# `config.embeddings_folder` (by default a folder under the Drive working
# folder set up earlier in the notebook).
# This might take a while.

download_embeddings('powerful_owl_subset', config.embeddings_folder)

# Create embeddings database

Here we take the downloaded embedding files for the recordings we will be searching and load them into a database, which is the format the rest of the workflow uses.

In [None]:
# Using the downloaded embeddings in `config.embeddings_folder`, create a
# database of embeddings.
# This database links labels to embeddings so we can train our classifier.
# This might take a while.
agile.create_database(config.embeddings_folder)

In [None]:
# Run the workflow's initialisation step. What it sets up is defined inside
# agile2_state and is not visible from this notebook.
# NOTE(review): presumably this must run after the database has been created -- confirm.
agile.initialize()

# Search

Here, we take a single example and find the examples in our search set which most closely match that example. This is a way to get started with a labelled training set.

In [None]:
#@title Load query audio. { vertical-output: true }

# Put your labelled examples in a folder on your mounted Google Drive, 
# then specify the path here. 
path_to_labeled_examples = config.labeled_examples_folder
audio_files = Helpers.list_audio_files(path_to_labeled_examples)

# Fail early with an actionable message: a freshly created examples folder is
# empty, and indexing an empty list would only raise a bare IndexError.
if len(audio_files) == 0:
  raise FileNotFoundError(
      f"No audio files found in {path_to_labeled_examples!r}. "
      "Add at least one labelled example there, or set query_uri explicitly."
  )

# choose one of the audio examples in the labeled examples folder
query_uri = audio_files[0]

# or specify a path or url
#@markdown The `query_uri` can be a URL, filepath, or Xeno-Canto ID
#@markdown (like `xc777802`, containing an Eastern Whipbird (`easwhi1`)).
#query_uri = "../../../local/esa/20230513T150000+0700_Site-109_1376880___755.0.wav"  #@param {type:'string'}

# Embed the chosen query so it can be compared against the search set.
agile.embed_query(query_uri)

In [None]:
# Search parameters, exposed as Colab form fields. They are passed
# positionally, in this order, to `agile.search_with_query` below.
#@markdown Our target call-type label
query_label = 'powerful_owl'  #@param {type:'string'}
#@markdown Number of results to retrieve.
num_results = 40  #@param
#@markdown Number of (randomly selected) database entries to search over.
sample_size = 1_000_000  #@param
#@markdown When margin sampling, target this logit.
target_score = 1.0  #@param

# Search using the query embedded in the previous cell.
agile.search_with_query(query_label, num_results, sample_size, target_score)

In [None]:
#@title Save data labels. { vertical-output: true }

# Persist labels via the workflow object.
# NOTE(review): presumably these are the labels assigned while reviewing the
# search results above -- confirm against agile2_state.save_labels.
agile.save_labels()

# Classify

In [None]:
#@title Classifier training. { vertical-output: true }

# Training hyperparameters, exposed as Colab form fields.
#@markdown Set of labels to classify. If None, auto-populated from the DB.
target_labels = None  #@param
learning_rate = 1e-3  #@param
weak_neg_weight = 0.05  #@param
l2_mu = 0.000  #@param
num_steps = 128  #@param
train_ratio = 0.01  #@param
batch_size = 128  #@param
weak_negatives_batch_size = 128  #@param
loss_fn_name = 'bce'  #@param ['hinge', 'bce']

# Arguments are positional; keep them in exactly this order.
agile.train_classifier(
    target_labels,
    learning_rate,
    weak_neg_weight,
    l2_mu,
    num_steps,
    train_ratio,
    batch_size,
    weak_negatives_batch_size,
    loss_fn_name,
)


In [None]:
#@title Review Classifier Results. { vertical-output: true }
# Review parameters, exposed as Colab form fields and passed positionally to
# `agile.search_with_classifier` below.
#@markdown Our target call-type label
# Must be a label the classifier was trained on. The only label this notebook
# creates is the one used in the query-search step ('powerful_owl'), so the
# default matches it; change it if you labelled a different class.
query_label = 'powerful_owl'  #@param {type:'string'}
#@markdown Number of results to retrieve.
num_results = 40  #@param
#@markdown Number of (randomly selected) database entries to search over.
sample_size = 1_000_000  #@param
#@markdown When margin sampling, target this logit.
target_score = 3.0  #@param

# Search the database using the trained classifier's scores for `query_label`.
agile.search_with_classifier(query_label, num_results, sample_size, target_score)


In [None]:
#@title Save data labels. { vertical-output: true }

# Persist labels via the workflow object.
# NOTE(review): presumably these are the labels assigned while reviewing the
# classifier results above -- confirm against agile2_state.save_labels.
agile.save_labels()