In [None]:
from utils.pigeon.annotate import annotate
import json
import pandas as pd
from pandas import json_normalize
from utils.helper import get_filenames
import os
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile


def set_pandas_display_options() -> None:
    """Raise pandas' display limits so wide and long frames print in full.

    Sets the maximum number of displayed columns and rows, the maximum
    column width, and the overall display width. Returns nothing.
    """
    # Ref: https://stackoverflow.com/a/52432757/
    wanted = (
        ("display.max_columns", 1000),
        ("display.max_rows", 1000),
        ("display.max_colwidth", 199),
        ("display.width", 1000),
        # ("display.precision", 2),  # set as needed
    )
    for option_name, option_value in wanted:
        pd.set_option(option_name, option_value)


set_pandas_display_options()

def display_audio(filename):
    """Show an autoplaying audio player for ``filename`` in the notebook.

    Parameters
    ----------
    filename
        Path of the audio file; passed unchanged to
        ``IPython.display.Audio(filename=...)``.

    Notes
    -----
    This function previously also decoded the file with
    ``scipy.io.wavfile.read`` and opened a matplotlib figure for a
    spectrogram. The spectrogram call itself had been commented out, so the
    decoded samples were discarded and an empty figure was created on every
    call without ever being shown or closed. That leftover work is removed:
    it leaked one figure per annotated file and made the function raise on
    files that ``wavfile.read`` cannot parse even though nothing used the
    samples.
    """
    ipd.display(ipd.Audio(filename=filename, autoplay=True))

# Directory the notebook is running from; data paths in later cells are
# built relative to it. (The former `os.chdir(org_dir)` changed into the
# directory we were already in, so it has been dropped.)
org_dir = os.getcwd()

#################################################

# Let's define our labels
ontology_path = os.path.join('ontology-master', 'ontology.json')
# The context manager closes the file even when parsing fails, and the
# encoding is stated explicitly instead of relying on the platform default.
with open(ontology_path, encoding='utf-8') as f:
    data = json.load(f)
# One row per ontology entry, indexed by the entry's "id" field.
df = json_normalize(data).set_index("id")

# Furthermore we like to add some hand-picked labels
chosen_names = ['Human sounds', 'Animal', 'Sounds of things', 'Music', 'Natural sounds']
chosen_ids = list(df.index[df['name'].isin(chosen_names)])
# Fails if a chosen name is missing from the ontology or matches several entries.
assert len(chosen_names) == len(chosen_ids), "Length of chosen names and ids should be the same"

# Keep only the chosen entries, without the citation and example columns.
labels = df.drop(columns=["citation_uri", "positive_examples"]).loc[chosen_ids]

# Exclude the "Narration, monologue" entry should it be among the selection.
labels = labels[labels["name"] != "Narration, monologue"]

In [None]:
# Show the label table as this cell's output.
labels

## Load annotation file

In [None]:
data_path = os.path.join(org_dir, '../data')
# The annotation table must be a file. It used to be set to the data directory
# itself, so reading always failed and the fallback write targeted a directory.
# NOTE(review): the file name below is an assumption - point it at the
# project's real annotation CSV if one already exists under another name.
csv_path = os.path.join(org_dir, '../data', 'annotations.csv')

if os.path.isfile(csv_path):
    # Resume from earlier work. A read error now surfaces instead of being
    # swallowed by a bare `except:` and answered by overwriting the file
    # with an empty table.
    df = pd.read_csv(csv_path, index_col=0)
else:
    # First run: start a table with one unlabelled row per file.
    filenames = get_filenames(data_path)
    df = pd.DataFrame(
        data={'label': len(filenames) * [None], 'certainty': len(filenames) * [None]},
        index=filenames,
    )
    df.to_csv(csv_path)

data_path = os.path.abspath(data_path)

# ANNOTATE
# Only rows whose label is still missing are offered for annotation.
annotations, certainties = annotate(
    list(df.index[df.label.isna()]),
    options=list(labels['name']),
    display_fn=lambda filename: display_audio(os.path.join(data_path, filename)))

## Save labels

In [None]:
# Copy the collected (file, value) pairs into the table, labels first and
# certainties second.
for column, pairs in (("label", annotations), ("certainty", certainties)):
    for file, value in pairs:
        df.loc[file, column] = value

# Write the updated table to the path held in csv_path.
df.to_csv(os.path.join(org_dir, csv_path))