In [1]:
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

In [1]:
import librosa
import numpy as np    
import os
import signal
import subprocess
import json
import uuid
from IPython.display import HTML

Extractor (Input Validation, Chunking, Mapping From file_csv and annotations_csv to chunks)

In [3]:
from acoustic_knowledge_discovery import extractors

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
extractor = extractors.Extractor(
    "/home/s.kamboj.400/acoustic_knowledge_discovery/sample_inputs/file.csv", #file metadata csv
    "/home/s.kamboj.400/acoustic_knowledge_discovery/sample_inputs/anno.csv", #annoations csv
    "/home/s.kamboj.400/acoustic_knowledge_discovery/sample_inputs/inputDtory",#input directory with a files subdirectory for audio files & a XC-templates subdirectory for XC templates
    chunk_size = 5) #chunk size in seconds
chunkDS = extractor.forward()


Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 920.34 examples/s]


In [5]:
for row in chunkDS.chunk_ds["train"]:
    print(row)
chunkDS.chunk_ds

{'file_name': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3', ' time_of_day': 'Morning', 'season': 'Rainy', 'species': ['Bird', 'Bat'], 'chunk_start': 0, 'chunk_id': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_0', 'Annotation': ['owl'], 'Confidence': [0.8999999761581421]}
{'file_name': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3', ' time_of_day': 'Morning', 'season': 'Rainy', 'species': ['Bird', 'Bat'], 'chunk_start': 5, 'chunk_id': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_5', 'Annotation': ['owl'], 'Confidence': [0.8999999761581421]}
{'file_name': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3', ' time_of_day': 'Morning', 'season': 'Rainy', 'species': ['Bird', 'Bat'], 'chunk_start': 10, 'chunk_id': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_10', 'Annotation': [], 'Confidence': []}
{'file_name': 'XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3', ' time_of_day': 'Morning', 's

DatasetDict({
    train: Dataset({
        features: ['file_name', ' time_of_day', 'season', 'species', 'chunk_start', 'chunk_id', 'Annotation', 'Confidence'],
        num_rows: 6
    })
})

Chunk DS Augmentations

In [6]:
def add_raw_audio_batched(batch):
    raws = []
    for fname, start in zip(batch["file_name"], batch["chunk_start"]):
        y, _ = librosa.load(
            extractor.base_dir / "files" / fname,
            offset=float(start),
            duration=float(extractor.chunk_size),
            sr=None
        )
        raws.append(y.astype(np.float32))
    # return list-of-arrays 
    return {"raw_audio": raws}


In [7]:
# Add raw audio column in dataset
chunkDS.chunk_ds = chunkDS.chunk_ds.map(add_raw_audio_batched, 
                                        batched=True, 
                                        num_proc=4)

Map (num_proc=4): 100%|██████████| 6/6 [00:01<00:00,  4.59 examples/s]


In [8]:
chunkDS.chunk_ds #make sure that new column raw_audio is added

DatasetDict({
    train: Dataset({
        features: ['file_name', ' time_of_day', 'season', 'species', 'chunk_start', 'chunk_id', 'Annotation', 'Confidence', 'raw_audio'],
        num_rows: 6
    })
})

File-level Operations

In [9]:
from acoustic_knowledge_discovery.features.feature_preprocessing_pipeline import FeatureSequential
from acoustic_knowledge_discovery import features

THRESHOLD = 0.6

pipeline = FeatureSequential(
    features.TemplateMatching(
        extractor.base_dir / "files",
        extractor.base_dir / "XC-templates",
        THRESHOLD,
        chunk_size=extractor.chunk_size
    ),
    # features.anotherFunctionThatInheretsFromFeaturePreprocessor
)

chunkDS = pipeline.forward(chunkDS)  

1 total templates
Filtered down to 1 unique species templates
  XC758442 - Common Blackbird - Turdus merula: XC758442 - Common Blackbird - Turdus merula.wav


Map: 100%|██████████| 6/6 [00:00<00:00, 548.75 examples/s]

Progress: 1/1 templates completed (100.0%)
Total matches found above the threshold of 0.6: 1





In [10]:
#verify properly loaded after feature extraction
for row in chunkDS.chunk_ds["train"]:
    print(row["Annotation"], row["Confidence"])

['owl'] [0.8999999761581421]
['owl'] [0.8999999761581421]
[] []
[] []
['XC758442 - Common Blackbird - Turdus merula'] [0.6061245203018188]
['bat', 'XC758442 - Common Blackbird - Turdus merula'] [0.800000011920929, 0.6061245203018188]


Chunk-level Operations

In [25]:
#Insert EGCI
egci = features.EGCI()
chunkDS= egci(chunkDS)


Map (num_proc=4): 100%|██████████| 6/6 [00:09<00:00,  1.55s/ examples]


In [26]:
#verify EGCI column properly added
for row in chunkDS.chunk_ds["train"]:
    print(row["chunk_id"], row["EGCI"])
chunkDS.chunk_ds

XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_0 [0.6825488342933071, 0.4642582997587565]
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_5 [0.6520874520975383, 0.3366618586753143]
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_10 [0.6374639176949186, 0.4597440843424236]
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_15 [0.5778173748528687, 0.43583888954253247]
XC758442 - Common Blackbird - Turdus merula.wav_0 [0.7267189092284931, 0.3654319916606137]
XC758442 - Common Blackbird - Turdus merula.wav_5 [0.8713889758341212, 0.19875033527163866]


DatasetDict({
    train: Dataset({
        features: ['file_name', ' time_of_day', 'season', 'species', 'chunk_start', 'chunk_id', 'Annotation', 'Confidence', 'raw_audio', 'EGCI'],
        num_rows: 6
    })
})

Binning!

In [27]:
from acoustic_knowledge_discovery.postprocessing import MakeBins2dFloat
makebins = MakeBins2dFloat(chunkDS)
egci_bin={
    "low": ( (0.0, 34), (0.0,0.34) ), 
    "medium": ((0.34,0.67),(0.34,0.67)), 
    "high": ((0.67,1),(0.67,1))
}

chunkDS = makebins("EGCI", egci_bin)

Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 450.39 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 497.61 examples/s]


x is [0.68254883 0.65208745 0.63746392 0.57781737 0.72671891 0.87138898], x_lo is 0.0, x_hi is 34.0 
y is [0.4642583  0.33666186 0.45974408 0.43583889 0.36543199 0.19875034], y_lo is 0.0, y_hi is 0.34 
x is [0.68254883 0.65208745 0.63746392 0.57781737 0.72671891 0.87138898], x_lo is 0.34, x_hi is 0.67 
y is [0.4642583  0.33666186 0.45974408 0.43583889 0.36543199 0.19875034], y_lo is 0.34, y_hi is 0.67 
x is [0.68254883 0.65208745 0.63746392 0.57781737 0.72671891 0.87138898], x_lo is 0.67, x_hi is 1.0 
y is [0.4642583  0.33666186 0.45974408 0.43583889 0.36543199 0.19875034], y_lo is 0.67, y_hi is 1.0 


Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 744.00 examples/s]


In [24]:
for row in chunkDS.chunk_ds["train"]:
    label_str = chunkDS.chunk_ds["train"].features["EGCI"].int2str(row["EGCI"])
    print(row["chunk_id"], label_str)
chunkDS.chunk_ds

XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_0 Other
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_5 low
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_10 medium
XC233970 - Black-chinned Sparrow - Spizella atrogularis.mp3_15 medium
XC758442 - Common Blackbird - Turdus merula.wav_0 Other
XC758442 - Common Blackbird - Turdus merula.wav_5 low


DatasetDict({
    train: Dataset({
        features: ['file_name', ' time_of_day', 'season', 'species', 'chunk_start', 'chunk_id', 'Annotation', 'Confidence', 'raw_audio', 'EGCI'],
        num_rows: 6
    })
})

In [None]:
# # Tests that MakeBins1dFloat does work, but it does not make sense to use it on this dataset
# from acoustic_knowledge_discovery.postprocessing import MakeBins1dFloat
# makebins1d = MakeBins1dFloat(chunkDS)
# OneDBin={
#     "low": (0, 5) , 
#     "medium": (5, 10), 
#     "high": (10, 15)
# }

# chunkDS = makebins1d("chunk_start", OneDBin)

# for row in chunkDS.chunk_ds["train"]:
#     print(row["chunk_id"], row["chunk_start"])
# chunkDS.chunk_ds

Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 770.94 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 563.06 examples/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 711.82 examples/s]


In [None]:
#in graph format, let them choose which columns to exclude (can submit empty list if none) *definitely exclude raw_audio

In [None]:
# sean's code!!
# assume that the output is in d3-visualization/graph_representation.json

Create d3 visualization

In [9]:
command = ["npx", "http-server", "-p", "8080"] 
target_directory = "acoustic_knowledge_discovery/d3-visualization/"

process = subprocess.Popen(
    command,
    cwd=target_directory,
    preexec_fn=os.setsid  
)

# Persist the group leader PID so you can kill it later even after kernel restarts & kill the entire tree
with open("/tmp/http_server_pid.json", "w") as f:
    json.dump({"pgid": process.pid}, f)

print(f"Server started. PGID={process.pid}")

Server started. PGID=2456115


In [41]:
# #nocache ensures that the frame output is never cached because it is pulling from unique uuid link
# HTML(f"""
# <iframe style="background-color: white;" 
#         src="http://127.0.0.1:8080/?nocache={uuid.uuid4()}" 
#         width="1200" height="1000"></iframe>
# """)

from IPython.display import IFrame
import uuid

IFrame(
    src=f"files/acoustic_knowledge_discovery/d3-visualization/index.html?nocache={uuid.uuid4()}",
    width=1200,
    height=800
)






In [25]:
# Kill the entire process group (gracefully) to stop d3vis port
with open("/tmp/http_server_pid.json") as f:
    pgid = json.load(f)["pgid"]


os.killpg(pgid, signal.SIGTERM)