## Copy WAV files of single birds

In [1]:
import shutil
from pathlib import Path

# Source tree (searched recursively) and the flat destination folder for WAVs.
origin_data_dir = Path('/media/george-vengrovski/George-SSD/birdconv_20250311_162324')
dest_data_dir = Path('/media/george-vengrovski/George-SSD/alistair_call_data_test_org')
dest_wav_dir = dest_data_dir / "wav"
dest_wav_dir.mkdir(parents=True, exist_ok=True)

# Sorted so that repeated runs process files in the same order; only regular
# files are kept, in case a directory happens to be named '*.wav'.
wav_files = sorted(p for p in origin_data_dir.rglob('*.wav') if p.is_file())

copied_count = 0
copied_names = set()     # file names already written to dest_wav_dir in this run
name_collisions = []     # sources skipped because their name was already copied

for wav_file in wav_files:
    # count occurrences of 'USA' in the file name (case insensitive) and skip
    # names that contain it more than once
    # NOTE(review): presumably each 'USA' marks one bird ID, so this keeps
    # single-bird recordings (per the section heading) -- confirm.
    if wav_file.name.upper().count('USA') > 1:
        continue

    # The source tree is flattened into one folder, so two files with the same
    # name in different subfolders would overwrite each other. Keep the first
    # one (in sorted order) and report the rest instead of silently losing data.
    if wav_file.name in copied_names:
        name_collisions.append(wav_file)
        continue

    dest_file = dest_wav_dir / wav_file.name
    shutil.copy2(wav_file, dest_file)
    copied_names.add(wav_file.name)
    copied_count += 1

print(f"{copied_count} wav file(s) copied to {dest_wav_dir}")

if name_collisions:
    print(f"{len(name_collisions)} wav file(s) skipped because a file with the same name was already copied:")
    for skipped_file in name_collisions:
        print(f"  {skipped_file}")


142 wav file(s) copied to /media/george-vengrovski/George-SSD/alistair_call_data_test_org/wav


In [2]:
import shutil
from pathlib import Path

# Source tree (searched recursively) and the flat destination folder for JSONs.
origin_data_dir = Path('/media/george-vengrovski/George-SSD/birdconv_20250311_162324')
dest_data_dir = Path('/media/george-vengrovski/George-SSD/alistair_call_data_test_org')
dest_json_dir = dest_data_dir / "json"
dest_json_dir.mkdir(parents=True, exist_ok=True)

# Sorted so that repeated runs process files in the same order; only regular
# files are kept.
json_files = sorted(p for p in origin_data_dir.rglob('*.json') if p.is_file())
moved_count = 0
skipped_existing = []    # sources left in place because the destination exists

for json_file in json_files:
    # skip names that contain 'USA' more than once (case insensitive)
    # NOTE(review): presumably each 'USA' marks one bird ID -- confirm.
    if json_file.name.upper().count('USA') > 1:
        continue

    dest_file = dest_json_dir / json_file.name

    # This is a MOVE: the source disappears. shutil.move may overwrite an
    # existing destination file, which would destroy one of two same-named
    # annotation files. Never overwrite; leave the source where it is and
    # report it so it can be handled by hand.
    if dest_file.exists():
        skipped_existing.append(json_file)
        continue

    shutil.move(str(json_file), str(dest_file))
    moved_count += 1

print(f"{moved_count} json file(s) moved to {dest_json_dir}")

if skipped_existing:
    print(f"{len(skipped_existing)} json file(s) left in place because the destination already exists:")
    for skipped_file in skipped_existing:
        print(f"  {skipped_file}")

146 json file(s) moved to /media/george-vengrovski/George-SSD/alistair_call_data_test_org/json


In [5]:
import json
from pathlib import Path

# directories
json_dir = Path('/media/george-vengrovski/George-SSD/alistair_call_data_test_org/json')
output_file = Path('/media/george-vengrovski/George-SSD/alistair_call_data_test_org') / "combined_calls.json"

# conversion factor from ms to timebin as derived from sample (10/27)
# NOTE(review): if one timebin is one hop of step_size=119 samples, 10/27
# corresponds to a sample rate of about 44074 Hz, not exactly 44100 Hz --
# confirm the factor against the real audio sample rate.
conv_factor = 10 / 27

# Filename suffixes to strip, longest first so '.combined.json' wins over '.json'.
json_suffixes = ('.combined.json', '.json')

combined_entries = []

# iterate through each json file in the json directory, in sorted order so the
# output file is identical from run to run
for jf in sorted(json_dir.glob('*.json')):
    with open(jf, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # extract segments; only process segments where type is "call"
    segments = data.get("segments", [])
    new_segments = []
    for seg in segments:
        if seg.get("type") != "call":
            continue
        onset_ms = seg.get("onset_ms")
        offset_ms = seg.get("offset_ms")
        # segments with a missing boundary cannot be converted; drop them
        if onset_ms is None or offset_ms is None:
            continue
        # compute timebins (round to nearest integer; Python's round() sends
        # exact .5 ties to the nearest even integer)
        onset_tb = int(round(onset_ms * conv_factor))
        offset_tb = int(round(offset_ms * conv_factor))
        new_segments.append({
            "onset_ms": onset_ms,
            "offset_ms": offset_ms,
            "onset_timebin": onset_tb,
            "offset_timebin": offset_tb
        })

    # flag is true when at least one "call" segment survived the filtering
    song_present = bool(new_segments)

    # derive new filename by removing a trailing .combined.json or .json from
    # the original filename; only the suffix is stripped, so the same text
    # occurring earlier in the name is left untouched
    base = jf.name
    new_filename = base
    for suffix in json_suffixes:
        if base.endswith(suffix):
            new_filename = base[:-len(suffix)]
            break

    # append .wav extension to the new filename
    # NOTE(review): the name is derived from the JSON name only; nothing here
    # checks that a matching .wav file actually exists.
    new_filename += ".wav"

    entry = {
        "filename": new_filename,
        "song_present": song_present,
        "segments": new_segments,
        "spec_parameters": {
            "step_size": 119,
            "nfft": 1024
        },
        "syllable_labels": {}
    }

    combined_entries.append(entry)

with open(output_file, 'w') as f:
    json.dump(combined_entries, f, indent=4)

print(f"combined json written to {output_file}")


combined json written to /media/george-vengrovski/George-SSD/alistair_call_data_test_org/combined_calls.json
