In [2]:
import librosa
import numpy as np
import os
from collections import Counter

In [3]:
def extract_notes(audio_path, labels=("A", "B", "C")):
    """Detect onsets in a recording and return one placeholder label per onset.

    The audio is loaded with ``librosa.load``, an onset-strength envelope is
    computed from it, and peaks of that envelope are picked as onsets.  Each
    onset then receives a label by cycling through ``labels`` in detection
    order.

    NOTE(review): the label depends only on an onset's position in the
    sequence, never on the audio at that onset.  With the default labels the
    result is always ``A, B, C, A, B, C, ...``, so the only information it
    carries is the *number* of onsets.  Replace the cycling step with a
    pitch- or spectrum-based classifier before interpreting any vocabulary or
    motif statistics derived from this output.

    Args:
        audio_path: Path of the audio file; passed straight to
            ``librosa.load``.
        labels: Non-empty sequence of labels to cycle through.  Defaults to
            ``("A", "B", "C")``, which reproduces the original behaviour.

    Returns:
        list: One label per detected onset, in detection order.  Empty when
        no onset is detected.

    Raises:
        ValueError: If ``labels`` is empty.
    """
    # Fail before doing any audio work; an empty alphabet cannot be cycled.
    if len(labels) == 0:
        raise ValueError("labels must contain at least one label")

    y, sr = librosa.load(audio_path)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)

    # Window sizes and `wait` are expressed in envelope frames; `delta` is the
    # margin by which a peak must exceed the local average.
    peak_frames = librosa.util.peak_pick(
        onset_env, pre_max=3, post_max=3, pre_avg=5, post_avg=5, delta=0.1, wait=10
    )

    # Only the number of onsets matters for the cyclic labelling, so the
    # frame -> time conversion the original performed (and then ignored) is
    # not needed.
    return [labels[index % len(labels)] for index in range(len(peak_frames))]

# Directory holding the recordings, relative to the notebook's working directory.
bird_recordings_dir = r'bird_data'

# One label sequence per recording. Rebuilt from scratch on every run, so
# re-executing this cell never appends duplicates.
sequences = []

# os.listdir() returns entries in arbitrary order; sort so that `sequences`
# is built in the same order on every run and every machine.
for filename in sorted(os.listdir(bird_recordings_dir)):
    file_path = os.path.join(bird_recordings_dir, filename)
    # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints,
    # .DS_Store): they are not recordings and would make the loader fail.
    if filename.startswith('.') or not os.path.isfile(file_path):
        continue
    notes = extract_notes(file_path)
    sequences.append(notes)

In [4]:
def analyze_sequences(sequences):
    """Summarise label sequences as a vocabulary and adjacent-pair counts.

    Args:
        sequences: Iterable of label sequences.  Each sequence must support
            ``len()`` and slicing, and its labels must be hashable.

    Returns:
        tuple: ``(vocabulary, motifs)`` where ``vocabulary`` is the set of all
        distinct labels and ``motifs`` is a ``Counter`` mapping every pair of
        consecutive labels (as a 2-tuple) to its number of occurrences across
        all sequences.
    """
    # First pass: union of every label seen in any sequence.
    vocabulary = set().union(*sequences)

    # Second pass: slide a window of width two over each sequence.  Sequences
    # shorter than two labels produce an empty range and contribute nothing.
    motifs = Counter(
        tuple(labels[start:start + 2])
        for labels in sequences
        for start in range(len(labels) - 1)
    )

    return vocabulary, motifs

# Run the analysis over the module-level `sequences` list and print both results.
# NOTE(review): `vocabulary` is a set, so the element order shown in the
# printed output is not guaranteed to be the same between kernel sessions.
vocabulary, motifs = analyze_sequences(sequences)
print("\nVocabulary:", vocabulary)
print("\nMotifs:", motifs)


Vocabulary: {'B', 'C', 'A'}

Motifs: Counter({('A', 'B'): 328, ('B', 'C'): 323, ('C', 'A'): 320})
