In [None]:
!pip install librosa

In [None]:
# Change the working directory to the project folder and list its contents.
# Later cells use relative paths such as 'dataset/database_recordings'.
# NOTE(review): this absolute path is machine-specific — adjust it when
# running the notebook anywhere else.
%cd /notebooks/audio-identification-reproduction
!ls

In [None]:
import os
import numpy as np
from skimage.feature import peak_local_max
from scipy.ndimage import maximum_filter
from collections import defaultdict
import librosa

import matplotlib.pyplot as plt

# STFT frame length in samples; passed as n_fft to librosa.stft and forwarded
# to fingerprintBuilder.
WINDOW_SIZE = 1024 
# Step between successive STFT frames in samples; passed as hop_length.
HOP_LENGTH = 512
# Minimum separation between detected spectrogram peaks; passed as
# min_distance to peak_local_max and forwarded to fingerprintBuilder.
PEAK_NEIGHBORHOOD_SIZE = 20

In [None]:
# Load one database recording, asking librosa for a 22050 Hz sample rate.
y, sr = librosa.load('dataset/database_recordings/pop.00006.wav', sr=22050)

# Compute the STFT magnitude spectrogram (Hann window) using the shared
# WINDOW_SIZE / HOP_LENGTH settings. Nothing is plotted in this cell.
D = np.abs(librosa.stft(y, n_fft=WINDOW_SIZE, window='hann', hop_length=HOP_LENGTH))
# Decibel-scaled copy, referenced to the loudest bin.
# NOTE(review): `spectrogram` is not used by any later visible cell (peak
# picking works on np.log(D) instead) — confirm whether it is still needed.
spectrogram = librosa.amplitude_to_db(D, ref=np.max)  # Convert to log scale (decibels)


In [None]:
# Pick local maxima of the log-magnitude spectrogram. Each row of `peaks` is a
# (row, column) index pair into D, i.e. (frequency bin, time frame).
# NOTE(review): np.log(D) is -inf wherever D is exactly 0 and NumPy emits a
# divide-by-zero RuntimeWarning there — consider adding a small epsilon.
# NOTE(review): threshold_rel is relative to the image maximum; confirm that
# 0.05 is the intended cut-off for log-scaled (possibly negative) values.
peaks = peak_local_max(np.log(D), min_distance=PEAK_NEIGHBORHOOD_SIZE,threshold_rel=0.05)

In [None]:
# Scatter plot of the detected spectrogram peaks.
# peak_local_max returns (row, column) indices: for the STFT matrix the rows
# are frequency bins and the columns are time frames, so column goes on x.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(peaks[:, 1], peaks[:, 0], 'r.')
ax.set_xlabel('Time frame index')
ax.set_ylabel('Frequency bin index')
ax.set_title('Detected spectrogram peaks')
plt.show()

In [None]:
from audio_fingerprint import fingerprintBuilder

In [None]:
# Run the project's fingerprintBuilder over the database recordings folder,
# passing the same STFT and peak-picking constants used in the cells above.
# NOTE(review): presumably this writes the fingerprints to `output_file`; the
# behaviour lives in audio_fingerprint and is not visible here — confirm.
folder_path = 'dataset/database_recordings'
output_file = "fingerprint_database.txt"

fingerprintBuilder(folder_path, output_file, 
                   WINDOW_SIZE=WINDOW_SIZE, 
                   HOP_LENGTH=HOP_LENGTH, 
                   PEAK_NEIGHBORHOOD_SIZE=PEAK_NEIGHBORHOOD_SIZE)

In [None]:
from collections import defaultdict, Counter

def match_fingerprints(query_fingerprints, database, top_n=3):
    """
    Match query fingerprints against the database and return the best results.

    Every query fingerprint that also occurs in the database casts one vote
    for each database file it appears in, tagged with the time offset
    ``db_time - query_time``. A file's score is the size of its largest group
    of identical offsets, i.e. how many fingerprints line up at one single
    time shift.

    Parameters
    ----------
    query_fingerprints : iterable of (fingerprint, query_time)
        Fingerprints of the query and the time at which each one occurs.
    database : mapping
        Maps a fingerprint to an iterable of ``(file_name, db_time)`` pairs.
    top_n : int, optional
        Maximum number of results to return. Defaults to 3.

    Returns
    -------
    list of (file_name, score)
        Sorted by descending score; files with equal scores keep the order in
        which they were first matched. Empty when nothing matched.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Tally offsets per file as we go instead of storing every offset in a
    # list and counting afterwards.
    offset_votes = defaultdict(Counter)
    for fingerprint, query_time in query_fingerprints:
        if fingerprint not in database:
            continue
        for file_name, db_time in database[fingerprint]:
            offset_votes[file_name][db_time - query_time] += 1

    # A file only has an entry once it received a vote, so max() never sees
    # an empty counter.
    scores = {
        file_name: max(votes.values())
        for file_name, votes in offset_votes.items()
    }

    # sorted() is stable, so ties stay in first-matched order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top_n]

def identify_audio(query_audio_path, database):
    """
    Fingerprint a query recording and report its best database matches.

    Prints a progress message for each stage and one line per ranked match,
    then returns the list produced by ``match_fingerprints``.

    NOTE(review): ``generate_fingerprint`` is neither defined nor imported in
    the visible cells of this notebook — confirm it is in scope before use.
    """
    print(f"Generating fingerprints for query audio: {query_audio_path}")
    fingerprints = generate_fingerprint(query_audio_path)

    print("Matching fingerprints against the database...")
    ranked = match_fingerprints(fingerprints, database)

    print("Top 3 matches:")
    for position, match in enumerate(ranked):
        file_name, score = match
        # Ranks are reported 1-based.
        print(f"{position + 1}. {file_name} (Score: {score})")

    return ranked

In [None]:
# Fingerprint a single query recording to try against the database.
# NOTE(review): generate_fingerprint is not defined or imported in any visible
# cell (only fingerprintBuilder is imported from audio_fingerprint) — confirm
# it is available before running on a fresh kernel.
query_audio_path = 'dataset/query_recordings/pop.00009-snippet-10-10.wav'
query_fingerprints = generate_fingerprint(query_audio_path)

In [None]:
# Rank database recordings against the query fingerprints.
# NOTE(review): fingerprint_database is never assigned in the visible cells
# (the fingerprintBuilder call does not capture a return value) — confirm
# where it is loaded before running on a fresh kernel.
top_results = match_fingerprints(query_fingerprints, fingerprint_database)

In [None]:
# Display the ranked (file_name, score) matches as the cell output.
top_results