<a href="https://colab.research.google.com/github/YasminaElkhazen/Shazam/blob/main/Shazam_Proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Shazam Alpha version.
The goal is to develop a song recognition app using similarity search.

In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
!wget https://os.unil.cloud.switch.ch/fma/fma_small.zip
!unzip fma_small.zip


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 bunzipping: fma_small/068/068600.mp3  
 bunzipping: fma_small/068/068601.mp3  
 bunzipping: fma_small/068/068680.mp3  
 bunzipping: fma_small/068/068682.mp3  
 bunzipping: fma_small/068/068683.mp3  
 bunzipping: fma_small/068/068820.mp3  
 bunzipping: fma_small/068/068821.mp3  
 bunzipping: fma_small/068/068837.mp3  
 bunzipping: fma_small/068/068838.mp3  
 bunzipping: fma_small/068/068839.mp3  
 bunzipping: fma_small/068/068840.mp3  
 bunzipping: fma_small/068/068841.mp3  
 bunzipping: fma_small/068/068842.mp3  
 bunzipping: fma_small/068/068843.mp3  
 bunzipping: fma_small/068/068844.mp3  
 bunzipping: fma_small/068/068851.mp3  
 bunzipping: fma_small/068/068852.mp3  
 bunzipping: fma_small/068/068853.mp3  
 bunzipping: fma_small/068/068854.mp3  
 bunzipping: fma_small/068/068860.mp3  
 bunzipping: fma_small/068/068861.mp3  
 bunzipping: fma_small/068/068862.mp3  
 bunzipping: fma_small/068/068869.mp3  
 bunzipping: fm

In [None]:
!wget https://os.unil.cloud.switch.ch/fma/fma_metadata.zip
!unzip fma_metadata.zip

--2024-12-06 20:41:16--  https://os.unil.cloud.switch.ch/fma/fma_metadata.zip
Resolving os.unil.cloud.switch.ch (os.unil.cloud.switch.ch)... 86.119.28.16, 2001:620:5ca1:201::214
Connecting to os.unil.cloud.switch.ch (os.unil.cloud.switch.ch)|86.119.28.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 358412441 (342M) [application/zip]
Saving to: ‘fma_metadata.zip’


2024-12-06 20:41:31 (24.0 MB/s) - ‘fma_metadata.zip’ saved [358412441/358412441]

Archive:  fma_metadata.zip
 bunzipping: fma_metadata/README.txt  
 bunzipping: fma_metadata/checksums  
 bunzipping: fma_metadata/not_found.pickle  
 bunzipping: fma_metadata/raw_genres.csv  
 bunzipping: fma_metadata/raw_albums.csv  
 bunzipping: fma_metadata/raw_artists.csv  
 bunzipping: fma_metadata/raw_tracks.csv  
 bunzipping: fma_metadata/tracks.csv  
 bunzipping: fma_metadata/genres.csv  
 bunzipping: fma_metadata/raw_echonest.csv  
 bunzipping: fma_metadata/echonest.csv  
 bunzipping: fma_metadata/features.c

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd
import hashlib
import pickle

# Paths
DATASET_PATH = "fma_small"  # Directory with unzipped dataset
METADATA_PATH = "fma_metadata/tracks.csv"  # Metadata CSV file

# Load metadata
# tracks.csv carries a two-row column header (category, then field name)
# followed by a row that only names the index ("track_id"). Reading both header
# rows keeps the real column names; header=2 would use the index-name row as
# the header and lose them.
metadata = pd.read_csv(METADATA_PATH, index_col=0, header=[0, 1])

def generate_mel_spectrogram(file_path):
    """Load an audio file and return its mel spectrogram in decibels.

    The audio is loaded at 22050 Hz and turned into a 128-band mel spectrogram
    (n_fft=2048, hop_length=512), then converted to dB relative to its maximum.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The dB-scaled mel spectrogram, or None when loading or processing
        fails, when the file holds no samples, or when the spectrogram is all
        zeros. Failures are reported with a printed message.
    """
    try:
        samples, rate = librosa.load(file_path, sr=22050)  # Default sampling rate
        if not len(samples):
            raise ValueError("Audio file is empty.")
        power_spec = librosa.feature.melspectrogram(
            y=samples, sr=rate, n_fft=2048, hop_length=512, n_mels=128
        )
        spec_db = librosa.power_to_db(power_spec, ref=np.max)
        # An all-zero dB spectrogram carries no usable information
        if not np.any(spec_db != 0):
            raise ValueError("Spectrogram is empty or has all zero values.")
    except Exception as err:
        print(f"Error generating spectrogram for {file_path}: {err}")
        return None
    return spec_db


def extract_fingerprint(mel_spectrogram, pre_max=20, post_max=20, pre_avg=10, post_avg=10, wait=50, delta=0.05):
    """
    Extracts a fingerprint from a mel spectrogram using peak picking.

    The spectrogram is min-max scaled to [0, 1], averaged over the mel axis to
    a 1D per-frame curve, and the frame indices of the peaks in that curve are
    hashed with MD5.

    Args:
        mel_spectrogram: 2D array; axis 0 is averaged away, axis 1 is the
            sequence that peaks are picked from.
        pre_max, post_max, pre_avg, post_avg, wait: Forwarded unchanged to
            librosa.util.peak_pick.
        delta: Threshold offset forwarded to librosa.util.peak_pick. The
            default is a starting point for the [0, 1]-scaled curve; tune as
            needed.

    Returns:
        Hex MD5 digest (str) of the comma-joined peak indices, or None when the
        input is None or any step fails (the error is printed).
    """
    if mel_spectrogram is None:
        print("Error extracting fingerprint: mel spectrogram is None")
        return None
    try:
        spec = np.asarray(mel_spectrogram, dtype=np.float64)

        # Min-max scale to [0, 1]. Dividing by the maximum alone breaks for dB
        # spectrograms whose maximum is 0 (as produced with ref=np.max).
        epsilon = 1e-10
        lowest, highest = spec.min(), spec.max()
        normalized_spec = (spec - lowest) / (highest - lowest + epsilon)

        # Convert spectrogram to 1D for peak picking
        flat_spec = np.mean(normalized_spec, axis=0)

        # peak_pick requires `delta`; omitting it raises a TypeError
        peaks = librosa.util.peak_pick(
            flat_spec,
            pre_max=pre_max,
            post_max=post_max,
            pre_avg=pre_avg,
            post_avg=post_avg,
            delta=delta,
            wait=wait,
        )

        # Create a hash of the peak locations
        peak_string = ",".join(map(str, peaks))
        return hashlib.md5(peak_string.encode()).hexdigest()
    except Exception as e:
        print(f"Error extracting fingerprint: {e}")
        return None

from librosa.util import peak_pick

# Example use of peak_pick with proper arguments
def extract_peaks(spectrogram, pre_max=3, post_max=3, pre_avg=3, post_avg=3, delta=0.5, wait=5):
    """Run peak_pick over the flattened spectrogram and return its result.

    Args:
        spectrogram: Array exposing ``flatten()``; it is flattened to 1D first.
        pre_max, post_max, pre_avg, post_avg, delta, wait: Passed to
            peak_pick as keyword arguments of the same names.

    Returns:
        Whatever peak_pick returns for the flattened input.
    """
    picker_options = {
        "pre_max": pre_max,
        "post_max": post_max,
        "pre_avg": pre_avg,
        "post_avg": post_avg,
        "delta": delta,
        "wait": wait,
    }
    flattened = spectrogram.flatten()
    return peak_pick(flattened, **picker_options)


In [None]:
import librosa
import numpy as np

def validate_audio(file_path):
    """Check that an audio file loads and is not empty or silent.

    Args:
        file_path: Path of the audio file to check.

    Returns:
        True when the file loads and its RMS level is at least 1e-4; False
        when it cannot be loaded, contains no samples, or is too quiet. The
        reason is printed whenever False is returned.
    """
    try:
        audio, _ = librosa.load(file_path, sr=None)
        # An empty signal has a NaN RMS, which would slip past the silence
        # threshold below, so reject it explicitly.
        if len(audio) == 0:
            print(f"File {file_path} contains no audio samples. Skipping.")
            return False
        rms = np.sqrt(np.mean(audio**2))
        if not np.isfinite(rms) or rms < 1e-4:  # Threshold for silence
            print(f"File {file_path} is silent or too quiet. Skipping.")
            return False
        return True
    except Exception as e:
        print(f"Error loading audio file {file_path}: {e}")
        return False


In [None]:
print(validate_audio("fma_small/134/134923.mp3" ))

True


In [None]:
# Re-read tracks.csv with pandas defaults (no header/index arguments) to
# inspect its raw layout.
METADATA__PATH = "fma_metadata/tracks.csv"
metadataa = pd.read_csv(METADATA__PATH)
# With a single default header row, the first rows shown by head() still hold
# header fields (field names, then "track_id") rather than track data.
metadataa.head()

  metadataa = pd.read_csv(METADATA__PATH)


Unnamed: 0.1,Unnamed: 0,album,album.1,album.2,album.3,album.4,album.5,album.6,album.7,album.8,...,track.10,track.11,track.12,track.13,track.14,track.15,track.16,track.17,track.18,track.19
0,,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
1,track_id,,,,,,,,,,...,,,,,,,,,,
2,2,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,...,,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,3,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,...,,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
4,5,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,...,,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World


In [None]:
# Switch to raw_tracks.csv and rebind `metadata` to it; this replaces the
# frame previously loaded from tracks.csv.
METADATA_PATH = "fma_metadata/raw_tracks.csv"
metadata = pd.read_csv(METADATA_PATH)
metadata.head()

Unnamed: 0,track_id,album_id,album_title,album_url,artist_id,artist_name,artist_url,artist_website,license_image_file,license_image_file_large,...,track_information,track_instrumental,track_interest,track_language_code,track_listens,track_lyricist,track_number,track_publisher,track_title,track_url
0,2,1.0,AWOL - A Way Of Life,http://freemusicarchive.org/music/AWOL/AWOL_-_...,1,AWOL,http://freemusicarchive.org/music/AWOL/,http://www.AzillionRecords.blogspot.com,http://i.creativecommons.org/l/by-nc-sa/3.0/us...,http://fma-files.s3.amazonaws.com/resources/im...,...,,0,4656,en,1293,,3,,Food,http://freemusicarchive.org/music/AWOL/AWOL_-_...
1,3,1.0,AWOL - A Way Of Life,http://freemusicarchive.org/music/AWOL/AWOL_-_...,1,AWOL,http://freemusicarchive.org/music/AWOL/,http://www.AzillionRecords.blogspot.com,http://i.creativecommons.org/l/by-nc-sa/3.0/us...,http://fma-files.s3.amazonaws.com/resources/im...,...,,0,1470,en,514,,4,,Electric Ave,http://freemusicarchive.org/music/AWOL/AWOL_-_...
2,5,1.0,AWOL - A Way Of Life,http://freemusicarchive.org/music/AWOL/AWOL_-_...,1,AWOL,http://freemusicarchive.org/music/AWOL/,http://www.AzillionRecords.blogspot.com,http://i.creativecommons.org/l/by-nc-sa/3.0/us...,http://fma-files.s3.amazonaws.com/resources/im...,...,,0,1933,en,1151,,6,,This World,http://freemusicarchive.org/music/AWOL/AWOL_-_...
3,10,6.0,Constant Hitmaker,http://freemusicarchive.org/music/Kurt_Vile/Co...,6,Kurt Vile,http://freemusicarchive.org/music/Kurt_Vile/,http://kurtvile.com,http://i.creativecommons.org/l/by-nc-nd/3.0/88...,http://fma-files.s3.amazonaws.com/resources/im...,...,,0,54881,en,50135,,1,,Freeway,http://freemusicarchive.org/music/Kurt_Vile/Co...
4,20,4.0,Niris,http://freemusicarchive.org/music/Chris_and_Ni...,4,Nicky Cook,http://freemusicarchive.org/music/Chris_and_Ni...,,http://i.creativecommons.org/l/by-nc-nd/3.0/88...,http://fma-files.s3.amazonaws.com/resources/im...,...,,0,978,en,361,,3,,Spiritual Level,http://freemusicarchive.org/music/Chris_and_Ni...


In [None]:
metadataa.shape

(106576, 53)

In [None]:
metadataa.columns

Index(['Unnamed: 0', 'album', 'album.1', 'album.2', 'album.3', 'album.4',
       'album.5', 'album.6', 'album.7', 'album.8', 'album.9', 'album.10',
       'album.11', 'album.12', 'artist', 'artist.1', 'artist.2', 'artist.3',
       'artist.4', 'artist.5', 'artist.6', 'artist.7', 'artist.8', 'artist.9',
       'artist.10', 'artist.11', 'artist.12', 'artist.13', 'artist.14',
       'artist.15', 'artist.16', 'set', 'set.1', 'track', 'track.1', 'track.2',
       'track.3', 'track.4', 'track.5', 'track.6', 'track.7', 'track.8',
       'track.9', 'track.10', 'track.11', 'track.12', 'track.13', 'track.14',
       'track.15', 'track.16', 'track.17', 'track.18', 'track.19'],
      dtype='object')

In [None]:
metadata.shape

(109727, 39)

In [None]:
metadata.columns

Index(['track_id', 'album_id', 'album_title', 'album_url', 'artist_id',
       'artist_name', 'artist_url', 'artist_website', 'license_image_file',
       'license_image_file_large', 'license_parent_id', 'license_title',
       'license_url', 'tags', 'track_bit_rate', 'track_comments',
       'track_composer', 'track_copyright_c', 'track_copyright_p',
       'track_date_created', 'track_date_recorded', 'track_disc_number',
       'track_duration', 'track_explicit', 'track_explicit_notes',
       'track_favorites', 'track_file', 'track_genres', 'track_image_file',
       'track_information', 'track_instrumental', 'track_interest',
       'track_language_code', 'track_listens', 'track_lyricist',
       'track_number', 'track_publisher', 'track_title', 'track_url'],
      dtype='object')

In [None]:
import os
import pandas as pd
import numpy as np
import librosa
from pydub import AudioSegment
import sqlite3  # Using SQLite for simplicity

# Paths
DATASET_PATH = "fma_small"  # Root directory scanned by process_dataset
METADATA_PATH = "fma_metadata/raw_tracks.csv"  # Per-track metadata CSV
DB_PATH = "fingerprints.db"  # SQLite file used by the database helpers below

# Load metadata
# index_col=0 makes the first CSV column the index; process_dataset relies on
# this when it tests `track_id in metadata.index` and looks rows up with .loc.
metadata = pd.read_csv(METADATA_PATH, index_col=0)
def convert_mp3_to_wav(input_path, output_path):
    """Decode an MP3 file with pydub and write it back out as WAV.

    Args:
        input_path: Path of the MP3 file to read.
        output_path: Path the WAV file is written to.

    Returns:
        ``output_path`` on success, or None if reading or exporting raised
        (the error is printed).
    """
    try:
        AudioSegment.from_mp3(input_path).export(output_path, format="wav")
    except Exception as err:
        print(f"Error converting {input_path}: {err}")
        return None
    return output_path

def generate_mel_spectrogram(audio_path, sr=22050, n_mels=128):
    """Load an audio file and return its mel spectrogram in decibels.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sampling rate passed to ``librosa.load``.
        n_mels: Number of mel bands passed to ``melspectrogram``.

    Returns:
        The mel spectrogram converted to dB relative to its maximum, or None
        when the file cannot be loaded, is empty, or processing fails (the
        error is printed).
    """
    try:
        waveform, sample_rate = librosa.load(audio_path, sr=sr)
        has_samples = waveform is not None and len(waveform) > 0
        if not has_samples:
            raise ValueError("Empty or invalid audio file.")
        power_spec = librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=n_mels)
        return librosa.power_to_db(power_spec, ref=np.max)
    except Exception as failure:
        print(f"Error generating Mel spectrogram for {audio_path}: {failure}")
        return None

def extract_fingerprint(mel_spec):
    """Reduce a mel spectrogram to a fixed-length fingerprint vector.

    The fingerprint is the mean of the spectrogram along axis 1, giving one
    value per row of the input.

    Args:
        mel_spec: 2D spectrogram array, or None.

    Returns:
        The per-row means as a list, or None when the input is None, all
        zeros, or cannot be averaged (the error is printed).
    """
    try:
        unusable = mel_spec is None or not np.any(mel_spec != 0)
        if unusable:
            raise ValueError("Spectrogram is empty or has all zero values.")
        return np.mean(mel_spec, axis=1).tolist()
    except Exception as err:
        print(f"Error extracting fingerprint: {err}")
        return None

# Database setup
def initialize_database():
    """Create the ``fingerprints`` table in the SQLite file at DB_PATH.

    The table is only created when missing, so the function is safe to call
    repeatedly. The connection is closed even when the statement fails.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn` commits on success and rolls back on error
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS fingerprints (
                    track_id INTEGER PRIMARY KEY,
                    title TEXT,
                    artist TEXT,
                    album TEXT,
                    fingerprint TEXT
                )
            """)
    finally:
        conn.close()

# Insert fingerprint into database
def insert_fingerprint(track_id, title, artist, album, fingerprint):
    """Insert or replace one track's row in the ``fingerprints`` table.

    Args:
        track_id: Primary key of the row; an existing row with the same id is
            replaced.
        title, artist, album: Track metadata stored alongside the fingerprint.
        fingerprint: Stored as its ``str()`` representation.

    The connection is closed even when the insert fails.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn` commits on success and rolls back on error
        with conn:
            conn.execute("""
                INSERT OR REPLACE INTO fingerprints (track_id, title, artist, album, fingerprint)
                VALUES (?, ?, ?, ?, ?)
            """, (track_id, title, artist, album, str(fingerprint)))
    finally:
        conn.close()

# Process dataset
def process_dataset():
    """Fingerprint every MP3 under DATASET_PATH and store it in the database.

    For each ``<track_id>.mp3`` found one directory level below DATASET_PATH
    whose id is present in ``metadata.index``, the file is converted to WAV
    (unless a WAV with the same stem already exists), turned into a mel
    spectrogram and fingerprint, and inserted together with its title, artist
    and album. Files are visited in sorted order so runs are reproducible.
    A track is skipped when its conversion, spectrogram or fingerprint fails.
    """
    for subdir in sorted(os.listdir(DATASET_PATH)):
        subdir_path = os.path.join(DATASET_PATH, subdir)
        if not os.path.isdir(subdir_path):
            continue

        for file_name in sorted(os.listdir(subdir_path)):
            if not file_name.endswith('.mp3'):
                continue

            try:
                track_id = int(file_name.split('.')[0])  # Assuming filename is "<track_id>.mp3"
            except ValueError:
                print(f"Skipping file with invalid name format: {file_name}")
                continue

            # Only fingerprint tracks that have metadata
            if track_id not in metadata.index:
                continue

            mp3_path = os.path.join(subdir_path, file_name)
            wav_path = os.path.join(subdir_path, file_name[:-len('.mp3')] + '.wav')

            # Convert to WAV if necessary. A failed conversion has already
            # been reported, and there is no WAV to load, so skip the track.
            if not os.path.exists(wav_path):
                if convert_mp3_to_wav(mp3_path, wav_path) is None:
                    continue

            mel_spec = generate_mel_spectrogram(wav_path)
            if mel_spec is None:
                continue

            fingerprint = extract_fingerprint(mel_spec)
            if not fingerprint:
                continue

            title = metadata.loc[track_id, "track_title"]    # Column for track title
            artist = metadata.loc[track_id, "artist_name"]   # Column for artist name
            album = metadata.loc[track_id, "album_title"]    # Column for album title
            insert_fingerprint(track_id, title, artist, album, fingerprint)
            print(f"Inserted fingerprint for track {track_id}")

# Main execution
# Create the fingerprints table if needed, then fingerprint the whole dataset.
# The recorded output shows this guard is true when the cell runs in the notebook.
if __name__ == "__main__":
    initialize_database()
    process_dataset()


Inserted fingerprint for track 75439
Inserted fingerprint for track 75438
Inserted fingerprint for track 75378
Inserted fingerprint for track 75437
Inserted fingerprint for track 75432
Inserted fingerprint for track 75379
Inserted fingerprint for track 75230
Inserted fingerprint for track 75391
Inserted fingerprint for track 75607
Inserted fingerprint for track 75788
Inserted fingerprint for track 75401
Inserted fingerprint for track 75929
Inserted fingerprint for track 75221
Inserted fingerprint for track 75427
Inserted fingerprint for track 75782
Inserted fingerprint for track 75412
Inserted fingerprint for track 75433
Inserted fingerprint for track 75612
Inserted fingerprint for track 75844
Inserted fingerprint for track 75373
Inserted fingerprint for track 75754
Inserted fingerprint for track 75785
Inserted fingerprint for track 75933
Inserted fingerprint for track 75372
Inserted fingerprint for track 75398
Inserted fingerprint for track 75883
Inserted fingerprint for track 75376
I

  y, sr = librosa.load(audio_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Inserted fingerprint for track 133772
Inserted fingerprint for track 133027
Inserted fingerprint for track 133977
Inserted fingerprint for track 133836
Inserted fingerprint for track 133276
Inserted fingerprint for track 133434
Inserted fingerprint for track 133457
Inserted fingerprint for track 133563
Inserted fingerprint for track 133544
Inserted fingerprint for track 133025
Inserted fingerprint for track 133546
Inserted fingerprint for track 133435
Inserted fingerprint for track 133729
Inserted fingerprint for track 133455
Inserted fingerprint for track 133580
Inserted fingerprint for track 133029
Inserted fingerprint for track 133976
Inserted fingerprint for track 133916
Inserted fingerprint for track 133545
Inserted fingerprint for track 133272
Inserted fingerprint for track 133802
Inserted fingerprint for track 133731
Inserted fingerprint for track 133572
Inserted fingerprint for track 133449
Inserted fingerprint for track 133788
Inserted fingerprint for track 133024
Inserted fin

  y, sr = librosa.load(audio_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Inserted fingerprint for track 99419
Inserted fingerprint for track 99437
Inserted fingerprint for track 99391
Inserted fingerprint for track 99374
Inserted fingerprint for track 99345
Inserted fingerprint for track 99501
Inserted fingerprint for track 99442
Inserted fingerprint for track 99375
Inserted fingerprint for track 99364
Inserted fingerprint for track 99096
Inserted fingerprint for track 99436
Inserted fingerprint for track 99441
Inserted fingerprint for track 99214
Inserted fingerprint for track 99369
Inserted fingerprint for track 99363
Inserted fingerprint for track 99703
Inserted fingerprint for track 99373
Inserted fingerprint for track 99361
Inserted fingerprint for track 99371
Inserted fingerprint for track 99260
Inserted fingerprint for track 99392
Inserted fingerprint for track 99439
Inserted fingerprint for track 99395
Inserted fingerprint for track 99394
Inserted fingerprint for track 99438
Inserted fingerprint for track 99041
Inserted fingerprint for track 99370
I

  y, sr = librosa.load(audio_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Inserted fingerprint for track 108422
Inserted fingerprint for track 108867
Inserted fingerprint for track 108882
Inserted fingerprint for track 108812
Inserted fingerprint for track 108303
Inserted fingerprint for track 108992
Inserted fingerprint for track 108845
Inserted fingerprint for track 108532
Inserted fingerprint for track 108427
Inserted fingerprint for track 108020
Inserted fingerprint for track 108841
Inserted fingerprint for track 108488
Inserted fingerprint for track 108060
Inserted fingerprint for track 108969
Inserted fingerprint for track 108475
Inserted fingerprint for track 108298
Inserted fingerprint for track 108836
Inserted fingerprint for track 108473
Inserted fingerprint for track 108461
Inserted fingerprint for track 108878
Inserted fingerprint for track 108319
Inserted fingerprint for track 108808
Inserted fingerprint for track 108318
Inserted fingerprint for track 108428
Inserted fingerprint for track 108846
Inserted fingerprint for track 108528
Inserted fin

In [None]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [None]:
# NOTE(review): this whole cell is a bare string literal, so none of it runs.
# It holds an earlier L2-distance prototype of find_similar_song; the active
# inner-product version is defined in a later cell.
'''import faiss
import numpy as np

def find_similar_song(input_fingerprint, fingerprints):
    # Convert the fingerprints to a numpy array of float32 (required for FAISS)
    db_fingerprints = np.array([entry["fingerprint"] for entry in fingerprints], dtype=np.float32)

    # Initialize a FAISS index (using L2 distance here)
    dimension = db_fingerprints.shape[1]  # Number of features in the fingerprint
    index = faiss.IndexFlatL2(dimension)  # L2 distance is commonly used for vector similarity

    # Add the fingerprints to the FAISS index
    index.add(db_fingerprints)

    # Convert input fingerprint to numpy array of float32 (same format as the database)
    input_fingerprint = np.array(input_fingerprint, dtype=np.float32).reshape(1, -1)

    # Perform the search to find the most similar fingerprint (k=1 to find the closest match)
    distances, indices = index.search(input_fingerprint, k=1)  # k=1 for the closest match

    # Retrieve the best match from the fingerprints
    best_match_index = indices[0][0]  # The index of the closest match
    best_match = fingerprints[best_match_index]
    max_similarity = 1 / (1 + distances[0][0])  # Convert L2 distance to cosine similarity (scaled)

    return best_match, max_similarity'''


In [None]:
# NOTE(review): the string literal below is disabled code (an earlier copy of
# match_audio_snippet); the active definition follows further down in this cell.
'''def match_audio_snippet(snippet_path, fingerprint_db):
    """
    Matches an audio snippet to the fingerprint database.
    """
    mel_spec = generate_mel_spectrogram(snippet_path)
    if mel_spec is None:
        return "Error processing snippet"

    fingerprint = extract_fingerprint(mel_spec)
    if fingerprint is None:
        return "Error generating fingerprint"

    # Search in fingerprint database
    #return fingerprint_db.get(fingerprint, "No match found")
    return find_similar_song(fingerprint, fingerprint_db)'''
import faiss
import numpy as np
import pickle

def load_fingerprint_db(db_path):
    """Unpickle and return the fingerprint database stored at ``db_path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code from the file;
    only use this on files you created yourself.
    """
    with open(db_path, "rb") as handle:
        return pickle.load(handle)

def find_similar_song(input_fingerprint, fingerprints):
    """
    Find the entry whose fingerprint is most similar to the input fingerprint.

    Both the database vectors and the query are L2-normalized and searched
    with a FAISS inner-product index, so the returned score is their cosine
    similarity (higher is more similar). The index is rebuilt on every call.

    Args:
        input_fingerprint: 1D sequence of floats with the same length as the
            database fingerprints.
        fingerprints: Non-empty list of dicts, each with a 'fingerprint' key.
            The caller's data is not modified; normalization works on a copy.

    Returns:
        Tuple ``(best_match, max_similarity)``: the matching dict from
        ``fingerprints`` and its cosine similarity to the input.

    Raises:
        TypeError: If ``fingerprints`` is not a list of dictionaries.
        ValueError: If ``fingerprints`` is empty or the input's length differs
            from the database fingerprints' length.
    """
    if not isinstance(fingerprints, list) or (fingerprints and not isinstance(fingerprints[0], dict)):
        raise TypeError("Expected fingerprints to be a list of dictionaries with 'fingerprint' keys.")
    if not fingerprints:
        raise ValueError("Fingerprint database is empty; nothing to search.")

    db_fingerprints = np.array([entry["fingerprint"] for entry in fingerprints], dtype=np.float32)
    query = np.array(input_fingerprint, dtype=np.float32).reshape(1, -1)

    dimension = db_fingerprints.shape[1]  # Number of features in the fingerprint
    if query.shape[1] != dimension:
        raise ValueError(
            f"Input fingerprint has {query.shape[1]} values but the database uses {dimension}."
        )

    # Inner product of unit-length vectors equals cosine similarity
    index = faiss.IndexFlatIP(dimension)
    faiss.normalize_L2(db_fingerprints)
    index.add(db_fingerprints)
    faiss.normalize_L2(query)

    # k=1 for the closest match
    distances, indices = index.search(query, k=1)

    best_match_index = indices[0][0]  # The index of the closest match
    best_match = fingerprints[best_match_index]
    max_similarity = distances[0][0]  # Cosine similarity score (higher is more similar)

    return best_match, max_similarity

def match_audio_snippet(snippet_path, fingerprint_db):
    """
    Fingerprint an audio snippet and look it up in the fingerprint database.

    Args:
        snippet_path: Path of the audio snippet to identify.
        fingerprint_db: Database passed through to find_similar_song.

    Returns:
        Whatever find_similar_song returns on success. When the spectrogram or
        the fingerprint cannot be produced, an error message string is
        returned instead, so callers must check the type before unpacking.
    """
    spectrogram = generate_mel_spectrogram(snippet_path)
    if spectrogram is not None:
        query = extract_fingerprint(spectrogram)
        if query is not None:
            # Search in the fingerprint database for the best match
            return find_similar_song(query, fingerprint_db)
        return "Error generating fingerprint"
    return "Error processing snippet"





In [None]:
from pydub import AudioSegment
from pydub.playback import play
import pickle
def extract_snippet(input_path, start_ms, end_ms, output_path):
    """Cut a time window out of an MP3 file and save it as a new MP3.

    Args:
        input_path: Path of the source MP3 file.
        start_ms: Start of the window, used as the lower slice bound.
        end_ms: End of the window, used as the upper slice bound.
        output_path: Path the snippet is exported to.

    Returns:
        ``output_path`` (the file path, not audio data).
    """
    clip = AudioSegment.from_mp3(input_path)[start_ms:end_ms]
    clip.export(output_path, format="mp3")
    print(f"Snippet saved as {output_path}")
    return output_path


def load_fingerprint_db(db_path):
    """Read a pickled fingerprint database from ``db_path`` and return it.

    NOTE(review): this re-defines the function of the same name from an
    earlier cell. ``pickle.load`` can execute arbitrary code from the file;
    only use it on files you created yourself.
    """
    with open(db_path, "rb") as pickled:
        return pickle.load(pickled)

def play_audio_snippet(snippet_path):
    """Load the MP3 at ``snippet_path`` with pydub and hand it to ``play``."""
    play(AudioSegment.from_mp3(snippet_path))  # Play the audio snippet




start_ms = 0  # Start time of the snippet in milliseconds
end_ms = 10000  # End time of the snippet in milliseconds
input_path = "fma_small/134/134446.mp3"  # Track the snippet is cut from
output_path = "test.mp3"  # File the snippet is written to
# NOTE: despite its name, input_fingerprint holds the snippet's file path
# (extract_snippet returns output_path), not a fingerprint vector.
input_fingerprint = extract_snippet(input_path, start_ms, end_ms, output_path)



Snippet saved as test.mp3


In [None]:
import numpy as np
import sqlite3
import ast  # Import the ast module to safely evaluate the string as a list

# Load every stored fingerprint from the SQLite database
DB_PATH = 'fingerprints.db'
conn = sqlite3.connect(DB_PATH)
try:
    cursor = conn.cursor()
    cursor.execute("SELECT track_id, fingerprint FROM fingerprints")
    rows = cursor.fetchall()
finally:
    # Release the connection even if the query fails (e.g. the table is missing)
    conn.close()

# Convert the results to a list of dictionaries for easier handling
fingerprints = []
for row in rows:
    track_id = row[0]
    fingerprint_data = row[1]

    # Fingerprints are stored as the string form of a list; parse them safely
    if isinstance(fingerprint_data, str):
        try:
            fingerprint_array = np.array(ast.literal_eval(fingerprint_data), dtype=np.float32)
        except Exception as e:
            print(f"Error parsing fingerprint data for track {track_id}: {e}")
            continue
    else:
        # If the fingerprint data is already a list or array, convert it directly to numpy array
        fingerprint_array = np.array(fingerprint_data, dtype=np.float32)

    fingerprints.append({"track_id": track_id, "fingerprint": fingerprint_array})

# `input_fingerprint` holds the snippet's file path (set in the previous cell).
# match_audio_snippet returns an error string instead of a tuple when the
# snippet cannot be processed, so check before unpacking.
match_result = match_audio_snippet(input_fingerprint, fingerprints)
if isinstance(match_result, str):
    raise RuntimeError(f"Could not match snippet {input_fingerprint!r}: {match_result}")
best_match, max_similarity = match_result

print(f"Best match: {best_match['track_id']}, Similarity: {max_similarity}")


Best match: 11795, Similarity: 0.9981878995895386


In [None]:
play_audio_snippet('test.mp3')


In [None]:
from IPython.display import Audio
# NOTE(review): the loaded samples and sampling rate are not used in this cell;
# the player below is built from the file path.
audio, sr = librosa.load('test.mp3', sr=None)

# Display the clickable audio player
Audio('test.mp3')

In [None]:
def get_file_path_from_id(track_id, dataset_path="fma_small"):
    """Return the path of the audio file for ``track_id``, or None if absent.

    The dataset stores tracks as ``<dataset>/<first 3 digits>/<6-digit id>.mp3``
    (e.g. ``fma_small/134/134446.mp3``), so that location is checked first. If
    it does not exist, every subdirectory is scanned in sorted order for a
    file whose name before the first '.' parses to ``track_id``; sorting keeps
    the result stable when several files share the same id.

    Args:
        track_id: Numeric track identifier.
        dataset_path: Root directory of the dataset.

    Returns:
        Path of the matching file, or None when no file matches.
    """
    # Fast path: the conventional location of the MP3 file
    try:
        whole_id = int(track_id)
    except (TypeError, ValueError):
        whole_id = None
    if whole_id is not None and whole_id == track_id:
        padded = f"{whole_id:06d}"
        direct_path = os.path.join(dataset_path, padded[:3], padded + ".mp3")
        if os.path.isfile(direct_path):
            return direct_path

    # Fallback: scan the directory tree
    for subdir in sorted(os.listdir(dataset_path)):
        subdir_path = os.path.join(dataset_path, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for file_name in sorted(os.listdir(subdir_path)):
            try:
                file_track_id = int(file_name.split('.')[0])
            except ValueError:
                continue  # Skip files that don't match the expected format
            if file_track_id == track_id:
                return os.path.join(subdir_path, file_name)
    return None  # Track ID not found

In [None]:
# Look up the audio file of the best match found above and render a player for it.
track_id=best_match['track_id']
path=get_file_path_from_id(track_id, dataset_path="fma_small")
# NOTE(review): get_file_path_from_id returns None when no file matches; that
# case is not handled here.
Audio(path)

In [None]:
Audio('fma_small/134/134446.mp3')

In [None]:
# `index` only exists as a local variable inside find_similar_song, so build
# the same normalized inner-product index here from the fingerprints loaded
# from the database before writing it out.
db_fingerprints = np.array([entry["fingerprint"] for entry in fingerprints], dtype=np.float32)
faiss.normalize_L2(db_fingerprints)
index = faiss.IndexFlatIP(db_fingerprints.shape[1])
index.add(db_fingerprints)

faiss.write_index(index, "/content/fingerprint_index.faiss")
from google.colab import files

# Download the FAISS index file
files.download("/content/fingerprint_index.faiss")
