# Get similar songs from an uploaded mp3

In [72]:
# TODO - Similarity search does not work at all with features extracted from a new mp3. Work out how to fix this.

In [23]:
# IMPORTS
from keras.models import load_model
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

import os
import ast

import librosa
import IPython.display as ipd

import tqdm
from scipy import stats


import FMA_code.features
from notebook_utils import md

In [24]:
# PATHS
class paths:
    """Namespace of relative file-system locations used throughout the notebook."""

    # Directory holding the FMA metadata CSV files
    FMA_D = 'data/FMA/fma_metadata/'

    # Individual metadata files inside FMA_D
    FMA_FEATURE = f'{FMA_D}features.csv'  # features for each track
    FMA_TRACKS = f'{FMA_D}tracks.csv'     # metadata for every track. We only use the genre column
    FMA_GENRE = f'{FMA_D}genres.csv'      # genre key from ID to string

    # Directory holding the FMA small dataset
    FMA_SMALL_D = 'data/FMA/fma_small/'

In [25]:
# LOAD THE EMBEDDING MODEL & NORMALISATION PARAMETERS
# Keras model; later cells call .predict() on it with normalised feature vectors
# to obtain the embeddings that are compared by cosine similarity.
embedding_model = load_model('models/FMA_embedding_model.keras')
# Arrays passed as the min / max arguments of normalize_new_song_features below.
# NOTE(review): presumably the per-feature min and max saved when the training
# data was normalised — confirm against the notebook that wrote these files.
dimMin = np.load('models/FMA_dimMin.npy')
dimMax = np.load('models/FMA_dimMax.npy')

In [26]:
# PREPARE FMA FEATURES & GENRES
def load_FMA(filepath):
    """Load one of the FMA metadata CSV files into a DataFrame.

    Adapted from the FMA project's utils.py. The file type is recognised from
    a keyword contained in the file name.

    Args:
    - filepath: Path to a CSV whose base name contains 'features', 'echonest',
      'genres' or 'tracks'.

    Returns:
    - features / echonest: DataFrame with a three-level column header.
    - genres: DataFrame indexed by the first CSV column.
    - tracks: DataFrame with a two-level column header where list-like columns
      are parsed, date columns are converted to datetimes and a fixed set of
      columns is converted to categoricals.

    Raises:
    - ValueError: if the file name matches none of the known keywords
      (previously the function silently returned None in that case).
    """
    filename = os.path.basename(filepath)

    # Both files share the same layout: three header rows, index in column 0.
    if 'features' in filename or 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        # These columns hold Python literals stored as text; parse them back.
        COLUMNS = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                   ('track', 'genres'), ('track', 'genres_all')]
        for column in COLUMNS:
            tracks[column] = tracks[column].map(ast.literal_eval)

        COLUMNS = [('track', 'date_created'), ('track', 'date_recorded'),
                   ('album', 'date_created'), ('album', 'date_released'),
                   ('artist', 'date_created'), ('artist', 'active_year_begin'),
                   ('artist', 'active_year_end')]
        for column in COLUMNS:
            tracks[column] = pd.to_datetime(tracks[column])

        SUBSETS = ('small', 'medium', 'large')

        # the categories and ordered arguments were removed in pandas 0.25
        tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                    pd.CategoricalDtype(categories=SUBSETS, ordered=True))

        COLUMNS = [('track', 'genre_top'), ('track', 'license'),
                   ('album', 'type'), ('album', 'information'),
                   ('artist', 'bio')]
        for column in COLUMNS:
            tracks[column] = tracks[column].astype(pd.CategoricalDtype())

        return tracks

    # Fail loudly instead of handing None to the caller.
    raise ValueError(
        "Unrecognised FMA file name {!r}: expected it to contain one of "
        "'features', 'echonest', 'genres' or 'tracks'".format(filename))

def normalize_new_song_features(new_song_features, dimMin, dimMax):
    '''Min-max normalise new features using the min and max obtained when normalising the train dataset.

    Args:
    - new_song_features: Feature values to normalise (scalar, array, Series or DataFrame).
    - dimMin: Minimum used for each feature dimension.
    - dimMax: Maximum used for each feature dimension.

    Returns:
    - (new_song_features - dimMin) / (dimMax - dimMin). Dimensions where
      dimMax == dimMin are divided by 1 instead of 0, so they yield
      new_song_features - dimMin rather than NaN/inf.
    '''
    dimRange = dimMax - dimMin
    # Adding the boolean mask turns every zero range into 1 and leaves all
    # other entries untouched, while keeping the container type (and any
    # pandas labels) of dimRange intact.
    dimRange = dimRange + (dimRange == 0)
    norm_features = (new_song_features - dimMin) / dimRange
    return norm_features

def get_genre_mappings(FMA_genres):
    """Build lookup dictionaries from the genres table.

    Args:
    - FMA_genres: DataFrame indexed by genre ID with a 'title' column.

    Returns:
    - genre_to_id: genre title -> genre ID
    - id_to_genre: genre ID -> genre title
    - id_to_idx: genre ID -> position of that ID in the table (0-based)
    """
    id_to_genre = FMA_genres['title'].to_dict()

    # Invert the mapping; a repeated title keeps the last ID seen.
    genre_to_id = dict(zip(id_to_genre.values(), id_to_genre.keys()))

    # Number the genre IDs in table order.
    id_to_idx = dict(zip(id_to_genre, range(len(id_to_genre))))

    return genre_to_id, id_to_genre, id_to_idx

# Load data
# Each call dispatches on the file name inside load_FMA:
# tracks -> per-track metadata, genres -> genre table, features -> per-track features.
FMA_tracks = load_FMA(paths.FMA_TRACKS)
FMA_genres = load_FMA(paths.FMA_GENRE)
FMA_features = load_FMA(paths.FMA_FEATURE)

# Get genre mappings (title -> ID, ID -> title, ID -> positional index)
genre_to_id, id_to_genre, id_to_idx = get_genre_mappings(FMA_genres)

In [27]:
# GET EMBEDDINGS FOR FMA_SMALL DATASET
# Get all mp3 files in the FMA small dataset
fma_small_mp3 = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(paths.FMA_SMALL_D)
    for file in files
    if file.endswith('.mp3')
]

# Get the track IDs from the mp3 file names (e.g. '000002.mp3' -> 2).
# os.path copes with the platform's path separator, and int() already ignores
# leading zeros, so no manual stripping is needed (stripping would turn an
# all-zero name into an empty string and make int() fail).
fma_small_ids = [int(os.path.splitext(os.path.basename(file))[0]) for file in fma_small_mp3]

# Filter FMA_small_features to only include the FMA small subset
FMA_small_features = FMA_features.loc[fma_small_ids]

# Normalise the features using the same parameters as the training data
FMA_small_features_norm = normalize_new_song_features(FMA_small_features, dimMin, dimMax)

# Get embeddings for the FMA_small dataset
FMA_small_embeddings = embedding_model.predict(FMA_small_features_norm)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [70]:
# GET EMBEDDINGS FOR NEW MP3 FILE
# Functions obtained from features.py from the FMA GitHub
def columns():
    """Return the sorted (feature, statistics, number) MultiIndex labelling the feature vector.

    Every feature contributes one entry per statistic and per dimension, with
    dimensions numbered from '01' as zero-padded two-digit strings.
    """
    feature_sizes = {
        'chroma_stft': 12, 'chroma_cqt': 12, 'chroma_cens': 12,
        'tonnetz': 6, 'mfcc': 20, 'rmse': 1, 'zcr': 1,
        'spectral_centroid': 1, 'spectral_bandwidth': 1,
        'spectral_contrast': 7, 'spectral_rolloff': 1,
    }
    moments = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    labels = [
        (name, moment, '{:02d}'.format(number))
        for name, size in feature_sizes.items()
        for moment in moments
        for number in range(1, size + 1)
    ]

    index = pd.MultiIndex.from_tuples(labels, names=('feature', 'statistics', 'number'))

    # More efficient to slice if indexes are sorted.
    return index.sort_values()

def compute_features(mp3_filepath):
    """Compute summary statistics of several librosa audio features for one file.

    Per the notebook, this is taken from features.py in the FMA GitHub repository.

    Args:
    - mp3_filepath: Path of the audio file handed to librosa.load.

    Returns:
    - A float64 pandas Series indexed by columns(); for every feature it holds
      the mean, std, skew, kurtosis, median, min and max of each feature row.

    NOTE(review): whether these values are comparable with the rows of
    features.csv depends on the librosa version and audio used to build that
    file, which cannot be told from this notebook — confirm.
    """

    # Pre-allocated result; every slot is filled by feature_stats below.
    features = pd.Series(index=columns(), dtype=np.float64)

    def feature_stats(name, values):
        # Reduce `values` along axis 1 and store one statistic per row under
        # features[name, <statistic>].
        # (axis 1 is presumably the time-frame axis of librosa outputs — confirm)
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    # sr=None: no target sampling rate is requested; mono=True: single channel.
    x, sr = librosa.load(mp3_filepath, sr=None, mono=True)  # kaiser_fast

    f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
    feature_stats('zcr', f)

    # Constant-Q magnitude with 7 octaves x 12 bins; shared by both CQT-based chroma features.
    cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                n_bins=7*12, tuning=None))
    # Sanity checks: 84 frequency bins and roughly one frame per 512-sample hop.
    assert cqt.shape[0] == 7 * 12
    assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

    f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cqt', f)
    f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
    feature_stats('chroma_cens', f)
    # Order matters: `f` still holds the chroma_cens output here, so tonnetz is derived from it.
    f = librosa.feature.tonnetz(chroma=f)
    feature_stats('tonnetz', f)

    # Drop the reference to the CQT before building the STFT.
    del cqt
    # STFT magnitude; reused by every spectral feature below.
    stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
    # Sanity checks: 1025 frequency bins and roughly one frame per 512-sample hop.
    assert stft.shape[0] == 1 + 2048 // 2
    assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
    # The raw samples are not used past this point.
    del x

    # chroma_stft is given the squared magnitude (power), unlike the features below.
    f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
    feature_stats('chroma_stft', f)

    # Stored under the name 'rmse' to match the labels produced by columns().
    f = librosa.feature.rms(S=stft)
    feature_stats('rmse', f)

    f = librosa.feature.spectral_centroid(S=stft)
    feature_stats('spectral_centroid', f)
    f = librosa.feature.spectral_bandwidth(S=stft)
    feature_stats('spectral_bandwidth', f)
    # n_bands=6 pairs with the size of 7 declared for spectral_contrast in columns().
    f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
    feature_stats('spectral_contrast', f)
    f = librosa.feature.spectral_rolloff(S=stft)
    feature_stats('spectral_rolloff', f)

    # Mel spectrogram from the squared magnitude, converted to dB, then 20 MFCCs.
    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    del stft
    f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
    feature_stats('mfcc', f)

    return features

def play_song(filename, start=5, end=25):
    """Display an inline audio player for an excerpt of an audio file.

    Args:
    - filename: Path of the audio file to load with librosa.
    - start: Offset in seconds where the excerpt begins (default 5).
    - end: Offset in seconds where the excerpt stops (default 25).

    The defaults reproduce the previously hard-coded 5-25 s window.
    """
    x, sr = librosa.load(filename, sr=None, mono=True)
    # Convert seconds to sample positions; int() keeps the slice valid when
    # fractional seconds are passed.
    first, last = int(start * sr), int(end * sr)
    display(ipd.Audio(data=x[first:last], rate=sr))

# Compute features for the song
# mp3_ex is also reused by the next cell to play the query song.
mp3_ex = 'data/Beyoncé - Single Ladies.mp3'
ex_features = compute_features(mp3_ex)

# Ensure features have expected column names
# (dimMin/dimMax are plain arrays, so the normalisation below is positional and
# relies on the query features being in the same order as the dataset columns)
assert all(FMA_small_features.columns == ex_features.index)

# Normalise the features
ex_features_norm = normalize_new_song_features(ex_features, dimMin, dimMax)

# Get the embedding for the song
# reshape(1, -1) turns the 1-D feature vector into a single-row batch for predict().
query_embedding = embedding_model.predict(ex_features_norm.values.reshape(1, -1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


In [71]:
# FIND SIMILAR SONGS
# TRY OUT THE SIMILARITY FUNCTION

# Function to compute similarity between a query embedding and a set of embeddings
def find_most_similar(query_embedding, song_embeddings, song_list, top_k=5, self_match_threshold=0.99):
    """
    Find the most similar songs to the query embedding.

    Args:
    - query_embedding: The embedding vector for the query song (1D array, or
      any array that can be reshaped to a single row).
    - song_embeddings: 2D array of all song embeddings.
    - song_list: List of song names corresponding to the embeddings.
    - top_k: Number of similar songs to return.
    - self_match_threshold: Candidates whose cosine similarity is greater than
      or equal to this value are treated as the query itself and skipped
      (default 0.99, the previously hard-coded value).

    Returns:
    - A list of up to top_k (song, similarity) tuples, most similar first.
      Fewer than top_k are returned only when not enough candidates fall
      below self_match_threshold.
    """
    if top_k <= 0:
        return []

    # Reshape query embedding and compute cosine similarity
    query_embedding = np.asarray(query_embedding).reshape(1, -1)
    similarities = cosine_similarity(query_embedding, song_embeddings).flatten()

    # Rank every candidate, most similar first.
    ranked = similarities.argsort()[::-1]

    # Drop (near-)perfect matches BEFORE truncating. Filtering only the best
    # top_k + 1 candidates, as was done previously, returned too few songs
    # whenever two or more candidates reached the threshold.
    keep = similarities[ranked] < self_match_threshold
    top_indices = ranked[keep][:top_k]

    # Return top similar songs with their similarity scores
    similar_songs = [(song_list[i], similarities[i]) for i in top_indices]
    return similar_songs

def get_audio_path(audio_dir, track_id):
    """
    Build the mp3 path of a track inside an FMA audio directory.

    The track ID is zero-padded to six digits; the first three digits name the
    sub-directory and the full six digits name the file.

    Examples
    --------
    >>> import utils
    >>> AUDIO_DIR = os.environ.get('AUDIO_DIR')
    >>> utils.get_audio_path(AUDIO_DIR, 2)
    '../data/fma_small/000/000002.mp3'

    """
    padded_id = f'{track_id:06d}'
    subfolder = padded_id[:3]
    mp3_name = padded_id + '.mp3'
    return os.path.join(audio_dir, subfolder, mp3_name)

def play_FMA_song(AUDIO_DIR, song_idx):
    """Display an inline audio player for an excerpt of an FMA track.

    Args:
    - AUDIO_DIR: Root directory of the FMA audio files.
    - song_idx: Track ID; converted with int() before the path is built.
    """
    filename = get_audio_path(AUDIO_DIR, int(song_idx))

    # Reuse play_song rather than repeating its load / slice / display logic.
    play_song(filename)
    
# Play the query song
# The label is derived from the file name so it always matches the file being played.
query_title = os.path.splitext(os.path.basename(mp3_ex))[0]
print(f'Query song: {query_title}')
play_song(mp3_ex)

# Find the 3 songs most similar to the query embedding
most_similar = find_most_similar(query_embedding, FMA_small_embeddings, FMA_small_features_norm.index, top_k=3)

# Play those songs
print('Similar songs:')
for song_id, similarity in most_similar:
    # Look the track up once; the row is indexed by (group, field) tuples.
    track_row = FMA_tracks.loc[song_id]
    track_title = track_row['track', 'title']
    album_title = track_row['album', 'title']
    # Built outside the f-string: nesting the same quote type inside an
    # f-string is a SyntaxError before Python 3.12.
    genre_names = [id_to_genre[genre_id] for genre_id in track_row['track', 'genres_all']]

    print(f'Song ID: {song_id}')
    print(f'{track_title} | {album_title}')
    print(f'Genres: {genre_names}')
    print('Similarity: {:.2f}'.format(similarity))
    play_FMA_song(paths.FMA_SMALL_D, song_id)
print('\n')

Query song: Beyoncé - Single Ladies


Similar songs:
Song ID: 109548
alittlebitofjazz | Scrunch
Genres: ['Avant-Garde', 'Experimental', 'Electroacoustic']
Similarity: 0.79


Song ID: 116029
In a Heart of Jade | Serpent's Moratorium
Genres: ['Electronic']
Similarity: 0.74


Song ID: 55481
Waves | Waves
Genres: ['Noise', 'Experimental']
Similarity: 0.74




