In [1]:
import librosa
import matplotlib.pyplot as plt
import IPython.display as ipd
import numpy as np
import pandas as pd

In [2]:
# Column names for the feature table: twelve hand-listed summary statistics,
# then the mean columns for MFCC 1-20, then the variance columns for MFCC 1-20.
features = [
    "harmonic_mean", "harmonic_var",
    "percussive_mean", "percussive_var",
    "rms_mean", "rms_var",
    "spec_second_mean", "spec_second_var",
    "spec_third_mean", "spec_third_var",
    "spec_fourth_mean", "spec_fourth_var",
]

# All *_mean MFCC columns come before any *_var column, matching the original order.
features.extend(f"mfcc{k}_mean" for k in range(1, 21))
features.extend(f"mfcc{k}_var" for k in range(1, 21))

# Load the per-track feature matrix. Passing names= supplies the column labels,
# so pandas reads the first line of the file as data rather than as a header.
X = pd.read_csv("extracted_features/recommender/data.txt", names=features)

# Load the target labels. y.txt holds all labels on a single row, so each label is
# read into its own column (numbered from 1) and the frame is then transposed to
# get one row per track. The number of columns is taken from X rather than being
# hard-coded, because y must have exactly as many entries as X has rows.
n_tracks = len(X)
y = pd.read_csv("extracted_features/recommender/y.txt", names=range(1, n_tracks + 1))
y = y.transpose()

print(X.shape, y.shape)
X.head()

(1000, 52) (1000, 1)


Unnamed: 0,harmonic_mean,harmonic_var,percussive_mean,percussive_var,rms_mean,rms_var,spec_second_mean,spec_second_var,spec_third_mean,spec_third_var,...,mfcc11_var,mfcc12_var,mfcc13_var,mfcc14_var,mfcc15_var,mfcc16_var,mfcc17_var,mfcc18_var,mfcc19_var,mfcc20_var
0,3.3e-05,0.013656,0.00033,0.003486,0.563806,0.078948,33680.954503,112863600.0,1.77384,0.201149,...,38.205821,38.270407,31.941718,33.75805,31.906852,35.416081,34.731329,37.898421,30.602889,32.291942
1,0.000476,0.016595,0.000518,0.002004,0.540435,0.104708,35164.654938,254444200.0,2.245021,0.828744,...,43.022364,51.437401,43.785227,40.777942,33.001429,37.395107,38.419952,41.217259,30.989519,33.996483
2,-0.000163,0.016543,-0.000268,0.004633,0.588423,0.050648,25625.525443,84169980.0,2.225454,0.459023,...,43.948866,48.251983,43.211023,34.675321,34.501859,29.404089,28.175414,31.29806,29.62331,29.823241
3,3.1e-05,0.016952,-0.000104,0.001813,0.534345,0.111438,22318.48158,124214800.0,3.247749,1.241887,...,58.349657,41.591428,36.386798,29.266951,44.090713,27.633107,30.182088,31.985474,28.84573,24.563945
4,0.000114,0.006187,-1.2e-05,0.001507,0.537805,0.107858,25740.695941,97228500.0,2.012868,0.818298,...,43.553411,42.58657,41.851272,41.111907,64.879078,44.235949,55.176918,50.945027,49.12491,46.473739


In [3]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing

# Standardize every feature column to zero mean and unit variance so that columns
# with very different magnitudes contribute comparably to the similarity computation.
# A single pass is sufficient: the former extra preprocessing.scale() call re-standardized
# data that was already standardized, which changes nothing beyond rounding error.
scaler = preprocessing.StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled.shape

(1000, 52)

In [4]:
# Pairwise cosine similarity between every pair of rows in the scaled feature matrix
similarity = cosine_similarity(X_scaled)
print("Similarity shape:", similarity.shape)

# Wrap the matrix in a DataFrame whose rows and columns both carry y's index,
# so a track's similarities can be looked up by the same label on either axis.
similarity_df = pd.DataFrame(similarity, index=y.index, columns=y.index)

similarity_df.head()

Similarity shape: (1000, 1000)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,991,992,993,994,995,996,997,998,999,1000
1,1.0,0.248292,0.515886,0.049475,0.150898,-0.228572,-0.225152,-0.117487,0.513466,-0.027462,...,-0.104872,0.681322,0.746085,0.532474,0.596768,0.585685,0.678663,0.600131,0.190153,0.238831
2,0.248292,1.0,0.158677,0.378267,0.172999,0.269733,0.401926,0.420695,0.211839,0.372605,...,-0.230773,-0.126505,-0.165927,-0.244623,-0.196019,-0.123126,-0.138835,-0.1785,0.250059,0.202961
3,0.515886,0.158677,1.0,0.19968,0.393114,0.077947,0.127912,0.325044,0.419129,-0.124536,...,-0.045626,0.499463,0.544998,0.531318,0.500931,0.450736,0.495953,0.533121,0.284208,0.309003
4,0.049475,0.378267,0.19968,1.0,0.026736,0.076193,0.215766,0.141998,0.122127,-0.051372,...,-0.216941,-0.101753,-0.160092,-0.056907,-0.093326,-0.063542,0.042561,-0.10682,0.356812,0.393374
5,0.150898,0.172999,0.393114,0.026736,1.0,0.622847,0.438621,0.49158,0.012003,0.485862,...,0.119366,0.028044,0.11528,0.067564,-0.037877,0.052069,0.007045,0.117258,-0.003887,-0.081652


In [5]:
# Query track: the song we want recommendations for. Load it and render a player.
query_path = "data/genre_data/pop/pop.00019.wav"
track, sr = librosa.load(query_path)
ipd.Audio(data=track, rate=sr)

In [6]:
# Top three most similar songs to the query track.
# NOTE(review): 720 is presumably the similarity_df label of the query song loaded
# above -- confirm the label-to-file mapping.
track_idx = 720

# Remove the query's own entry by label before ranking. Slicing off the first sorted
# position instead would assume the query always sorts first, which is not guaranteed
# when another track ties at the maximum similarity (e.g. a duplicate recording);
# in that case the query itself could be returned as a recommendation.
top3 = similarity_df[track_idx].drop(track_idx).nlargest(3)
top3

724    0.834363
735    0.809479
792    0.795371
Name: 720, dtype: float64

In [7]:
# Most similar song: load it and render a player for listening comparison.
# NOTE(review): the file is hard-coded; presumably it corresponds to the first
# entry of top3 -- verify the label-to-file mapping.
first_match_path = "data/genre_data/pop/pop.00023.wav"
track, sr = librosa.load(first_match_path)
ipd.Audio(data=track, rate=sr)

In [8]:
# Second most similar song: load it and render a player for listening comparison.
# NOTE(review): the file is hard-coded; presumably it corresponds to the second
# entry of top3 -- verify the label-to-file mapping.
second_match_path = "data/genre_data/pop/pop.00034.wav"
track, sr = librosa.load(second_match_path)
ipd.Audio(data=track, rate=sr)

In [9]:
# Third most similar song: load it and render a player for listening comparison.
# NOTE(review): the file is hard-coded; presumably it corresponds to the third
# entry of top3 -- verify the label-to-file mapping.
third_match_path = "data/genre_data/pop/pop.00091.wav"
track, sr = librosa.load(third_match_path)
ipd.Audio(data=track, rate=sr)