In [9]:
import os
import librosa
import pandas as pd
import numpy as np
from tinytag import TinyTag
from PIL import Image
import io
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.id3 import APIC

# replace the path.txt with a text file that points to your local library folder
# you can also duretly define "audio_folder" variable as your path like /Users/johnappleseed/Music/iTunes
path_to_library = 'path.txt'

with open(path_to_library, 'r') as file:
    audio_folder = file.read().strip()

print(f"Audio folder: {audio_folder}")

def get_album_art(file_path):
    album_art = None
    if file_path.lower().endswith('.mp3'):
        audio = MP3(file_path)
        for tag in audio.tags.values():
            if isinstance(tag, APIC):
                album_art = Image.open(io.BytesIO(tag.data))
                break
    elif file_path.lower().endswith('.flac'):
        audio = FLAC(file_path)
        if audio.pictures:
            pic = audio.pictures[0]
            album_art = Image.open(io.BytesIO(pic.data))
    return album_art

def load_audio_data(file_path):
    try:
        audio_data, sample_rate = librosa.load(file_path, sr=None)
        if audio_data.size == 0:
            print(f"Skipping empty file: {file_path}")
            return None

        tag = TinyTag.get(file_path)
        metadata = tag.as_dict()
        
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sample_rate)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_data, sr=sample_rate)
        zcr = librosa.feature.zero_crossing_rate(y=audio_data)
        tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sample_rate)
        chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
        
        album_art = get_album_art(file_path)

        artist_title_year = f"{tag.artist} - {tag.title} ({tag.year})"

        return {
            'file_name': artist_title_year,
            'sample_rate': sample_rate,
            'metadata': metadata,
            'album_art': album_art,
            'audio_data': audio_data,
            'spectral_contrast': spectral_contrast.mean(axis=1),
            'spectral_centroid': spectral_centroid.mean(axis=1),
            'spectral_bandwidth': spectral_bandwidth.mean(axis=1),
            'zcr': zcr.mean(axis=1),
            'tempo': tempo,
            'chroma': chroma.mean(axis=1)
        }
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

audio_extensions = ['.flac', '.aiff', '.mp3']

audio_files_data = []
for file_name in os.listdir(audio_folder):
    if os.path.splitext(file_name)[1].lower() in audio_extensions:
        print(f"Processing: {file_name}")
        file_path = os.path.join(audio_folder, file_name)
        result = load_audio_data(file_path)
        if result is not None:
            audio_files_data.append(result)

df = pd.DataFrame(audio_files_data)

print('ready')
#df.tail()

Audio folder: /Users/victoriapaskannaya/Desktop/Music (testing subset)
Processing: 127571_100 Percent Pure Love_(Original Mix).mp3
Processing: 12396076_Work_It_Soulwax_Remix.mp3
Processing: 10762898_Blade_Runner_Maceo_Plex_Renaissance_Remix.mp3
Processing: 9813018_Beautiful_People_Underground_Network_Mix.aiff
Processing: 2_The_Age_of_Love_Solomun_Renaissance_Remix.mp3
Processing: 5169284_Groove_Is_In_The_Heart_Instrumental.mp3
Processing: 9813085_Free_Original_Mix.mp3
Processing: 9340168_Body Language_(Original Mix).mp3
Processing: 15350623_Insomnia 2021_(Epic Mix).mp3
Processing: 10591853_At Night_(Purple Disco Machine Extended Remix).aiff
Processing: 8942157_Insomnia_(Original Mix).mp3
Processing: 127533_Gypsy Woman_(Teddy Douglas Edit).mp3
Processing: 191800_Tell_Me_Why_Original_Mix.mp3
Processing: 3_Domino_Rework.mp3
ready


In [10]:
# Calculate means for music values to prepare the data
# Average over the correct axis and make variable suitable for aggregation

def mean_of_lists(data):
    if isinstance(data[0], (list, np.ndarray)):
        return np.mean(data, axis=0).tolist()
    return data

# Apply the function to each column
df['mean_spectral_contrast'] = df['spectral_contrast'].apply(mean_of_lists)
df['mean_chroma'] = df['chroma'].apply(mean_of_lists)

df['mean_spectral_contrast'] = df['mean_spectral_contrast'].apply(np.mean)
df['mean_chroma'] = df['mean_chroma'].apply(np.mean)

print('ready')
df.head()

ready


Unnamed: 0,file_name,sample_rate,metadata,album_art,audio_data,spectral_contrast,spectral_centroid,spectral_bandwidth,zcr,tempo,chroma,mean_spectral_contrast,mean_chroma
0,Crystal Waters - 100 Percent Pure Love (Origin...,44100,"{'filesize': 18250607, 'album': 'Anthology', '...",<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[8.269285e-12, 1.7083732e-11, 9.668148e-12, -1...","[17.16224904156293, 11.96825727735031, 14.3845...",[3184.0195956397934],[3286.5330468270004],[0.07300851921138585],120.18532,"[0.5509958, 0.5714973, 0.5900755, 0.47315145, ...",20.737831,0.526597
1,"Marie Davidson, Pierre Guerineau - Work It (So...",44100,"{'filesize': 11567077, 'album': 'Chasing the L...",<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[17.11605705070837, 9.206327418860866, 13.9195...",[2292.03662397257],[2260.9724681459247],[0.06207726943280508],126.048018,"[0.48066157, 0.45022473, 0.5234966, 0.49173036...",19.486957,0.472061
2,Remake - Blade Runner (Maceo Plex Renaissance ...,44100,"{'filesize': 21505322, 'album': 'Blade Runner'...",<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[0.00016452477, -0.00025989363, -0.0004998131,...","[19.131861653185393, 9.478848799693408, 13.183...",[3613.052733911983],[3914.39566010277],[0.06689411923397304],123.046875,"[0.6498554, 0.64506996, 0.6692225, 0.705606, 0...",20.715866,0.658172
3,Barbara Tucker - Beautiful People (Underground...,44100,"{'filesize': 96030160, 'album': 'Beautiful Peo...",,"[4.5776367e-05, -4.5776367e-05, 3.0517578e-05,...","[19.69313443882562, 10.3364412823517, 15.13853...",[3252.239796887583],[3432.116232584501],[0.07305430364498848],123.046875,"[0.5416319, 0.5483058, 0.5367707, 0.46339896, ...",18.813928,0.528258
4,Age Of Love - The Age of Love (Solomun Renaiss...,44100,"{'filesize': 19701580, 'album': 'The Age Of Lo...",<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[3.406152e-06, -9.709579e-07, 2.886095e-06, -3...","[12.783295517143358, 12.091735892044897, 15.35...",[2379.5193218740283],[3313.4411207636695],[0.03438121562337271],126.048018,"[0.49214146, 0.48657697, 0.5033874, 0.57146925...",20.325751,0.558657


In [28]:
from sklearn.preprocessing import StandardScaler
from annoy import AnnoyIndex

features = df[['tempo', 'mean_spectral_contrast', 'mean_chroma', 'zcr', 'spectral_centroid', 'spectral_bandwidth']]
features_scaled = StandardScaler().fit_transform(features)

# building the index
f = features_scaled.shape[1]
# choose the distance metric, 'angular' or 'euclidean' in my case
t = AnnoyIndex(f, 'angular')  

for i, vector in enumerate(features_scaled):
    t.add_item(i, vector)

In [29]:
# let's finally build the index with 15 trees
t.build(15)  

# get the nearest neighbors for each item
# i.e. to find the 10 nearest neighbors for the first item
indices = t.get_nns_by_item(0, 10)

# a quick check to see the nearest neighbour 
nn_df = pd.DataFrame([t.get_nns_by_item(i, 10) for i in range(len(features_scaled))])

nn_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,5,11,3,1,2,9,6,12,4
1,1,10,4,5,3,0,11,9,12,8
2,2,13,8,11,6,7,9,0,3,4
3,3,9,1,0,8,5,10,2,4,12
4,4,10,1,11,8,5,13,3,2,7


In [30]:
import plotly.graph_objects as go
from sklearn.decomposition import PCA
#from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# it is time to visualize!

# I reduced dimentionality for 3D visualization by applying PCA
# principal_components_3d is the PCA result with 3 components
pca = PCA(n_components=3)
principal_components_3d = pca.fit_transform(features_scaled)

# 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=principal_components_3d[:, 0],
    y=principal_components_3d[:, 1],
    z=principal_components_3d[:, 2],
    mode='markers+text',
    text=df['file_name'],
    marker=dict(
        size=5,
        color=principal_components_3d[:, 2],  # Color by z-axis value for depth effect
        colorscale='Viridis',
        opacity=0.8
    ),
    textposition='top center'
)])

# layout
fig.update_layout(
    title='3D PCA of Songs',
    scene=dict(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3'
    ),
    margin=dict(l=0, r=0, b=0, t=0)
)

# export
fig.write_html('my-library-vector-visualization.html')

# confirmation
# fig.show()

print('the visualization is ready')

the visualization is ready
