In [2]:
import os
import librosa
import pandas as pd
import numpy as np
from tinytag import TinyTag
from PIL import Image
import io
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.id3 import APIC

# replace the path.txt with a text file that points to your local library folder
# you can also duretly define "audio_folder" variable as your path like /Users/johnappleseed/Music/iTunes
path_to_library = 'path.txt'

with open(path_to_library, 'r') as file:
    audio_folder = file.read().strip()

print(f"Audio folder: {audio_folder}")

def get_album_art(file_path):
    album_art = None
    if file_path.lower().endswith('.mp3'):
        audio = MP3(file_path)
        for tag in audio.tags.values():
            if isinstance(tag, APIC):
                album_art = Image.open(io.BytesIO(tag.data))
                break
    elif file_path.lower().endswith('.flac'):
        audio = FLAC(file_path)
        if audio.pictures:
            pic = audio.pictures[0]
            album_art = Image.open(io.BytesIO(pic.data))
    return album_art

def load_audio_data(file_path):
    try:
        audio_data, sample_rate = librosa.load(file_path, sr=None)
        if audio_data.size == 0:
            print(f"Skipping empty file: {file_path}")
            return None

        tag = TinyTag.get(file_path)
        metadata = tag.as_dict()
        
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sample_rate)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_data, sr=sample_rate)
        zcr = librosa.feature.zero_crossing_rate(y=audio_data)
        tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sample_rate)
        chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
        
        album_art = get_album_art(file_path)

        artist_title_year = f"{tag.artist} - {tag.title} ({tag.year})"

        return {
            'file_name': artist_title_year,
            'sample_rate': sample_rate,
            'metadata': metadata,
            'album_art': album_art,
            'audio_data': audio_data,
            'spectral_contrast': spectral_contrast.mean(axis=1),
            'spectral_centroid': spectral_centroid.mean(axis=1),
            'spectral_bandwidth': spectral_bandwidth.mean(axis=1),
            'zcr': zcr.mean(axis=1),
            'tempo': tempo,
            'chroma': chroma.mean(axis=1)
        }
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

audio_extensions = ['.flac', '.aiff', '.mp3']

audio_files_data = []
for file_name in os.listdir(audio_folder):
    if os.path.splitext(file_name)[1].lower() in audio_extensions:
        print(f"Processing: {file_name}")
        file_path = os.path.join(audio_folder, file_name)
        result = load_audio_data(file_path)
        if result is not None:
            audio_files_data.append(result)

df = pd.DataFrame(audio_files_data)

print('ready')
#df.tail()

Audio folder: /Users/victoriapaskannaya/Desktop/Music Small
Processing: 9560524_Black_Original_Mix.aiff
Processing: 12201203_Sleepless_(Original Mix).aiff
Processing: 12092586_Phiom Enhah_(Original Mix).aiff
Processing: 11785249_Contact_(Original Mix) (1).aiff


KeyboardInterrupt: 

In [4]:
# Calculate means for music values to prepare the data
# Average over the correct axis and make variable suitable for aggregation

def mean_of_lists(data):
    if isinstance(data[0], (list, np.ndarray)):
        return np.mean(data, axis=0).tolist()
    return data

# Apply the function to each column
df['mean_spectral_contrast'] = df['spectral_contrast'].apply(mean_of_lists)
df['mean_chroma'] = df['chroma'].apply(mean_of_lists)

df['mean_spectral_contrast'] = df['mean_spectral_contrast'].apply(np.mean)
df['mean_chroma'] = df['mean_chroma'].apply(np.mean)

print('ready')
df.head()

ready


Unnamed: 0,file_name,sample_rate,metadata,album_art,audio_data,spectral_contrast,spectral_centroid,spectral_bandwidth,zcr,tempo,chroma,mean_spectral_contrast,mean_chroma
0,The Birthday Massacre - Black (Original Mix) (...,44100,"{'filesize': 15813517, 'album': 'Violet', 'alb...",,"[-0.0023651123, -0.0013275146, -0.00062561035,...","[11.99492525925654, 9.006962555065458, 14.8176...",[731.1916191040629],[1311.428283130022],[0.015753782354900558],156.605114,"[0.7059452, 0.5758748, 0.59326637, 0.5150646, ...",17.111656,0.541846
1,Mall Grab - Sleepless (Original Mix) (2019-07-12),44100,"{'filesize': 73931327, 'album': 'Growing Pains...",,"[-0.0014343262, -0.0011901855, -0.0008239746, ...","[18.546054202509872, 10.074127461232207, 12.66...",[2827.4372029673013],[3415.5078913321067],[0.053640992039723674],89.102909,"[0.53289914, 0.566921, 0.7695968, 0.6252977, 0...",16.441233,0.588228
2,Rikhter - Phiom Enhah (Original Mix) (2019-06-07),44100,"{'filesize': 58479484, 'album': 'Rik1', 'album...",,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[18.07039773669111, 8.846243190489469, 11.2936...",[2892.275917896567],[3806.8495750132643],[0.04567250432050361],139.674831,"[0.73411405, 0.7244896, 0.7563907, 0.67789984,...",17.16696,0.658871


In [5]:
# Annoy
from sklearn.preprocessing import StandardScaler
from annoy import AnnoyIndex

features = df[['tempo', 'mean_spectral_contrast', 'mean_chroma', 'zcr', 'spectral_centroid', 'spectral_bandwidth']]  # Add more features as needed
features_scaled = StandardScaler().fit_transform(features)

# building the Annoy index
f = features_scaled.shape[1]  # Dimensionality of the feature space
t = AnnoyIndex(f, 'euclidean')  # You can choose 'angular', 'euclidean', 'manhattan', 'hamming', or 'dot'

for i, vector in enumerate(features_scaled):
    t.add_item(i, vector)

t.build(10)  # Build the index with 10 trees

# To get the nearest neighbors for each item
# For example, to find the 10 nearest neighbors for the first item
indices = t.get_nns_by_item(0, 10)  # Change 0 to other indices as needed

# a quickcheck to see the nearest neighbour 
nn_df = pd.DataFrame([t.get_nns_by_item(i, 10) for i in range(len(features_scaled))])

nn_df.head()

Unnamed: 0,0,1,2
0,0,2,1
1,1,2,0
2,2,1,0


In [8]:
import plotly.graph_objects as go
from sklearn.decomposition import PCA
#from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# I reduced dimentionality for 3D visualization by applying PCA
# principal_components_3d is the PCA result with 3 components
pca = PCA(n_components=3)
principal_components_3d = pca.fit_transform(features_scaled)

# 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=principal_components_3d[:, 0],
    y=principal_components_3d[:, 1],
    z=principal_components_3d[:, 2],
    mode='markers+text',
    text=df['file_name'],
    marker=dict(
        size=5,
        color=principal_components_3d[:, 2],  # Color by z-axis value for depth effect
        colorscale='Viridis',
        opacity=0.8
    ),
    textposition='top center'
)])

# layout
fig.update_layout(
    title='3D PCA of Songs',
    scene=dict(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3'
    ),
    margin=dict(l=0, r=0, b=0, t=0)
)

# export
fig.write_html('my-library-vector-visualization.html')

# confirmation
# fig.show()

print('the visualization is ready')

the visualization is ready
