# Plot the artists

In [12]:
import pandas as pd
import umap
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

## Data config

In [5]:
# Load the per-track feature table from the pickled DataFrame.
# Pickle files can execute code on load, so only read ones you produced yourself.
features_pickle_path = 'extracted data/2025_01_07_local_music_librosa_features.pkl'
df_music = pd.read_pickle(features_pickle_path)

In [6]:
# Feature columns: every column from 'mfcc_0' through 'tempo', both ends included.
# Label-based slicing with .loc is inclusive, so no explicit "+1" is needed.
feature_columns = df_music.loc[:, 'mfcc_0':'tempo'].columns
feature_columns

Index(['mfcc_0', 'mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6',
       'mfcc_7', 'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12',
       'mfcc_13', 'mfcc_14', 'mfcc_15', 'mfcc_16', 'mfcc_17', 'mfcc_18',
       'mfcc_19', 'spectral_centroid', 'chroma_0', 'chroma_1', 'chroma_2',
       'chroma_3', 'chroma_4', 'chroma_5', 'chroma_6', 'chroma_7', 'chroma_8',
       'chroma_9', 'chroma_10', 'chroma_11', 'tempo'],
      dtype='object')

In [7]:
# Feature matrix (one row per track, one column per selected feature)
# and the matching artist label for each row.
X = df_music.loc[:, feature_columns].values
y = df_music.loc[:, 'artist'].values

## UMAP

In [8]:
# Project the feature matrix down to two dimensions with UMAP.
# The fixed random_state keeps the layout reproducible across runs.
# NOTE(review): X is passed in as loaded; if the features are not already
# standardised, large-magnitude columns may dominate the distances — confirm.
reducer = umap.UMAP(
    n_components=2,
    random_state=42,
)
embedding = reducer.fit_transform(X)

In [14]:
# Assemble the plotting table: the two UMAP coordinates plus the artist
# label and track name for each point.
plot_df = pd.DataFrame(
    dict(
        UMAP1=embedding[:, 0],
        UMAP2=embedding[:, 1],
        Artist=y,
        Track=df_music['track_name'],
    )
)

In [16]:
# Interactive scatter of the 2-D UMAP embedding, coloured by artist.
# Plotly Express emits one trace per distinct 'Artist' value, so the artist labels
# must be attached through `custom_data`: px then slices them per trace. Assigning
# the full 'Artist' column to every trace afterwards (the previous approach) pairs
# point i of each trace with row i of the whole frame and shows the wrong artist.
fig = px.scatter(
    plot_df,
    x='UMAP1',
    y='UMAP2',
    color='Artist',
    hover_name='Track',
    custom_data=['Artist'],
    title='UMAP projection of songs by artist',
)
# Each point's customdata is the list of `custom_data` columns, so the artist is item 0.
# update_traces returns the figure, so leaving it as the last expression renders it.
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>Artist: %{customdata[0]}')