# In [None]:
import pandas as pd
import librosa
import numpy as np

# Pipeline: read the song catalogue from 'songs.csv', pick a random sample of
# songs, score each sampled song against every song in the catalogue using
# MFCC features, and write the score table to 'similarity_scores.csv'.

# maximum number of songs to sample as query rows
SAMPLE_SIZE = 1000


def extract_features(path):
    """Return a fixed-length MFCC feature vector for the audio file at *path*.

    The MFCC matrix returned by librosa has one column per analysis frame, so
    its width depends on the track duration. Averaging over the frame axis
    yields one vector per song whose length does not depend on duration,
    which is what makes songs of different lengths comparable.
    """
    audio, sr = librosa.load(path)
    # pass y/sr by keyword: recent librosa releases reject positional arguments
    mfcc = librosa.feature.mfcc(y=audio, sr=sr)
    return mfcc.mean(axis=1)


def build_similarity_table(names, features, sample_positions):
    """Build the similarity table for the sampled songs.

    Args:
        names: song names, one per catalogue entry; used as column labels.
        features: one equal-length feature vector per catalogue entry, in the
            same order as *names*.
        sample_positions: integer positions (into *names* / *features*) of the
            songs to use as query rows.

    Returns:
        A DataFrame with one row per sampled song (indexed by its name) and
        one column per catalogue song. Each cell is ``1 / (1 + d)`` where
        ``d`` is the Euclidean distance between the two feature vectors.
        Cells comparing a song with itself, or with another song of the same
        name, are NaN because those pairs are deliberately not scored.
    """
    names = np.asarray(names, dtype=object)
    features = np.asarray(features, dtype=float)

    rows = []
    for position in sample_positions:
        # distance from the query song to every catalogue song in one call
        distances = np.linalg.norm(features - features[position], axis=1)
        scores = 1.0 / (1.0 + distances)

        # same-named songs are skipped; NaN keeps the columns aligned
        skipped = names == names[position]
        skipped[position] = True
        scores[skipped] = np.nan
        rows.append(scores)

    # np.vstack rejects an empty list, so build the empty table explicitly
    data = np.vstack(rows) if rows else np.empty((0, len(names)))
    row_labels = [names[position] for position in sample_positions]
    return pd.DataFrame(data, index=row_labels, columns=list(names))


# True when run as a script or executed as a notebook cell; keeps the helpers
# importable without triggering the file I/O below.
if __name__ == "__main__":
    # read the CSV file
    df = pd.read_csv('songs.csv')

    # select up to SAMPLE_SIZE songs at random; DataFrame.sample raises when
    # asked for more rows than exist, so cap the request at the catalogue size
    random_songs = df.sample(n=min(SAMPLE_SIZE, len(df)))

    # load each audio file and extract its features exactly once, instead of
    # re-decoding the whole catalogue for every sampled song
    features = [extract_features(path) for path in df['path']]

    # translate the sampled rows' index labels into positions within df
    sample_positions = df.index.get_indexer(random_songs.index)

    # compute the similarity scores as a DataFrame (columns are song names)
    similarity_scores_df = build_similarity_table(
        df['name'].tolist(), features, sample_positions
    )

    # write the DataFrame to a CSV file; the first column holds the name of
    # the sampled song each row belongs to, since the sample order is random
    similarity_scores_df.to_csv('similarity_scores.csv', index_label='name')