In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Read both embedding files. Each .npy holds a single pickled object, which
# .item() unwraps (the following cells show it is a dict).
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
clap_embeddings, mert_embeddings = (
    np.load(npy_path, allow_pickle=True).item()
    for npy_path in ("./audio_embeddings_clap.npy", "./audio_embeddings_mert.npy")
)

# Read the Cocola scores table from CSV
cocola_scores_df = pd.read_csv(filepath_or_buffer="./cocola_scores.csv")

# Keep the column index around and show it
cocola_columns = cocola_scores_df.columns
print(cocola_columns)



Index(['vocals', 'instrumental', 'score'], dtype='object')


In [None]:
# Load the CLAP embeddings file; np.load hands back an ndarray wrapping one object.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
embedding_matrix_clap = np.load(file="./audio_embeddings_clap.npy", allow_pickle=True)
print(type(embedding_matrix_clap))

# Unwrap the single stored object (a dict) from the array
embedding_matrix_clap = embedding_matrix_clap.item()
print(type(embedding_matrix_clap))

# Dictionary keys, in the dictionary's own iteration order
file_names = [*embedding_matrix_clap]

# Dictionary values are the embedding vectors; stack them row-wise into one matrix
embeddings_list_clap = [*embedding_matrix_clap.values()]
embedding_matrix_clap = np.vstack(embeddings_list_clap)
print(f"Nouvelle forme de la matrice d'embeddings : {embedding_matrix_clap.shape}")

<class 'numpy.ndarray'>
<class 'dict'>
Nouvelle forme de la matrice d'embeddings : (42, 512)


In [None]:
# Load the MERT embeddings file; np.load hands back an ndarray wrapping one object.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
embedding_matrix_mert = np.load(file="./audio_embeddings_mert.npy", allow_pickle=True)
print(type(embedding_matrix_mert))

# Unwrap the single stored object (a dict) from the array
embedding_matrix_mert = embedding_matrix_mert.item()
print(type(embedding_matrix_mert))

# Dictionary keys, in the dictionary's own iteration order.
# This rebinds `file_names`, replacing any value set by an earlier cell.
file_names = [*embedding_matrix_mert]

# Dictionary values are the embedding vectors; stack them row-wise into one matrix
embeddings_list_mert = [*embedding_matrix_mert.values()]
embedding_matrix_mert = np.vstack(embeddings_list_mert)
print(f"Nouvelle forme de la matrice d'embeddings : {embedding_matrix_mert.shape}")

<class 'numpy.ndarray'>
<class 'dict'>
Nouvelle forme de la matrice d'embeddings : (42, 768)


In [None]:
# Pairwise cosine similarity between all rows of the CLAP embedding matrix
cosine_sim_matrixc = cosine_similarity(X=embedding_matrix_clap)

# Report the matrix shape, then preview its top-left 5x5 corner
print(f"Shape of cosine similarity matrix of CLAP embeddings: {cosine_sim_matrixc.shape}")
print(cosine_sim_matrixc[0:5, 0:5])



Forme de la matrice de similarité cosinus : (42, 42)
[[1.0000002  0.34097084 0.83422333 0.5616863  0.65733886]
 [0.34097084 1.0000002  0.16409242 0.5731591  0.15953967]
 [0.83422333 0.16409242 0.99999994 0.4414206  0.6725746 ]
 [0.5616863  0.5731591  0.4414206  1.0000004  0.38131076]
 [0.65733886 0.15953967 0.6725746  0.38131076 0.9999998 ]]


In [None]:
# Pairwise cosine similarity between all rows of the MERT embedding matrix
cosine_sim_matrixm = cosine_similarity(X=embedding_matrix_mert)

# Report the matrix shape, then preview its top-left 5x5 corner
print(f"Shape of cosine similarity matrix of MERT embeddings: {cosine_sim_matrixm.shape}")
print(cosine_sim_matrixm[0:5, 0:5])

Forme de la matrice de similarité cosinus : (42, 42)
[[ 0.99999994 -0.92627406  0.66491747 -0.76870966  0.03508142]
 [-0.92627406  1.         -0.7857152   0.83416104  0.14618175]
 [ 0.66491747 -0.7857152   1.0000001  -0.54862237 -0.5991662 ]
 [-0.76870966  0.83416104 -0.54862237  1.          0.02676624]
 [ 0.03508142  0.14618175 -0.5991662   0.02676624  0.9999999 ]]


In [None]:

# Take the bottom-right 21x21 block of the CLAP similarity matrix (rows/cols 21..41).
# Per the notebook's convention these are the instrumental tracks: CLAP separates
# vocals from instrumentals, so instrumental-vs-instrumental similarity is used here
# instead of vocal-vs-instrumental similarity.
# NOTE(review): despite its name, `vocals_similarity` holds instrumental/instrumental
# values; the 21/42 split is hard-coded and assumes the embedding order - confirm.
vocals_similarity = cosine_sim_matrixc[21:42, 21:42]  # 21x21

# Flatten row by row (row index varies slowest), i.e. the same order as a nested
# `for i ... for j ...` walk over the block.
similarity_values = list(vocals_similarity.ravel())

# Store the flattened values as a new column.
# NOTE(review): assumes the CSV rows follow this same row-major pair order - confirm.
cocola_scores_df["CLAP"] = similarity_values




In [76]:
# Preview the first five rows, now including the CLAP column
print(cocola_scores_df.head(n=5))

           vocals                                      instrumental  \
0  Bam Bam - Hi-Q                                    Bam Bam - Hi-Q   
1  Bam Bam - Hi-Q  Benji Cossa - New Flowers (Fast 4-track Version)   
2  Bam Bam - Hi-Q               Bessie Smith - My Sweetie Went Away   
3  Bam Bam - Hi-Q             David Rovics - We Just Want The World   
4  Bam Bam - Hi-Q                   deef - Ein sonniger Tag mit dir   

       score      CLAP  
0  45.754675  1.000000  
1  46.145325  0.255522  
2  44.335644  0.630813  
3  46.341496  0.289742  
4  43.764599  0.459481  


In [77]:
# Count missing values per column
print(cocola_scores_df.isna().sum())

vocals          0
instrumental    0
score           0
CLAP            0
dtype: int64


In [None]:
# Z-score the Cocola scores and the CLAP similarities (subtract the mean, divide by the std)
cocola_scores_df["normalized_score"] = (
    cocola_scores_df["score"]
    .sub(cocola_scores_df["score"].mean())
    .div(cocola_scores_df["score"].std())
)
cocola_scores_df["normalized_CLAP"] = (
    cocola_scores_df["CLAP"]
    .sub(cocola_scores_df["CLAP"].mean())
    .div(cocola_scores_df["CLAP"].std())
)

# Pearson correlation between the normalized scores and CLAP similarities
correlation = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_CLAP"], method="pearson"
)
print(f"Pearson correlation between the Cocola scores and CLAP similarity values: {correlation}")

# Spearman (rank) correlation on the same two columns
correlation_spearman = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_CLAP"], method="spearman"
)
print(f"Spearman correlation between the Cocola scores and CLAP similarity values: {correlation_spearman}")

# Kendall tau correlation on the same two columns
correlation_kendall = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_CLAP"], method="kendall"
)
print(f"Kendall correlation between the Cocola scores and CLAP similarity values: {correlation_kendall}")

Corrélation entre les scores et les valeurs de similarité : 0.05109174349230319
Corrélation de Spearman entre les scores et les valeurs de similarité : 0.07905827255516565
Corrélation de Kendall entre les scores et les valeurs de similarité : 0.05312077256961182


In [None]:
# MERT counterpart of the CLAP step, except that vocal/instrumental pairs are compared:
# take the top-right 21x21 block (rows 0..20 against columns 21..41).
# NOTE(review): the 21/42 split is hard-coded and assumes vocals come first in the
# embedding order, followed by instrumentals - confirm.
vocals_similaritym = cosine_sim_matrixm[0:21, 21:42]  # 21x21

# Flatten row by row (row index varies slowest), i.e. the same order as a nested
# `for i ... for j ...` walk over the block.
mert_values = list(vocals_similaritym.ravel())

# Store the flattened values as a new column.
# NOTE(review): assumes the CSV rows follow this same row-major pair order - confirm.
cocola_scores_df["MERT"] = mert_values

# Z-score the MERT similarities (subtract the mean, divide by the std)
cocola_scores_df["normalized_MERT"] = (
    cocola_scores_df["MERT"]
    .sub(cocola_scores_df["MERT"].mean())
    .div(cocola_scores_df["MERT"].std())
)

In [None]:
# Pearson correlation between the normalized scores and MERT similarities
# (this rebinds the `correlation` name)
correlation = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_MERT"], method="pearson"
)
print(f"Pearson correlation between the Cocola scores and MERT similarity values: {correlation}")

# Spearman (rank) correlation on the same two columns
spearman_correlation = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_MERT"], method="spearman"
)
print(f"Spearman correlation between the Cocola scores and MERT similarity values: {spearman_correlation}")

# Kendall tau correlation on the same two columns
kendall_correlation = cocola_scores_df["normalized_score"].corr(
    other=cocola_scores_df["normalized_MERT"], method="kendall"
)
print(f"Kendall correlation between the Cocola scores and MERT similarity values : {kendall_correlation}")

Corrélation entre les scores et les valeurs de similarité : -0.018302057090385595
Corrélation de Spearman entre les scores et les valeurs de similarité : -0.016549836717903946
Corrélation de Kendall entre les scores et les valeurs de similarité : -0.010327767470624612


In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Feature: raw Cocola score as a single-column 2D array; target: raw CLAP similarity
X = cocola_scores_df[["score"]].to_numpy()
y = cocola_scores_df["CLAP"].to_numpy()

# Ordinary least squares fit of CLAP similarity on the Cocola score
model = LinearRegression().fit(X, y)

# Report the slope, intercept and R² on the fitted data
print("CLAP Linear Regression Results:")
print(f"Regression coefficient: {model.coef_}")
print(f"Intercept: {model.intercept_}")
print(f"R² : {model.score(X, y)}")



Coefficient de régression : [0.0049813]
Ordonnée à l'origine : 0.09516064020165937
R² (coefficient de détermination) : 0.002610366414126264


In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Definition of the features and target variable
X = cocola_scores_df["normalized_score"].values.reshape(-1, 1)  # Reshape to 2D array
# The target must be set explicitly here. Without this line `y` is whatever an
# earlier cell left behind (the CLAP similarities), so the "MERT" regression
# would silently be fitted on CLAP data.
y = cocola_scores_df["MERT"].values

# Create and fit the linear regression model for MERT
model = LinearRegression()
model.fit(X, y)

# Print results (slope, intercept and R² on the fitted data)
print("MERT Linear Regression Results:")
print(f"Regression coefficient: {model.coef_}")
print(f"Intercept: {model.intercept_}")
print(f"R² : {model.score(X, y)}")



Coefficient de régression : [-0.01047648]
Ordonnée à l'origine : -0.0036104229672720978
R² (coefficient de détermination) : 0.0003349653416386822


In [None]:
# Rank all pairs by Cocola score, highest first, and keep the first ten rows
top10 = cocola_scores_df.sort_values("score", ascending=False).iloc[:10]
print(top10)

                                 vocals  \
45  Bessie Smith - My Sweetie Went Away   
56  Bessie Smith - My Sweetie Went Away   
51  Bessie Smith - My Sweetie Went Away   
43  Bessie Smith - My Sweetie Went Away   
61  Bessie Smith - My Sweetie Went Away   
52  Bessie Smith - My Sweetie Went Away   
50  Bessie Smith - My Sweetie Went Away   
53  Bessie Smith - My Sweetie Went Away   
42  Bessie Smith - My Sweetie Went Away   
54  Bessie Smith - My Sweetie Went Away   

                                        instrumental      score      CLAP  \
45             David Rovics - We Just Want The World  58.303272  0.133987   
56                    Short Hand - Certain Strangers  57.926910  0.470327   
51                         Los Steaks - Sunday Girls  57.922951  0.218267   
43  Benji Cossa - New Flowers (Fast 4-track Version)  57.745975  0.219688   
61                             Wann - Happy Birthday  57.739521  0.238412   
52                 Mia Doi Todd - I gave you my home  57.255163 

In [None]:
# Write the DataFrame, including the columns added above, to CSV without the row index
cocola_scores_df.to_csv(path_or_buf="mesures_mashups.csv", index=False)