In [3]:
import pandas as pd
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from rtree import index

ModuleNotFoundError: No module named 'librosa'

## Extracción de Características

In [None]:
def extract_features(file_path, max_length=1000):
    audio, sr = librosa.load(file_path, mono=True)
    # 1. Coeficientes MFCC
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
    mfcc_features = np.concatenate((mfccs.mean(axis=1), mfccs.std(axis=1)))

    # 2. Delta MFCC
    delta_mfccs = librosa.feature.delta(mfccs)
    delta_mfcc_features = np.concatenate((delta_mfccs.mean(axis=1), delta_mfccs.std(axis=1)))

    # 3. Delta Delta MFCC
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    delta2_mfcc_features = np.concatenate((delta2_mfccs.mean(axis=1), delta2_mfccs.std(axis=1)))

    # Representan la información espectral de la señal de audio.
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)

    # 4. Contraste espectral
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)

    # 5. Tonnetz
    tonnetz = librosa.feature.tonnetz(y=audio, sr=sr)

    # 6. Tempograma
    tempo, tempogram = librosa.beat.beat_track(y=audio, sr=sr)

    # Asegurar que todas las características tengan la misma longitud
    all_features = np.concatenate((
        mfcc_features,
        delta_mfcc_features,
        delta2_mfcc_features,
        chroma.mean(axis=1),
        contrast.mean(axis=1),
        tonnetz.mean(axis=1),
        tempogram
    ))

    # Rellenar con ceros si es necesario
    if len(all_features) < max_length:
        all_features = np.pad(all_features, (0, max_length - len(all_features)))
    else:
        # Recortar si es necesario
        all_features = all_features[:max_length]

    return all_features

## EXTRACCION DE CARACTERISTICAS DE TODAS LAS CANCIONES :

In [None]:
import os

def listar_archivos_carpeta(ruta_carpeta):
    archivos = [arch for arch in os.listdir(ruta_carpeta) if arch.endswith(('.mp3'))]
    return archivos

In [None]:
ruta_carpeta = "spotify/CANCIONES"
archivos_carpeta = listar_archivos_carpeta(ruta_carpeta)

# Almacena las características y las etiquetas (nombre de archivo) en listas
caracteristicas = []
etiquetas = []

for archivo in archivos_carpeta:
    ruta_archivo = os.path.join(ruta_carpeta, archivo)
    features = extract_features(ruta_archivo)
    caracteristicas.append(features)
    etiquetas.append(archivo)

In [None]:
etiquetas = np.array(etiquetas)
df = pd.DataFrame(data=caracteristicas)
df['etiqueta'] = etiquetas
df.to_csv('spotify/caracteristicas_cancionesjp.csv', index=False)

NameError: name 'etiquetas' is not defined

## CREACION DE BUSQUEDAS 

In [7]:
data = pd.read_csv('../music/song_features.csv')
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,etiqueta
0,-33.522110,46.967037,7.297648,28.791386,4.638599,14.015849,-4.014965,7.175768,0.569459,9.676569,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Exodus.mp3
1,-148.184113,77.414627,22.313719,29.190990,13.712492,13.180655,-4.847233,5.510168,-3.705767,7.560897,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - King Tulip.mp3
2,-109.715393,69.597122,31.331320,50.046558,13.795660,15.090928,3.032498,8.497753,-10.353943,3.490418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Lte.mp3
3,-104.375908,81.880394,8.105327,1.289422,-12.881152,-1.505768,-2.986757,7.328893,-5.891006,3.347755,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - The Sacred.mp3
4,-70.708229,92.657921,3.042413,18.210211,8.917986,11.823200,1.670250,2.491837,0.363811,6.691009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Til Tuesday - Voices Carry.mp3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5209,-17.343597,81.888573,-10.056756,37.247326,7.296055,3.846286,-1.420673,4.889192,1.470839,3.792245,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sharp Dressed Man - 2008 Remaster.mp3
5210,-97.704109,61.729267,-9.165577,24.729053,7.406517,5.858263,-1.380089,3.252451,-1.981778,8.454137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sleeping Bag.mp3
5211,-31.040625,99.809891,-3.825508,38.275597,-1.322180,3.809813,-1.666210,6.598217,3.866884,10.444964,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Tush - 2006 Remaster.mp3
5212,-105.022682,99.173553,-1.972429,21.914129,7.793817,14.785956,1.718355,8.648097,-1.364933,4.222836,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[dunkelbunt], Boban i Marko Marcovic Orkestar ..."


In [31]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,etiqueta
0,-33.522110,46.967037,7.297648,28.791386,4.638599,14.015849,-4.014965,7.175768,0.569459,9.676569,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Exodus.mp3
1,-148.184113,77.414627,22.313719,29.190990,13.712492,13.180655,-4.847233,5.510168,-3.705767,7.560897,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - King Tulip.mp3
2,-109.715393,69.597122,31.331320,50.046558,13.795660,15.090928,3.032498,8.497753,-10.353943,3.490418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Lte.mp3
3,-104.375908,81.880394,8.105327,1.289422,-12.881152,-1.505768,-2.986757,7.328893,-5.891006,3.347755,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - The Sacred.mp3
4,-70.708229,92.657921,3.042413,18.210211,8.917986,11.823200,1.670250,2.491837,0.363811,6.691009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Til Tuesday - Voices Carry.mp3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5209,-17.343597,81.888573,-10.056756,37.247326,7.296055,3.846286,-1.420673,4.889192,1.470839,3.792245,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sharp Dressed Man - 2008 Remaster.mp3
5210,-97.704109,61.729267,-9.165577,24.729053,7.406517,5.858263,-1.380089,3.252451,-1.981778,8.454137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sleeping Bag.mp3
5211,-31.040625,99.809891,-3.825508,38.275597,-1.322180,3.809813,-1.666210,6.598217,3.866884,10.444964,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Tush - 2006 Remaster.mp3
5212,-105.022682,99.173553,-1.972429,21.914129,7.793817,14.785956,1.718355,8.648097,-1.364933,4.222836,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[dunkelbunt], Boban i Marko Marcovic Orkestar ..."


### KNN SECUENCIAL

In [15]:
%pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/0c/2a/d3ff6091406bc2207e0adb832ebd15e40ac685811c7e2e3b432bfd969b71/scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy>=1.5.0 (from scikit-learn)
  Obtaining dependency information for scipy>=1.5.0 from https://files.pythonhosted.org/packages/6b/d4/d62ce38ba00dc67d7ec4ec5cc19d36958d8ed70e63778715ad626bcbc796/scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m547.2 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
Collect

In [85]:
#data[data['etiqueta'] == 'Aventura - El Malo.mp3']

#data['etiqueta']

data.iloc[364]



#print(data)

#data.iloc(1001)


0                      -149.063171
1                        80.901222
2                        27.121805
3                        27.354921
4                        16.408049
                     ...          
996                            0.0
997                            0.0
998                            0.0
999                            0.0
etiqueta    Aventura - El Malo.mp3
Name: 364, Length: 1001, dtype: object

In [52]:
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from queue import PriorityQueue
X = data.iloc[:, :-1].values
print(data.iloc[:, :-1])
scaler = StandardScaler()
X = scaler.fit_transform(X)


               0          1          2          3          4          5  \
0     -33.522110  46.967037   7.297648  28.791386   4.638599  14.015849   
1    -148.184113  77.414627  22.313719  29.190990  13.712492  13.180655   
2    -109.715393  69.597122  31.331320  50.046558  13.795660  15.090928   
3    -104.375908  81.880394   8.105327   1.289422 -12.881152  -1.505768   
4     -70.708229  92.657921   3.042413  18.210211   8.917986  11.823200   
...          ...        ...        ...        ...        ...        ...   
5209  -17.343597  81.888573 -10.056756  37.247326   7.296055   3.846286   
5210  -97.704109  61.729267  -9.165577  24.729053   7.406517   5.858263   
5211  -31.040625  99.809891  -3.825508  38.275597  -1.322180   3.809813   
5212 -105.022682  99.173553  -1.972429  21.914129   7.793817  14.785956   
5213  -61.813671  93.429657  12.125089  25.161915   6.463640   2.758632   

             6          7          8          9  ...  990  991  992  993  994  \
0    -4.014965   7

In [83]:
def knn_search_priority_queue(query, k):
    print("-----------------assa")
    #print(query)
    print(query.reshape(1,-1))
    print(X)
    similarities = cosine_similarity(query.reshape(1, -1), X).flatten()
    # cola de prioridad
    priority_queue = PriorityQueue()
    for i, sim in enumerate(similarities):
        priority_queue.put((-sim, data.iloc[i]['etiqueta']))  # por ser una cola para sacar los mejores(mas abajo ) desde abajo los metemos
    neighbors = []
    for _ in range(k):
        sim, neighbor = priority_queue.get()
        neighbors.append((neighbor, -sim))
    return neighbors

In [86]:
query_example = X[364]  # Puedes cambiar esto con el objeto de consulta que desees
radius_result = knn_search_priority_queue(query_example, 20)

print("\nResultados de búsqueda por rango:")
for label,dist in radius_result:
    print(f"Distancia: {dist}, Etiqueta: {label}")

-----------------assa
[[-1.16415568e+00 -1.90670479e-01  1.66125887e+00  6.42038864e-01
   1.72911408e+00  7.10214578e-01 -8.54350687e-01  9.08917846e-01
  -5.78089500e-02 -2.20305834e-02  1.44675753e-02 -7.04489351e-01
  -3.01876245e-01 -1.75019785e-01 -1.79138737e+00 -1.07876143e+00
  -1.21364756e+00 -8.43461051e-01  1.68787767e-01  1.76023762e+00
   1.20724673e+00  1.98115505e-01  1.39313363e+00  5.86786055e-01
   2.69156198e-01 -1.49892984e-02  3.45306690e-01 -2.57100119e-01
   1.58832198e-01 -1.83280089e-01  6.49978367e-02  4.64953242e-01
  -1.54403917e-01  1.73364199e-01  5.29366076e-01  6.67112259e-01
   2.24552490e-01  1.22988469e+00  2.33775638e+00  2.67055236e+00
   5.00458199e-01  6.63853542e-01  1.18192705e+00  3.75386700e-01
   7.11407547e-01 -3.55394450e-01  1.12551969e-01 -2.57088093e-01
  -6.29225351e-02 -1.58720615e-01 -4.49690292e-01 -3.32124250e-01
  -5.80437856e-01 -1.69490324e-01 -1.95123445e-01  1.04181354e-01
  -5.89402517e-03  7.53006089e-02 -4.84583089e-02  8.8

### KNN POR RANGO 

In [87]:
def range_search(query_object, radius):
    query_object_2d = query_object.reshape(1, -1)
    within_radius = [(euclidean_distances(query_object_2d, X[i].reshape(1, -1))[0, 0], data.iloc[i]['etiqueta']) for i in range(len(X)) if euclidean_distances(query_object_2d, X[i].reshape(1, -1))[0, 0] <= radius]
    return within_radius

In [90]:
query_example = X[364]
radius_result = range_search(query_example, radius = 15)
# Ordenar los resultados por distancia de menor a mayor
sorted_results = sorted(radius_result, key=lambda x: x[0])

print("\nResultados de búsqueda por rango ordenados por distancia:")
for  distance ,label in sorted_results:
    print(f"Distancia euclidiana: {distance}, Etiqueta: {label}")


Resultados de búsqueda por rango ordenados por distancia:
Distancia euclidiana: 0.0, Etiqueta: Aventura - El Malo.mp3
Distancia euclidiana: 13.094945981449515, Etiqueta: Giveon - Like I Want You.mp3
Distancia euclidiana: 13.777988899505056, Etiqueta: Skott - Porcelain.mp3
Distancia euclidiana: 13.94803443055368, Etiqueta: Jason Derulo - Swalla (feat. Nicki Minaj and Ty Dolla $ign) - After Dark Remix.mp3


### KNN RTREE DIMAEL

In [114]:
!conda activate env


9682.36s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
zsh:1: command not found: conda


In [115]:
!conda install -c conda-forge faiss-cpu

9693.37s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
zsh:1: command not found: conda


### FAISS

In [120]:
import  faiss

X = data.iloc[:, :-1].values.astype('float32')  # Asegúrate de que los datos sean de tipo float32
scaler = StandardScaler()
X = scaler.fit_transform(X)
# Inicializa el índice de FAISS
dimension = X.shape[1]  # Dimensión de las características
nlist = 10
# Agrega los vectores al índice
quantizer = faiss.IndexFlatL2(dimension)
index = faiss.IndexIVFFlat(quantizer, dimension, nlist, faiss.METRIC_L2)

# Entrenamiento del índice
index.train(X)
index.add(X)



ModuleNotFoundError: No module named 'faiss'

In [109]:
def knn_faiss(query_object, k):
    query_object = query_object.astype('float32')
    distances, indices = index.search(np.expand_dims(query_object, axis=0), k)
    return [(distances[0][i], data.iloc[indices[0][i]]['etiqueta']) for i in range(k)]

In [111]:
query_example = X[100]  # Puedes cambiar esto con el objeto de consulta que desees
k_result = knn_faiss(query_example, 5)
print("Resultados de búsqueda KNN con FAISS:")
for dist, label in k_result:
    print(f"Distancia: {dist}, Etiqueta: {label}")

NameError: name 'index' is not defined