In [1]:
import pandas as pd
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from rtree import index

## Extracción de Características

In [2]:
def _mean_and_std_over_time(frames):
    """Collapse a (rows, time) matrix into [per-row means, per-row stds]."""
    return np.concatenate((frames.mean(axis=1), frames.std(axis=1)))


def extract_features(file_path, max_length=1000):
    """Build a fixed-length feature vector describing one audio file.

    The vector is the concatenation, in this order, of:

    1. mean and std over time of 20 MFCCs (40 values),
    2. mean and std over time of the first-order MFCC deltas (40 values),
    3. mean and std over time of the second-order MFCC deltas (40 values),
    4. mean over time of the chroma STFT rows,
    5. mean over time of the spectral-contrast rows,
    6. mean over time of the tonnetz rows,
    7. mean over time of the tempogram rows.

    The previous version appended the second return value of
    ``librosa.beat.beat_track`` under the name "tempogram". That value is the
    list of detected beat frame indices, whose length and magnitude grow with
    the track duration, so the tail of the vector was not comparable between
    songs. It is replaced by the time-averaged ``librosa.feature.tempogram``.
    Feature tables produced by the previous version must be re-extracted.

    Parameters
    ----------
    file_path : path of the audio file, passed to ``librosa.load``.
    max_length : length of the returned vector. Shorter vectors are
        right-padded with zeros, longer ones are truncated.

    Returns
    -------
    1-D ``numpy.ndarray`` with exactly ``max_length`` entries.
    """
    audio, sr = librosa.load(file_path, mono=True)

    # 1-3. MFCCs and their first/second order deltas, summarised over time.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
    mfcc_features = _mean_and_std_over_time(mfccs)
    delta_mfcc_features = _mean_and_std_over_time(librosa.feature.delta(mfccs))
    delta2_mfcc_features = _mean_and_std_over_time(librosa.feature.delta(mfccs, order=2))

    # 4. Chroma: spectral energy of the signal folded onto pitch classes.
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr)

    # 5. Spectral contrast.
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)

    # 6. Tonnetz.
    tonnetz = librosa.feature.tonnetz(y=audio, sr=sr)

    # 7. Tempogram, averaged over time like the other frame-level features so
    #    that its contribution has the same length for every song.
    tempogram = librosa.feature.tempogram(y=audio, sr=sr)

    all_features = np.concatenate((
        mfcc_features,
        delta_mfcc_features,
        delta2_mfcc_features,
        chroma.mean(axis=1),
        contrast.mean(axis=1),
        tonnetz.mean(axis=1),
        tempogram.mean(axis=1),
    ))

    # Force the requested length: zero-pad on the right or truncate.
    if len(all_features) < max_length:
        all_features = np.pad(all_features, (0, max_length - len(all_features)))
    else:
        all_features = all_features[:max_length]

    return all_features

## EXTRACCIÓN DE CARACTERÍSTICAS DE TODAS LAS CANCIONES

In [3]:
import os

def listar_archivos_carpeta(ruta_carpeta):
    """Return the names of the MP3 entries directly inside ``ruta_carpeta``.

    The extension check is case-insensitive, so ``.MP3`` files are included.
    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order, which would otherwise make the row order of the extracted feature
    table differ between runs and machines.

    Parameters
    ----------
    ruta_carpeta : directory to scan (not recursive).

    Returns
    -------
    Sorted list of entry names (not full paths).

    Raises
    ------
    OSError : propagated from ``os.listdir`` when the directory cannot be read.
    """
    return sorted(
        nombre for nombre in os.listdir(ruta_carpeta)
        if nombre.lower().endswith('.mp3')
    )

In [None]:
ruta_carpeta = "spotify/CANCIONES"
archivos_carpeta = listar_archivos_carpeta(ruta_carpeta)

# Parallel lists: caracteristicas[i] is the feature vector of the file named etiquetas[i].
caracteristicas, etiquetas = [], []

for archivo in archivos_carpeta:
    ruta_archivo = os.path.join(ruta_carpeta, archivo)
    features = extract_features(ruta_archivo)
    # Both lists are only extended once extraction has succeeded, so they stay aligned.
    etiquetas.append(archivo)
    caracteristicas.append(features)

In [5]:
# One row per song: the numeric feature columns first, then the file name
# in a trailing 'etiqueta' column.
df = pd.DataFrame(data=caracteristicas)
etiquetas = np.array(etiquetas)
df['etiqueta'] = etiquetas

# index=False keeps the row index out of the file, so only features + label are stored.
df.to_csv('spotify/caracteristicas_cancionesjp.csv', index=False)

NameError: name 'etiquetas' is not defined

## CREACIÓN DE BÚSQUEDAS

In [2]:
# Load the extracted feature table; the search cells below read the label from
# its 'etiqueta' column and treat every column but the last as a feature.
# NOTE(review): the extraction cell writes 'spotify/caracteristicas_cancionesjp.csv',
# while this reads 'caracteristicas_cancionesjp.csv' — confirm both refer to the same file.
data = pd.read_csv('caracteristicas_cancionesjp.csv')
# Bare expression so the notebook renders the frame.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,etiqueta
0,-33.522110,46.967037,7.297648,28.791386,4.638599,14.015849,-4.014965,7.175768,0.569459,9.676569,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Exodus.mp3
1,-148.184113,77.414627,22.313719,29.190990,13.712492,13.180655,-4.847233,5.510168,-3.705767,7.560897,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - King Tulip.mp3
2,-109.715393,69.597122,31.331320,50.046558,13.795660,15.090928,3.032498,8.497753,-10.353943,3.490418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Lte.mp3
3,-104.375908,81.880394,8.105327,1.289422,-12.881152,-1.505768,-2.986757,7.328893,-5.891006,3.347755,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - The Sacred.mp3
4,-70.708229,92.657921,3.042413,18.210211,8.917986,11.823200,1.670250,2.491837,0.363811,6.691009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Til Tuesday - Voices Carry.mp3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5209,-17.343597,81.888573,-10.056756,37.247326,7.296055,3.846286,-1.420673,4.889192,1.470839,3.792245,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sharp Dressed Man - 2008 Remaster.mp3
5210,-97.704109,61.729267,-9.165577,24.729053,7.406517,5.858263,-1.380089,3.252451,-1.981778,8.454137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sleeping Bag.mp3
5211,-31.040625,99.809891,-3.825508,38.275597,-1.322180,3.809813,-1.666210,6.598217,3.866884,10.444964,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Tush - 2006 Remaster.mp3
5212,-105.022682,99.173553,-1.972429,21.914129,7.793817,14.785956,1.718355,8.648097,-1.364933,4.222836,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[dunkelbunt], Boban i Marko Marcovic Orkestar ..."


In [3]:
# Renders the same `data` frame again; nothing is modified between the two displays.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,etiqueta
0,-33.522110,46.967037,7.297648,28.791386,4.638599,14.015849,-4.014965,7.175768,0.569459,9.676569,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Exodus.mp3
1,-148.184113,77.414627,22.313719,29.190990,13.712492,13.180655,-4.847233,5.510168,-3.705767,7.560897,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - King Tulip.mp3
2,-109.715393,69.597122,31.331320,50.046558,13.795660,15.090928,3.032498,8.497753,-10.353943,3.490418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - Lte.mp3
3,-104.375908,81.880394,8.105327,1.289422,-12.881152,-1.505768,-2.986757,7.328893,-5.891006,3.347755,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,$uicideboy$ - The Sacred.mp3
4,-70.708229,92.657921,3.042413,18.210211,8.917986,11.823200,1.670250,2.491837,0.363811,6.691009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,'Til Tuesday - Voices Carry.mp3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5209,-17.343597,81.888573,-10.056756,37.247326,7.296055,3.846286,-1.420673,4.889192,1.470839,3.792245,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sharp Dressed Man - 2008 Remaster.mp3
5210,-97.704109,61.729267,-9.165577,24.729053,7.406517,5.858263,-1.380089,3.252451,-1.981778,8.454137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Sleeping Bag.mp3
5211,-31.040625,99.809891,-3.825508,38.275597,-1.322180,3.809813,-1.666210,6.598217,3.866884,10.444964,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ZZ Top - Tush - 2006 Remaster.mp3
5212,-105.022682,99.173553,-1.972429,21.914129,7.793817,14.785956,1.718355,8.648097,-1.364933,4.222836,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[dunkelbunt], Boban i Marko Marcovic Orkestar ..."


### KNN SECUENCIAL

In [4]:
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from queue import PriorityQueue

# Every column except the last one (the label) is used as a feature and
# standardised column-wise by the scaler.
scaler = StandardScaler()
X = scaler.fit_transform(data.iloc[:, :-1].values)

In [5]:
def knn_search_priority_queue(query, k):
    """Return the ``k`` rows of ``X`` that are most cosine-similar to ``query``.

    Reads the module-level feature matrix ``X`` and the ``'etiqueta'`` column
    of the module-level ``data`` frame (row ``i`` of ``X`` is labelled by row
    ``i`` of ``data``).

    The previous implementation pushed every row into a ``queue.PriorityQueue``
    and popped ``k`` items; ``PriorityQueue.get()`` blocks when the queue is
    empty, so asking for more neighbours than there are rows hung forever.
    The ranking is now computed by sorting and is capped at the dataset size.

    Parameters
    ----------
    query : 1-D array with one value per column of ``X``.
    k : number of neighbours requested.

    Returns
    -------
    List of ``(label, similarity)`` tuples ordered from most to least similar;
    equal similarities are ordered by label. Contains ``min(k, len(X))``
    entries, and is empty when ``k <= 0``.
    """
    similarities = cosine_similarity(query.reshape(1, -1), X).flatten()
    # Fetch the label column once instead of materialising one row per lookup.
    labels = data['etiqueta'].to_numpy()
    # Sorting on the negated similarity puts the best matches first and keeps
    # the original (-similarity, label) tie-breaking order.
    ranked = sorted(zip(-similarities, labels))[:max(k, 0)]
    return [(label, -neg_sim) for neg_sim, label in ranked]

In [7]:
# Use an existing row of the scaled matrix as the query (any row index works).
query_example = X[500]
knn_result = knn_search_priority_queue(query_example, 5)

# knn_search_priority_queue returns (label, cosine similarity) pairs, so the
# output is labelled as a kNN/similarity result rather than a range/distance one.
print("\nResultados de búsqueda KNN (similitud coseno):")
for label, sim in knn_result:
    print(f"Similitud coseno: {sim}, Etiqueta: {label}")


Resultados de búsqueda por rango:
Distancia: 1.0, Etiqueta: BIA - BIA BIA (feat. Lil Jon).mp3
Distancia: 0.8697328884724876, Etiqueta: pewdiepie, Party in Backyard - Bitch Lasagna.mp3
Distancia: 0.869674897226493, Etiqueta: YBN Nahmir - Baby 8.mp3
Distancia: 0.8394644513147063, Etiqueta: Ghostemane, Pouya - Stick Out.mp3
Distancia: 0.834323346161588, Etiqueta: Ghostemane - As Above so Look out Below.mp3


### BÚSQUEDA POR RANGO

In [8]:
def range_search(query_object, radius):
    """Return every row of ``X`` within Euclidean distance ``radius`` of the query.

    Reads the module-level feature matrix ``X`` and the ``'etiqueta'`` column
    of the module-level ``data`` frame (row ``i`` of ``X`` is labelled by row
    ``i`` of ``data``).

    All distances are computed with a single ``euclidean_distances`` call; the
    previous version called it twice per row, one row at a time.

    Parameters
    ----------
    query_object : 1-D array with one value per column of ``X``.
    radius : inclusive distance threshold.

    Returns
    -------
    List of ``(distance, label)`` tuples in row order (not sorted by distance).
    """
    query_object_2d = query_object.reshape(1, -1)
    distances = euclidean_distances(query_object_2d, X)[0]
    # Fetch the label column once instead of materialising one row per lookup.
    labels = data['etiqueta'].to_numpy()
    return [(distances[i], labels[i]) for i in np.flatnonzero(distances <= radius)]

In [16]:
query_example = X[500]
radius_result = range_search(query_example, radius=15)

# range_search returns hits in row order; rank them from nearest to farthest.
sorted_results = sorted(radius_result, key=lambda par: par[0])

print("\nResultados de búsqueda por rango ordenados por distancia:")
for distance, label in sorted_results:
    print(f"Distancia euclidiana: {distance}, Etiqueta: {label}")


Resultados de búsqueda por rango ordenados por distancia:
Distancia euclidiana: 0.0, Etiqueta: BIA - BIA BIA (feat. Lil Jon).mp3
Distancia euclidiana: 13.635643005371094, Etiqueta: pewdiepie, Party in Backyard - Bitch Lasagna.mp3


### KNN RTREE DIMAEL

### FAISS

In [12]:
import  faiss

# Feature matrix: every column but the trailing label, cast to float32 before
# scaling (the dtype the original cell requires for the FAISS input).
scaler = StandardScaler()
X = scaler.fit_transform(data.iloc[:, :-1].values.astype('float32'))

# Index configuration: vector dimensionality and number of inverted lists.
nlist = 10
dimension = X.shape[1]

# IVF index over L2 distance, with a flat L2 index as the coarse quantizer.
# NOTE(review): `index` rebinds the name imported by `from rtree import index`
# in the first cell; knn_faiss reads this global, so rename both together if needed.
quantizer = faiss.IndexFlatL2(dimension)
index = faiss.IndexIVFFlat(quantizer, dimension, nlist, faiss.METRIC_L2)

# The index is trained on the data before the vectors are added.
index.train(X)
index.add(X)

In [13]:
def knn_faiss(query_object, k):
    """Return up to ``k`` nearest neighbours of ``query_object`` from the FAISS index.

    Reads the module-level ``index`` (searched with ``index.search``) and the
    ``'etiqueta'`` column of the module-level ``data`` frame; result position
    ``i`` reported by the index is labelled with row ``i`` of ``data``.

    When the index finds fewer than ``k`` neighbours it fills the remaining
    result slots with index ``-1``. The previous version looked those up with
    ``data.iloc[-1]`` and therefore reported the last song of the table as a
    neighbour; such slots are now dropped.

    Parameters
    ----------
    query_object : 1-D array-like with one value per indexed dimension.
    k : number of neighbours requested.

    Returns
    -------
    List of ``(distance, label)`` tuples in the order returned by the index,
    with at most ``k`` entries. Distances are whatever ``index.search``
    reports (NOTE(review): for an L2 FAISS index these are presumably squared
    distances — confirm against the FAISS documentation).
    """
    # FAISS expects a float32 batch of shape (n_queries, dimension).
    query = np.asarray(query_object, dtype='float32').reshape(1, -1)
    distances, indices = index.search(query, k)
    labels = data['etiqueta'].to_numpy()
    return [
        (dist, labels[idx])
        for dist, idx in zip(distances[0], indices[0])
        if idx >= 0  # -1 marks a slot for which no neighbour was found
    ]

In [15]:
# Use an existing row of the scaled matrix as the query (any row index works).
query_example = X[100]
k_result = knn_faiss(query_example, 5)

# knn_faiss yields (distance, label) pairs in the order returned by the index.
print("Resultados de búsqueda KNN con FAISS:")
for dist, label in k_result:
    print(f"Distancia: {dist}, Etiqueta: {label}")

Resultados de búsqueda KNN con FAISS:
Distancia: 0.0, Etiqueta: Accept - Life's a Bitch.mp3
Distancia: 83.60263061523438, Etiqueta: Judas Priest - Living After Midnight.mp3
Distancia: 92.01953125, Etiqueta: Foreigner - Double Vision.mp3
Distancia: 93.28280639648438, Etiqueta: Depeche Mode - Personal Jesus - Acoustic.mp3
Distancia: 98.23233032226562, Etiqueta: HIM - Wicked Game.mp3
