# Audio Recommender

## Simplified Recommender

In [1]:
import json
import os
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Load JSON metadata.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open("../data/examples.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Prepare file paths and tag vectors as two parallel lists: entry i of
# `file_paths` is the audio file described by row i of `tag_vectors`.
file_paths = []
tag_vectors = []

for note_id, item in data.items():
    # Construct the relative audio path (assuming all files in ./audio/)
    audio_path = os.path.join("audio", note_id + ".wav")
    file_paths.append(audio_path)

    # Vector = [10-dimensional qualities] + [source] + [velocity]
    vector = item["qualities"] + [item["instrument_source"], item["velocity"]]
    tag_vectors.append(vector)

# One row per note, one column per tag dimension.
X = np.array(tag_vectors)

# Fit nearest neighbor model (Euclidean distance, 3 neighbours by default)
knn = NearestNeighbors(n_neighbors=3, metric="euclidean")
knn.fit(X)

# Define recommendation function
def recommend_audio(quality_vector, source, velocity, top_k=3):
    """Return the audio file paths of the notes closest to a tag query.

    The query is laid out like the rows the module-level ``knn`` model was
    fitted on: the quality flags, followed by the source code and velocity.

    Args:
        quality_vector: Sequence of quality flags (list, tuple or 1-D array).
            Assumed to use the same order as the dataset's ``qualities``
            field -- confirm against the data.
        source: Numeric instrument-source code.
        velocity: Numeric velocity.
        top_k: Number of neighbours to request from the model.

    Returns:
        A list with one entry of ``file_paths`` per neighbour, in the order
        returned by ``knn.kneighbors``.
    """
    # list(...) makes the concatenation work for tuples and NumPy arrays too;
    # a bare `+` would raise for a tuple and try to broadcast for an array.
    query = list(quality_vector) + [source, velocity]
    query_vec = np.array([query])  # shape (1, n_features): a single query row

    # Only the neighbour indices are needed, so skip returning the distances.
    indices = knn.kneighbors(query_vec, n_neighbors=top_k, return_distance=False)

    # indices has one row per query; map the first (only) row back to paths.
    return [file_paths[i] for i in indices[0]]

# Example query: only the "dark" flag set (position 1 of the 10 quality
# flags), source code 2 (synthetic) and velocity 50.
quality_query = [0] * 10
quality_query[1] = 1
recommended = recommend_audio(quality_query, source=2, velocity=50)

# Show the recommended file paths, one per line.
print("Recommended audio:")
for path in recommended:
    print(path)


Recommended audio:
audio\bass_synthetic_033-044-050.wav
audio\bass_synthetic_098-036-050.wav
audio\bass_synthetic_033-035-050.wav


## Advanced Recommender

### Step 1: Preparation

In [None]:
import json
import os
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from IPython.display import Audio, display
import ipywidgets as widgets

# Load the note metadata. JSON text is UTF-8 by specification, so decode it
# explicitly rather than relying on the platform's default encoding.
with open("../data/examples.json", "r", encoding="utf-8") as f:
    data = json.load(f)

### Step 2: Define mappings and labels 

In [2]:
# Names of the quality tags. Position i names flag i of a quality vector
# (presumably the same order as the dataset's "qualities" lists -- confirm).
quality_labels = [
    "bright",
    "dark",
    "distortion",
    "fast_decay",
    "long_release",
    "multiphonic",
    "nonlinear_env",
    "percussive",
    "reverb",
    "tempo_synced",
]

# Human-readable instrument source -> numeric code used in the tag vectors.
source_map = dict(acoustic=0, electronic=1, synthetic=2)

# Human-readable velocity level -> numeric velocity used to build a query.
velocity_map = dict(low=30, medium=75, high=120)

### Step 3: Prepare vectors


In [3]:
# Three parallel lists, one entry per note and all in the iteration order of
# `data`: the note ID, its audio path under "audio", and its tag vector.
note_ids = list(data)
file_paths = [os.path.join("audio", nid + ".wav") for nid in note_ids]

# Tag vector layout: quality flags, then instrument source, then velocity.
tag_vectors = [
    entry["qualities"] + [entry["instrument_source"], entry["velocity"]]
    for entry in data.values()
]

# Stack the vectors into a matrix (one row per note) and index it with a
# Euclidean nearest-neighbour model that returns 3 neighbours by default.
X = np.array(tag_vectors)
knn = NearestNeighbors(metric="euclidean", n_neighbors=3)
knn.fit(X)

### Step 4: Utility functions


In [4]:
def encode_query(qualities_str, source_str, velocity_str):
    """Turn human-readable selections into a numeric query vector.

    Args:
        qualities_str: Container of selected quality names; each entry of
            ``quality_labels`` is tested against it with ``in``.
        source_str: A key of ``source_map``.
        velocity_str: A key of ``velocity_map``.

    Returns:
        A list with one 0/1 flag per entry of ``quality_labels`` (in that
        order), followed by the numeric source code and the numeric velocity.

    Raises:
        KeyError: If ``source_str`` or ``velocity_str`` is not a known key.
    """
    flags = []
    for label in quality_labels:
        flags.append(int(label in qualities_str))
    return flags + [source_map[source_str], velocity_map[velocity_str]]

def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    A small constant (1e-8) is added to the product of the norms, so a zero
    vector yields 0 instead of a division by zero. As a side effect the
    result for identical non-zero vectors is marginally below 1.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    dot_product = np.dot(a, b)
    return dot_product / (norm_product + 1e-8)

### Step 5: Widgets


In [5]:
# Pane that the click callback writes its printed text, table and plot into.
output_area = widgets.Output()

# Multi-select list of quality tags; "dark" is selected initially.
quality_selector = widgets.SelectMultiple(
    description="Qualities:",
    options=quality_labels,
    value=["dark"],
    layout=widgets.Layout(height="120px", width="300px"),
)

# Drop-down of instrument sources (the keys of source_map).
source_dropdown = widgets.Dropdown(
    description="Source:",
    options=list(source_map),
    value="synthetic",
)

# Drop-down of velocity levels (the keys of velocity_map).
velocity_dropdown = widgets.Dropdown(
    description="Velocity:",
    options=list(velocity_map),
    value="medium",
)

### Step 6: Main callback function


In [6]:
def on_recommend_click(_):
    """Handle a click on the recommend button.

    Reads the current widget selections, asks the fitted ``knn`` model for
    the three nearest notes and renders, inside ``output_area``:

    1. each recommended note ID with its cosine similarity to the query and,
       when the audio file exists on disk, an audio player;
    2. a table of note IDs and cosine similarities;
    3. a 2-D PCA scatter plot of all tag vectors with the recommendations
       highlighted in red.

    Args:
        _: The argument passed by the click callback; unused.
    """
    with output_area:
        # Discard whatever the previous click rendered.
        output_area.clear_output()

        # --- 1. Query vector ---
        query_vec = encode_query(
            quality_selector.value,
            source_dropdown.value,
            velocity_dropdown.value
        )
        # Only the neighbour indices are used, so skip returning distances.
        indices = knn.kneighbors(
            [query_vec], n_neighbors=3, return_distance=False
        )
        recommended = indices[0]  # one row per query; there is one query
        recommended_ids = [note_ids[i] for i in recommended]

        # NOTE(review): neighbours are ranked by Euclidean distance, while the
        # score shown is cosine similarity on the unscaled vectors. Velocity
        # is much larger than the other components, so scores will tend to
        # cluster near 1 -- confirm this is the intended display.
        print("🎧 Recommended Audio Files:")
        similarities = []
        for i, note_id in zip(recommended, recommended_ids):
            sim = cosine_similarity(query_vec, X[i])
            similarities.append(sim)
            path = file_paths[i]
            print(f"{note_id} | similarity: {sim:.3f}")
            if os.path.exists(path):
                display(Audio(path))
            else:
                # Say so explicitly instead of silently omitting the player.
                print(f"  (audio file not found: {path})")

        # --- 2. Cosine similarity table ---
        table = pd.DataFrame({
            "Note ID": recommended_ids,
            "Cosine Similarity": similarities
        })
        display(table)

        # --- 3. PCA visualization ---
        # The projection is refitted on the full matrix X on every click.
        X_2d = PCA(n_components=2).fit_transform(X)
        plt.figure(figsize=(10, 7))
        plt.scatter(X_2d[:, 0], X_2d[:, 1], alpha=0.3, label="All Samples")
        plt.scatter(
            X_2d[recommended, 0], X_2d[recommended, 1],
            color='red', label="Recommended"
        )
        plt.title("PCA: NSynth Tag Space with Recommendations Highlighted")
        plt.xlabel("PC1")
        plt.ylabel("PC2")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

### Step 7: Display UI


In [None]:
# Button that runs the recommendation callback when clicked.
recommend_button = widgets.Button(description="🎧 Recommend")
recommend_button.on_click(on_recommend_click)

# Stack the controls vertically, with the output pane at the bottom, and
# render the assembled UI.
controls = [quality_selector, source_dropdown, velocity_dropdown]
ui = widgets.VBox(controls + [recommend_button, output_area])
display(ui)

VBox(children=(SelectMultiple(description='Qualities:', index=(1,), layout=Layout(height='120px', width='300px…

## Analysis of the Music Recommendation Results

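The recommender system demonstrated strong performance in returning relevant audio samples based on the selected tags. In the example shown, the user requested sounds with the quality “dark,” an “electronic” source, and “high” velocity. The system instantly provided three top recommendations—organ_electronic_113-054-127, organ_electronic_001-060-127, and organ_electronic_057-056-127—all with a cosine similarity score of 1.0 (as printed, rounded to three decimals). This near-perfect similarity suggests that these samples match the input tag vector very closely, confirming the system’s ability to find highly relevant matches in the dataset. The score should be read with some caution, however: the velocity component is far larger than the 0/1 quality flags and the 0–2 source code, so it dominates the cosine and can push the score to 1.000 even when the vectors are not identical—here, for instance, “high” is encoded as 120 in the query, while the returned sample IDs end in 127, which appears to be their velocity.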

The accompanying PCA visualization provides additional insight into the recommendation process. Here, all dataset samples are plotted in a reduced two-dimensional tag space, with the recommended items highlighted in red. The red dots sit squarely within the overall data distribution, illustrating that the recommended clips are not outliers, but rather representative examples from the relevant region of the feature space. This means the recommender is not only precise in matching user-specified tags, but also consistent in navigating the entire dataset.

Overall, these results show that the tag-based KNN approach is highly effective for interactive music search. The system responds instantly, provides intuitive control via tags, and its recommendations are both accurate and explainable—making it a practical tool for musicians or sound designers looking for just the right sound. The visualization further builds trust, showing that the system’s choices are well-supported by the underlying data distribution.