---

# DTW inference for chroma

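>This notebook runs sliding-window DTW between each query's chroma features and the chroma features of every song in the collection (raw audio, Spleeter and Open-Unmix versions), and saves the songs ranked by DTW cost for each query.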

---

In [1]:
import os
import numpy as np
import pandas as pd
import librosa

In [2]:
# Input directories holding the pre-computed chroma features
queries_path, raw_path = 'chroma/queries', 'chroma/raw'
spleeter_path, openunmix_path = 'chroma/spleeter', 'chroma/openunmix'

# Output directories for the per-query result tables
results_raw_path = 'results/chroma/raw'
results_spleeter_path, results_openunmix_path = (
    'results/chroma/spleeter',
    'results/chroma/openunmix',
)

## Query database

In [3]:
# Load the query metadata table and preview its first rows
queries = pd.read_csv(filepath_or_buffer='metadata/Queries.csv')
queries.head(n=5)

Unnamed: 0,Filename,Query ID,Song ID,Title,Original artist,Class label
0,q1.wav,q1,1118,Mother natures son,The Beatles,mothernat
1,q2.wav,q2,1438,Scarborough fair,Unknown,scarborou
2,q3.wav,q3,1546,Stairway to heaven,Led Zeppelin,stairway2h
3,q4.wav,q4,1579,Strawberry fields forever,The Beatles,strawberry
4,q5.wav,q5,1808,When I am sixty four,The Beatles,whenim64


## Filter queries based on existing song IDs in the collection

In [4]:
# Song IDs are the file stems of the raw chroma collection. Skip anything that
# is not a .npy file (e.g. .ipynb_checkpoints, .DS_Store) so int() cannot fail
# on stray directory entries.
collection_ids = [
    int(os.path.splitext(f)[0])
    for f in os.listdir(raw_path)
    if f.endswith('.npy')
]

# Keep only the queries whose target song exists in the collection, and only
# the two columns the querying cells need.
queries = queries.loc[queries["Song ID"].isin(collection_ids), ["Query ID", "Song ID"]]
queries.head()

Unnamed: 0,Query ID,Song ID
0,q1,1118
1,q2,1438
4,q5,1808
7,q8,593
8,q9,1224


## Querying

**DTW sliding window function**

In [5]:
eps = np.finfo(float).eps


def dtw_search(query, song):
    """Return the lowest DTW cost between a query and any window of a song.

    The song is scanned with a sliding window 1.2x the query length (hop of a
    quarter window). For every window the query is compared under 12 circular
    shifts along axis 0, and the smallest accumulated cost is kept.

    Parameters
    ----------
    query : np.ndarray
        2-D feature matrix with frames along axis 1. Axis 0 is presumably the
        12 chroma bins (the 12 circular shifts assume this) - TODO confirm.
    song : np.ndarray
        2-D feature matrix laid out like ``query``.

    Returns
    -------
    float
        Minimum of ``D[-1, -1]`` over all windows and shifts, or ``np.inf``
        when ``song`` has no frames.

    Raises
    ------
    ValueError
        If ``query`` has no frames.
    """
    if query.shape[1] == 0:
        raise ValueError("query must contain at least one frame")
    if song.shape[1] == 0:
        return np.inf

    window_length = int(query.shape[1] * 1.2)
    # Very short queries would give a zero hop, which range() rejects.
    hop_length = max(1, window_length // 4)

    # Avoid divide by zero in the cosine metric. Work on copies so the caller's
    # arrays (reused across many songs/queries) are never modified.
    query = np.where(query == 0, eps, query)
    song = np.where(song == 0, eps, song)

    # Last start whose window still fits in the song; clamped to 0 so a song
    # shorter than the window is still compared once as a whole.
    last_start = max(0, song.shape[1] - window_length)

    cost = np.inf

    for transpose in range(12):
        # Circularly shift the bins along axis 0 so the query is tried at every
        # shift (for chroma this is a key transposition).
        query_transposed = np.roll(query, transpose, axis=0)

        # "+ 1" so the final fully-contained window is not skipped.
        for window_start in range(0, last_start + 1, hop_length):
            window = song[:, window_start: window_start + window_length]

            # Accumulated cost matrix only (backtrack=False).
            distance = librosa.sequence.dtw(
                query_transposed,
                window,
                metric='cosine',
                subseq=True,
                global_constraints=True,
                band_rad=0.1,
                backtrack=False,
            )
            # NOTE(review): with subseq=True the best match end is usually
            # taken as distance[-1, :].min(); [-1, -1] is kept so existing
            # rankings stay comparable - confirm the intent.
            cost = min(distance[-1, -1], cost)

    return cost

### Raw audio

In [9]:
# Create directory to save results
# Create directory to save results
os.makedirs(results_raw_path, exist_ok=True)

# List the collection once. Only .npy files are song chroma; stray entries
# (e.g. .ipynb_checkpoints, .DS_Store) would break int() and np.load below.
raw_chroma_names = sorted(
    name for name in os.listdir(raw_path) if name.endswith('.npy')
)

for query_id in queries['Query ID']:
    # Load query chroma
    query_path = os.path.join(queries_path, query_id + '.npy')
    query_chroma = np.load(query_path)

    song_ids = list()
    costs = list()

    for chroma_name in raw_chroma_names:
        # Load song chroma
        chroma_path = os.path.join(raw_path, chroma_name)
        song_chroma = np.load(chroma_path)

        # Perform dtw on query and current song
        cost = dtw_search(query_chroma, song_chroma)

        # The file stem is the song ID
        song_ids.append(int(os.path.splitext(chroma_name)[0]))
        costs.append(cost)

    # Save results, best (lowest cost) match first
    result_df = pd.DataFrame({'Song ID': song_ids, 'Cost': costs})
    result_df = result_df.sort_values(by='Cost', ascending=True)
    result_df.to_csv(os.path.join(results_raw_path, query_id + '.csv'), index=False)

### Spleeter

In [14]:
# Create directory to save results
# Create directory to save results
os.makedirs(results_spleeter_path, exist_ok=True)

# List the collection once. Only .npy files are song chroma; stray entries
# (e.g. .ipynb_checkpoints, .DS_Store) would break int() and np.load below.
spleeter_chroma_names = sorted(
    name for name in os.listdir(spleeter_path) if name.endswith('.npy')
)

for query_id in queries['Query ID']:
    # Load query chroma
    query_path = os.path.join(queries_path, query_id + '.npy')
    query_chroma = np.load(query_path)

    song_ids = list()
    costs = list()

    for chroma_name in spleeter_chroma_names:
        # Load song chroma
        chroma_path = os.path.join(spleeter_path, chroma_name)
        song_chroma = np.load(chroma_path)

        # Perform dtw on query and current song
        cost = dtw_search(query_chroma, song_chroma)

        # The file stem is the song ID
        song_ids.append(int(os.path.splitext(chroma_name)[0]))
        costs.append(cost)

    # Save results, best (lowest cost) match first
    result_df = pd.DataFrame({'Song ID': song_ids, 'Cost': costs})
    result_df = result_df.sort_values(by='Cost', ascending=True)
    result_df.to_csv(os.path.join(results_spleeter_path, query_id + '.csv'), index=False)

### Open unmix

In [None]:
# Create directory to save results
# Create directory to save results
os.makedirs(results_openunmix_path, exist_ok=True)

# List the collection once. Only .npy files are song chroma; stray entries
# (e.g. .ipynb_checkpoints, .DS_Store) would break int() and np.load below.
openunmix_chroma_names = sorted(
    name for name in os.listdir(openunmix_path) if name.endswith('.npy')
)

for query_id in queries['Query ID']:
    # Load query chroma
    query_path = os.path.join(queries_path, query_id + '.npy')
    query_chroma = np.load(query_path)

    song_ids = list()
    costs = list()

    for chroma_name in openunmix_chroma_names:
        # Load song chroma
        chroma_path = os.path.join(openunmix_path, chroma_name)
        song_chroma = np.load(chroma_path)

        # Perform dtw on query and current song
        cost = dtw_search(query_chroma, song_chroma)

        # The file stem is the song ID
        song_ids.append(int(os.path.splitext(chroma_name)[0]))
        costs.append(cost)

    # Save results, best (lowest cost) match first
    result_df = pd.DataFrame({'Song ID': song_ids, 'Cost': costs})
    result_df = result_df.sort_values(by='Cost', ascending=True)
    result_df.to_csv(os.path.join(results_openunmix_path, query_id + '.csv'), index=False)