In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os.path
from pathlib import Path
import pickle
import multiprocessing
from tqdm import tqdm

In [2]:
import import_ipynb

In [3]:
import DTW

In [4]:
import NWTW

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [5]:
import FlexDTW

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [6]:
import Parflex
import Sparse_Parflex

In [7]:
DATASET = 'train' # 'test'
VERSION = 'full'

In [8]:
QUERY_LIST = Path(f'cfg_files/queries.{DATASET}.{VERSION}')

In [9]:
# SYSTEMS = ['dtw1', 'dtw2', 'dtw3', 'subseqdtw1', 'subseqdtw2', 'subseqdtw3', 'nwtw', 'flexdtw', 'parflex']
SYSTEMS = ['parflex','sparse_parflex']

BENCHMARKS = ['matching', 'subseq_20', 'subseq_30', 'subseq_40', 'partialStart', 'partialEnd', 'partialOverlap', 
              'pre_5', 'pre_10', 'pre_20', 'post_5', 'post_10', 'post_20', 'prepost_5', 'prepost_10',
              'prepost_20']

In [10]:
features_root = Path('/home/ijain/ttmp/Chopin_Mazurkas_features')
FEAT_DIRS = {}

for benchmark in BENCHMARKS:
    if benchmark == 'partialOverlap':
        FEAT_DIRS[benchmark] = ([features_root/'partialStart', features_root/'partialEnd'])
    elif 'prepost' in benchmark:
        sec = benchmark.split('_')[-1]
        FEAT_DIRS[benchmark] = ([features_root/f'pre_{sec}', features_root/f'post_{sec}'])
    else:
        FEAT_DIRS[benchmark] = [features_root/f'{benchmark}', features_root/'original']

In [11]:
steps = {'dtw1': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'dtw2': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'dtw3': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'subseqdtw1': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'subseqdtw2': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'subseqdtw3': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'nwtw': 0, # transitions are specified in NWTW algorithm
        'flexdtw': np.array([1,1,1,2,2,1]).reshape((-1,2)), 
        'parflex': np.array([1,1,1,2,2,1]).reshape((-1,2)),
        'sparse_parflex': np.array([1,1,1,2,2,1]).reshape((-1,2))
        }
weights = {'dtw1': np.array([2,3,3]),
          'dtw2': np.array([1,1,1]),
          'dtw3': np.array([1,2,2]),
          'subseqdtw1': np.array([1,1,2]),
          'subseqdtw2': np.array([2,3,3]),
          'subseqdtw3': np.array([1,2,2]),
          'nwtw': 0, # weights are specified in NWTW algorithm
          'flexdtw': np.array([1.25,3,3]),
          'parflex': np.array([1.25,3.0,3.0]),
          'sparse_parflex': np.array([1.25,3.0,3.0])
          }
other_params = {
                'flexdtw': {'beta': 0.1}, 
                'parflex': {'beta': 0.1},
                'sparse_parflex': {'beta': 0.1}
               }

# Benchmarks

In [12]:
def get_outfile(outdir, benchmark, system, queryid):
    outpath = (outdir / benchmark / system)
    outpath.mkdir(parents=True, exist_ok=True)
    outfile = (outpath / queryid).with_suffix('.pkl')
    return outfile

In [13]:
# Parflex implementation lives in Parflex.ipynb; use import Parflex.


In [14]:
def align_system(system, F1, F2, outfile):
    subseq = 'subseq' in system
    if system == 'parflex':
        C = 1 - FlexDTW.L2norm(F1).T @ FlexDTW.L2norm(F2)
        best_cost, wp = Parflex.parflex(C, steps=steps[system], weights=weights[system], beta=other_params[system]['beta'], L=other_params[system].get('L'))

    elif system == 'sparse_parflex':
        C = 1 - FlexDTW.L2norm(F1).T @ FlexDTW.L2norm(F2)
        best_cost, wp = Sparse_Parflex.parflex(C, steps=steps[system], weights=weights[system], beta=other_params[system]['beta'], L=other_params[system].get('L'))
    
    elif system == 'flexdtw':
        L1 = F1.shape[1]
        L2 = F2.shape[1]
        buffer = min(L1, L2) * (1 - (1 - other_params[system]['beta']) * min(L1,L2) / max(L1, L2))
        C = 1 - FlexDTW.L2norm(F1).T @ FlexDTW.L2norm(F2) # cos distance metric 
        best_cost, wp, D, P, B, debug = FlexDTW.flexdtw(C, steps=steps[system], weights=weights[system], buffer=buffer)
        
    elif system == 'nwtw':
        downsample = 1
        C = 1 - NWTW.L2norm(F1)[:,0::downsample].T @ NWTW.L2norm(F2)[:,0::downsample] # cos distance metric
        optcost, wp, D, B = NWTW.NWTW_faster(C, gamma=0.346)
    else:
        downsample = 1
        if subseq and (F2.shape[1] < F1.shape[1]):
            C = 1 - DTW.L2norm(F2)[:,0::downsample].T @ DTW.L2norm(F1)[:,0::downsample] # cos distance metric
            wp = DTW.alignDTW(C, steps=steps[system], weights=weights[system], downsample=downsample, outfile=outfile, subseq=subseq)
            wp = wp[::-1,:]
        else:
            C = 1 - DTW.L2norm(F1)[:,0::downsample].T @ DTW.L2norm(F2)[:,0::downsample] # cos distance metric
            wp = DTW.alignDTW(C, steps=steps[system], weights=weights[system], downsample=downsample, outfile=outfile, subseq=subseq)
            
    if wp is not None:
        pickle.dump(wp, open(outfile, 'wb'))


In [15]:
def run_all_benchmarks(outdir):
    parts_batch = []
    queryids = []
    with open(QUERY_LIST, 'r') as f:
        for line in f:
            parts = line.strip().split(' ')
            assert len(parts) == 2
            queryid = os.path.basename(parts[0]) + '__' + os.path.basename(parts[1])
            
            if 'Czerny-Stefanska-1949_pid9086' in queryid:
                continue
            
            parts_batch.append(parts)
            queryids.append(queryid)
            
    for benchmark in tqdm(BENCHMARKS):
#         for i in range(len(parts_batch)):
#             run_benchmark(benchmark, FEAT_DIRS[benchmark][0], FEAT_DIRS[benchmark][1], parts_batch[i], outdir, queryids[i])
        run_benchmark_batch(benchmark, FEAT_DIRS[benchmark][0], FEAT_DIRS[benchmark][1], parts_batch, outdir, queryids, n_cores=4)

In [16]:
def run_benchmark_batch(benchmark, featdir1, featdir2, parts_batch, outdir, queryids, n_cores):
    inputs = []
    assert len(parts_batch) == len(queryids)
    
    for i in range(len(parts_batch)):
        featfile1 = (featdir1 / parts_batch[i][0]).with_suffix('.npy')
        featfile2 = (featdir2 / parts_batch[i][1]).with_suffix('.npy')
        
        F1 = np.load(featfile1)
        F2 = np.load(featfile2)
        
        for system in SYSTEMS:
            
            # only compute alignment if this hypothesis file doesn't already exist
            outfile = get_outfile(outdir, benchmark, system, queryids[i])
            if not os.path.isfile(outfile):   
                inputs.append((system, F1, F2, outfile))

    # process files in parallel
    pool = multiprocessing.Pool(processes = multiprocessing.cpu_count()-1)
    pool.starmap(align_system, inputs)
     
    
    return

In [17]:
def run_benchmark(benchmark, featdir1, featdir2, parts, outdir, queryid):
    featfile1 = (featdir1 / parts[0]).with_suffix('.npy')
    featfile2 = (featdir2 / parts[1]).with_suffix('.npy')

    F1 = np.load(featfile1)
    F2 = np.load(featfile2)
        
    # run all baselines
    for system in SYSTEMS:
        
        # only compute alignment if this hypothesis file doesn't already exist
        outfile = get_outfile(outdir, benchmark, system, queryids[i])
        if not os.path.isfile(outfile):   
            align_system(system, F1, F2, outfile)

In [18]:
outdir = Path(f'experiments_{DATASET}/{VERSION}')
run_all_benchmarks(outdir)

  0%|          | 0/16 [00:00<?, ?it/s]

  6%|▋         | 1/16 [26:19<6:34:51, 1579.45s/it]

 12%|█▎        | 2/16 [33:07<3:27:43, 890.22s/it] 

 19%|█▉        | 3/16 [40:30<2:28:42, 686.33s/it]

 25%|██▌       | 4/16 [48:32<2:01:06, 605.57s/it]

 31%|███▏      | 5/16 [1:05:51<2:19:38, 761.65s/it]

 38%|███▊      | 6/16 [1:22:25<2:20:07, 840.74s/it]

 44%|████▍     | 7/16 [1:34:50<2:01:24, 809.36s/it]

 50%|█████     | 8/16 [1:59:54<2:17:25, 1030.71s/it]

 56%|█████▋    | 9/16 [2:31:50<2:32:31, 1307.32s/it]

 62%|██████▎   | 10/16 [3:00:23<2:23:15, 1432.51s/it]

 69%|██████▉   | 11/16 [3:27:11<2:03:51, 1486.37s/it]

 75%|███████▌  | 12/16 [3:54:15<1:41:52, 1528.07s/it]

 81%|████████▏ | 13/16 [4:22:16<1:18:43, 1574.43s/it]

 88%|████████▊ | 14/16 [4:50:14<53:31, 1605.67s/it]  

 94%|█████████▍| 15/16 [5:17:20<26:52, 1612.00s/it]

100%|██████████| 16/16 [5:46:38<00:00, 1655.90s/it]

100%|██████████| 16/16 [5:46:38<00:00, 1299.91s/it]


