# ISCC Content-ID-Audio Benchmark

This notebook benchmarks how robust several candidate ISCC Content-ID-Audio algorithms are against transcoding of the audio into other formats and bitrates.

In [161]:
import os
import sys
sys.path.append("../../")
import iscc
from IPython.display import HTML, display
import tabulate
from iscc_bench.readers.fma_small import fma_small
from iscc_bench.algos.metrics import jaccard
from iscc_cli import audio_id
from iscc_cli import ffmpeg
from subprocess import run
from dataclasses import dataclass
from os.path import basename, splitext, abspath, join
from iscc_bench import DATA_DIR
from tqdm.notebook import tqdm as pbar
from statistics import mean
from bitstring import BitArray
from codetiming import Timer

In [162]:
# Absolute paths of all tracks supplied by the fma_small reader.
files = list(map(abspath, fma_small()))

# Transcoded variations are cached in their own folder below DATA_DIR.
VARIATIONS_PATH = join(DATA_DIR, 'fma_variations')
os.makedirs(VARIATIONS_PATH, exist_ok=True)


@dataclass
class Muxer:
    """Properties of one ffmpeg muxer, as collected by muxer_info()."""
    # First entry of the "Common extensions" help line ('' when not reported).
    ext: str
    # Value of the "Mime type" help line ('' when not reported).
    mime: str
    # Value of the "Default audio codec" help line ('' when not reported).
    dac: str


def muxer_info(muxer):
    """Query ffmpeg for a muxer's file extension, mime type and default audio codec.

    Runs ``ffmpeg -h muxer=<muxer>`` and scans the help text for the
    "Common extensions", "Mime type" and "Default audio codec" lines.
    A field whose line is missing stays an empty string.

    :param muxer: name of the ffmpeg muxer to inspect.
    :return: Muxer built from the parsed extension, mime type and codec.
    """
    command = [ffmpeg.exe_path(), '-h', f'muxer={muxer}']
    completed = run(command, capture_output=True)
    help_lines = completed.stdout.decode('utf-8').splitlines()

    def last_token(text):
        # The value is the final whitespace-separated token, minus surrounding dots.
        return text.strip().split()[-1].strip('.')

    ext = mime = dac = ''
    for help_line in help_lines:
        if 'Common extensions' in help_line:
            # Extensions are comma-separated; only the first one is kept.
            ext = last_token(help_line).split(',')[0]
        if 'Mime type' in help_line:
            mime = last_token(help_line)
        if 'Default audio codec' in help_line:
            dac = last_token(help_line)
    return Muxer(ext, mime, dac)
    

def formats():
    """Return the output lines of ``ffmpeg -formats`` (supported file formats)."""
    completed = run([ffmpeg.exe_path(), '-formats'], capture_output=True)
    listing = completed.stdout.decode('utf-8')
    return listing.splitlines()
    
def audio_codecs():
    """Return the names of ffmpeg audio codecs that support de- and encoding."""
    completed = run([ffmpeg.exe_path(), '-codecs'], capture_output=True)
    names = []
    for raw_line in completed.stdout.decode('utf-8').splitlines():
        entry = raw_line.strip()
        # Only entries whose flags start with 'DEA' (audio, decodable and encodable) count.
        if entry.startswith('DEA'):
            # The codec name is the token right after the flags column.
            names.append(entry.split()[1])
    return names


def muxers():
    """Find out what audio file extensions map to what default codecs in ffmpeg

    Lists the muxers reported by ``ffmpeg -muxers``, looks each one up with
    muxer_info() and keeps the first muxer per default audio codec that

    - uses a default audio codec listed by audio_codecs(),
    - reports a mime type starting with ``audio``, and
    - reports a file extension.

    :return: list of Muxer objects, one per distinct default audio codec.
    """
    listing = run([ffmpeg.exe_path(), '-muxers'], capture_output=True).stdout.decode('utf-8').splitlines()
    names = []
    for line in listing:
        if not line.startswith("  E"):
            continue
        tokens = line.split()
        # tokens[0] is the flags column and tokens[1] the muxer name. Picking the
        # token replaces str.strip(' E '), which strips a *set* of characters and
        # would therefore also eat leading 'E's that belong to the name.
        if len(tokens) > 1:
            names.append(tokens[1])
    infos = [muxer_info(name) for name in names]
    codecs = set(audio_codecs())  # set gives constant-time membership tests
    seen_codecs = set()
    unique = []
    for mux in infos:
        if mux.dac not in codecs or mux.dac in seen_codecs:
            continue
        if mux.mime and mux.mime.startswith('audio') and mux.ext:
            unique.append(mux)
            seen_codecs.add(mux.dac)
    return unique

 
def show(table):
    """Render a table as HTML in the notebook, using its first row as the header."""
    markup = tabulate.tabulate(table, headers="firstrow", tablefmt='html')
    display(HTML(markup))

# Transcoding targets named '<file extension>_<audio bitrate>': every extension
# is combined with both bitrates, the higher bitrate first.
target_formats = tuple(
    f'{ext}_{bitrate}'
    for ext in ('ac3', 'aac', 'aif', 'flac', 'mp2', 'mp3', 'opus', 'wv')
    for bitrate in (128000, 64000)
)


def variations(fp):
    """Builds/Caches/Returns a list of encoding variations for a given audio file at file path (fp)

    For every entry of target_formats (``<extension>_<bitrate>``) the file is
    transcoded with ffmpeg to ``<name>-<bitrate>.<extension>`` inside
    VARIATIONS_PATH, unless that output file already exists.

    :param fp: path of the source audio file.
    :return: list of absolute output paths, in target_formats order.
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    in_name, _ = splitext(basename(fp))
    out_paths = []  # deliberately not called `vars`, which shadows the builtin
    for tf in target_formats:
        fmt, bitrate = tf.split('_')
        out_path = abspath(join(VARIATIONS_PATH, f'{in_name}-{bitrate}.{fmt}'))
        # generate if it does not exist:
        if not os.path.exists(out_path):
            try:
                run([ffmpeg.exe_path(), '-i', fp, '-b:a', bitrate, out_path], check=True)
            except BaseException:
                # A failed or interrupted transcode may leave a partial file behind,
                # which the existence check above would accept as a valid cache
                # entry on the next call. Remove it, then propagate the error.
                if os.path.exists(out_path):
                    os.remove(out_path)
                raise
        out_paths.append(out_path)
    return out_paths


In [163]:
# Algorithms


def cr_mh_lsb(fp):
    """Minimum hash (n=64) of the chroma vector, reduced to least significant bits.

    :param fp: path of the audio file.
    :return: list of (position, lowest bit of the hash value at that position).
    """
    chroma = audio_id.get_chroma_vector(fp)
    min_hashes = iscc.minimum_hash(chroma, n=64)
    features = []
    for position, value in enumerate(min_hashes):
        features.append((position, value & 1))
    return features


def cr_sh32(fp):
    """Similarity hash over the chroma values, each encoded as 4 signed big-endian bytes.

    :param fp: path of the audio file.
    :return: list of (bit position, bit character) pairs of the hash digest.
    """
    chroma = audio_id.get_chroma_vector(fp)
    encoded = []
    for value in chroma:
        encoded.append(value.to_bytes(4, "big", signed=True))
    digest = iscc.similarity_hash(encoded)
    bits = BitArray(digest).bin
    return list(enumerate(bits))


def cr_sh64_pw(fp):
    """Similarity hash over neighbouring pairs of chroma values.

    Each window of two values from iscc.sliding_window is encoded as
    8 bytes (two signed big-endian 4-byte integers) before hashing.

    :param fp: path of the audio file.
    :return: list of (bit position, bit character) pairs of the hash digest.
    """
    chroma = audio_id.get_chroma_vector(fp)
    pair_digests = [
        first.to_bytes(4, 'big', signed=True) + second.to_bytes(4, 'big', signed=True)
        for first, second in iscc.sliding_window(chroma, 2)
    ]
    digest = iscc.similarity_hash(pair_digests)
    return list(enumerate(BitArray(digest).bin))
   

# Candidate fingerprint functions, benchmarked in this order.
algorithms = [cr_mh_lsb, cr_sh32, cr_sh64_pw]

In [164]:
def evaluate(max_files=100):
    """Benchmark every algorithm against every transcoding and show a summary table.

    For up to ``max_files`` tracks from ``files``, each algorithm's output for
    the original file is compared (via ``jaccard``) with its output for every
    transcoded variation of that file. The mean score per transcoding and
    algorithm, rounded to two decimals, is rendered with show().

    :param max_files: maximum number of tracks from ``files`` to benchmark.
    :return: dict mapping algorithm name -> {transcoding name -> list of scores}.
    """
    tracks = files[:max_files]
    # Report the number of tracks actually used; `files` may hold fewer than max_files.
    print(f"Benchmarking {len(tracks)} tracks against {len(target_formats)} transcodings.")
    result = {func.__name__: {tf: [] for tf in target_formats} for func in algorithms}
    for func in algorithms:
        db = result[func.__name__]
        print(f'\nComputing {func.__name__}')
        with Timer():
            for reference_fp in pbar(tracks):
                ref_vec = func(reference_fp)
                # variations() returns its paths in target_formats order, so the
                # transcoding name need not be parsed back out of the file name.
                for tf, variation_fp in zip(target_formats, variations(reference_fp)):
                    vari_vec = func(variation_fp)
                    db[tf].append(jaccard(ref_vec, vari_vec))

    algo_names = [func.__name__ for func in algorithms]
    tbl = [['Transcoding', *algo_names]]
    for tf in target_formats:
        row = [tf]
        for name in algo_names:
            scores = result[name][tf]
            # mean() raises on empty data (e.g. max_files=0); leave the cell empty instead.
            row.append(round(mean(scores), 2) if scores else None)
        tbl.append(row)
    show(tbl)
    return result

# Run the benchmark with the default max_files and keep the returned raw scores in `r`.
r = evaluate()

Benchmarking 100 tracks against 16 transcodings.

Computing cr_mh_lsb


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Elapsed time: 90.5313 seconds

Computing cr_sh32


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Elapsed time: 85.4680 seconds

Computing cr_sh64_pw


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Elapsed time: 87.3433 seconds


Transcoding,cr_mh_lsb,cr_sh32,cr_sh64_pw
ac3_128000,0.67,0.97,0.97
ac3_64000,0.58,0.97,0.97
aac_128000,0.46,0.96,0.96
aac_64000,0.44,0.95,0.95
aif_128000,0.99,1.0,1.0
aif_64000,0.99,1.0,1.0
flac_128000,1.0,1.0,1.0
flac_64000,1.0,1.0,1.0
mp2_128000,0.58,0.97,0.97
mp2_64000,0.56,0.97,0.97
