In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

# Parent of the current working directory; added to sys.path below so that
# packages located there can be imported by the following cells.
ROOT_PATH = Path().resolve().parent
if str(ROOT_PATH) not in sys.path:
    # list.insert takes (index, object), and the membership check above compares
    # against the string form, so the string form is what gets inserted.
    sys.path.insert(1, str(ROOT_PATH))

In [3]:
import ipywidgets as widgets
from IPython.display import display
from autoeq.constants import PEQ_CONFIGS
from autoeq.batch_processing import batch_processing
ROOT_PATH = Path().resolve().parent
from dbtools.rtings_crawler import RtingsCrawler
from dbtools.crinacle_crawler import CrinacleCrawler
from dbtools.oratory1990_crawler import Oratory1990Crawler
from dbtools.innerfidelity_crawler import InnerfidelityCrawler
from dbtools.headphonecom_crawler import HeadphonecomCrawler
from dbtools.squig_crawler import SquigCrawler, SquigCrawlerManager
from dbtools.prune_results import prune_results
from dbtools.update_result_indexes import update_all_indexes, write_webapp_entries_and_measurements
from dbtools.constants import TARGETS_PATH, MEASUREMENTS_PATH, RESULTS_PATH

## Crawling and Parsing
Additional Python packages are required for processing the measurements:
```bash
python -m pip install -U -r dbtools/requirements.txt
```

This notebook uses IPyWidgets
```bash
jupyter nbextension enable --py widgetsnbextension --sys-prefix
```

Finally, install the IPython kernel:
```bash
python -m ipykernel install --user --name="autoeq"
```

Measurement crawlers require [Google Chrome](https://www.google.com/chrome/) installed and
[ChromeDriver](https://googlechromelabs.github.io/chrome-for-testing/) binary in the measurements folder (or anywhere
in the PATH).

Measurement crawlers also require C++. This should be installed by default on Linux but on Windows you need to install
Microsoft Visual Studio build tools for this. https://visualstudio.microsoft.com/downloads/ ->
"Tools for Visual Studio 2019" -> "Build Tools for Visual Studio 2019".

oratory1990 crawler requires Ghostscript installed: https://www.ghostscript.com/download/gsdnld.html

### Crinacle
Download measurement data from Drive folder to `measurements/crinacle/raw_data/` before running this!

* `IEM Measurements/IEC60318-4 IEM Measurements (TSV txt)` into `AutoEq/measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)`
* `IEM Measurements/4620 IEM Measurements` into `AutoEq/measurements/crinacle/raw_data/4620 IEM Measurements`
* `HP Measurements/EARS + 711 (TSV txt) (Legacy)` into `AutoEq/measurements/crinacle/raw_data/EARS + 711 (TSV txt) (Legacy)`
* `GRAS 43AG-7` into `AutoEq/measurements/crinacle/raw_data/GRAS 43AG-7`

In [None]:
crawler = CrinacleCrawler()
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

### oratory1990
oratory1990 crawler fetches all measurements from https://www.reddit.com/r/oratory1990/wiki/index/list_of_presets/, downloads PDFs and reads the frequency response measurement data from the PDFs. Parsing the PDFs requires [Ghostscript](https://www.ghostscript.com/download/gsdnld.html) to be installed on the system.

In [None]:
crawler = Oratory1990Crawler()

In [None]:
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

### Rtings
Rtings crawler fetches all measurements from https://www.rtings.com/headphones/1-[2,4,5]/graph and downloads raw FR JSON files and parses them.

In [None]:
crawler = RtingsCrawler()

In [None]:
crawler.run()
display(crawler.widget)

In [None]:
crawler.process(new_only=True)

### Squig.link
Several databases included in squig.link

**TODO:** rig information for crawlers, somehow

In [12]:
manager = SquigCrawlerManager()
print(', '.join(sorted([crawler.username for crawler in manager.crawlers])))

achoreviews, adri-n, aftersound, animagus, ankramutt, arn, auricularesargentina, aydn, bakkwatan, bedrock, bryaudioreviews, bukanaudiophile, cammyfi, cqtek, dchpgall, dhrme, dl, eliseaudio, eplv, fahryst, filk, freeryder05, gadgetrytech, harpo, hbb, hobbytalk, hore, ianfann, iemworld, ish, jacstone, jaytiss, kazi, kr0mka, kuulokenurkka, listener, melatonin, mmagtech, musicafe, nymz, practiphile, precog, pw, ragnarok, recode, regancipher, rg, sai, seanwee, smirk, soundcheck39, superreview, suporsalad, tanchjim, tedthepraimortis, tgx78, therollo9, timmyv, tonedeafmonk, vortexreviews, vsg, yanyin


In [None]:
display(manager.run('auricularesargentina').widget)

In [None]:
manager.process('auricularesargentina')

In [None]:
display(manager.run('bakkwatan').widget)

In [None]:
manager.process('bakkwatan')

In [None]:
display(manager.run('cqtek').widget)

In [None]:
manager.process('cqtek')

In [None]:
display(manager.run('dhrme').widget)

In [None]:
manager.process('dhrme')

In [None]:
display(manager.run('fahryst').widget)

In [None]:
manager.process('fahryst')

In [None]:
display(manager.run('filk').widget)

In [None]:
manager.process('filk')

In [None]:
display(manager.run('freeryder05').widget)

In [None]:
manager.process('freeryder05')

In [None]:
# Not in squigsites.json
#display(manager.run('gadgetgenerations').widget)

In [None]:
display(manager.run('harpo').widget)

In [None]:
manager.process('harpo')

In [None]:
# Rig not known yet
#display(manager.run('ish').widget)

In [None]:
# Rig not known yet
#display(manager.run('jacstone').widget)

In [None]:
display(manager.run('jaytiss').widget)

In [None]:
manager.process('jaytiss')

In [None]:
display(manager.run('kazi').widget)

In [None]:
manager.process('kazi')

In [None]:
display(manager.run('kr0mka').widget)

In [42]:
manager.process('kr0mka')

  0%|          | 0/111 [00:00<?, ?it/s]

In [None]:
display(manager.run('kuulokenurkka').widget)

In [None]:
manager.process('kuulokenurkka')

In [None]:
# Not in squigsites.json
#display(manager.run('mini').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('psykano').widget)

In [None]:
display(manager.run('regancipher').widget)

In [None]:
manager.process('regancipher')

In [None]:
display(manager.run('rg').widget)

In [None]:
manager.process('rg')

In [None]:
# Not in squigsites.json
#display(manager.run('sdlib').widget)

In [None]:
# Almost all measurements are with 3rd party eartips
#display(manager.run('smirk').widget)

In [None]:
# Rig not known yet
#display(manager.run('soundcheck39').widget)

In [None]:
display(manager.run('superreview').widget)

In [None]:
manager.process('superreview')

In [None]:
display(manager.run('tedthepraimortis').widget)

In [None]:
manager.process('tedthepraimortis')

In [None]:
# Not in squigsites.json
#display(manager.run('therollo9').widget)

In [None]:
display(manager.run('tonedeafmonk').widget)

In [None]:
manager.process('tonedeafmonk')

In [None]:
# Not in squigsites.json
#display(manager.run('wdym').widget)

In [None]:
# Only 3 models
#display(manager.run('yanyin').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('zamo').widget)

## Rename Measurements
Sometimes measurements are named incorrectly. In other cases only one sample existed previously and multiple samples have since been measured, so the original one needs to be renamed to `<name> (sample 1)`.

In [None]:
# One crawler instance per measurement source; each rename below is applied
# through the crawlers listed under its 'crawlers' key.
crinacle = CrinacleCrawler()
headphonecom = HeadphonecomCrawler()
innerfidelity = InnerfidelityCrawler()
oratory1990 = Oratory1990Crawler()
# Passes the oratory1990 crawler's driver to the Rtings crawler
# (presumably so both share one browser session — TODO confirm).
rtings = RtingsCrawler(driver=oratory1990.driver)

# Each entry gives the current (wrong) name, the corrected name and the crawlers to rename it in.
renames = [
    {'old_name': 'SeeAudio x Crincale Yume Midnight', 'new_name': 'SeeAudio x Crinacle Yume Midnight', 'crawlers': [crinacle]},
]
for rename in renames:
    for crawler in rename['crawlers']:
        # NOTE(review): dry_run=False presumably performs the rename for real rather than
        # only reporting it — confirm against rename_measurement before running.
        crawler.rename_measurement(old_name=rename['old_name'], new_name=rename['new_name'], dry_run=False)

## Prune Results
Check if obsolete results (e.g. because of renaming) exist and remove them

In [41]:
prune_results(databases=['kr0mka'], dry_run=False)

Removed "kr0mka\in-ear\Tanchjim One DSP"


## Update Results
Creates new results from the measurements. `update_results` holds the default parameters shared by all jobs; extra keyword arguments passed to it override those defaults for a single job.

In [5]:
def update_results(source_name, form, **override_kwargs):
    """Run batch processing for one measurement source and headphone form.

    Args:
        source_name: Directory name of the source under MEASUREMENTS_PATH and RESULTS_PATH.
        form: Headphone form. 'over-ear' and 'earbud' get dedicated defaults; any other
            value is given the in-ear defaults.
        **override_kwargs: Keyword arguments that replace or extend the defaults before
            they are passed to batch_processing.
    """
    if form == 'over-ear':
        bass_boost_gain = 6.0
        target_file_name = 'Harman over-ear 2018 without bass.csv'
    else:
        # Earbuds use the same target file as in-ears but get no bass boost
        bass_boost_gain = 0.0 if form == 'earbud' else 8.0
        target_file_name = 'AutoEq in-ear.csv'

    job_kwargs = dict(
        input_dir=MEASUREMENTS_PATH.joinpath(source_name, 'data', form),
        output_dir=RESULTS_PATH.joinpath(source_name, form),
        bass_boost_gain=bass_boost_gain,
        target=TARGETS_PATH.joinpath(target_file_name),
        bass_boost_fc=105,
        bass_boost_q=0.7,
        parametric_eq=True,
        ten_band_eq=True,
        convolution_eq=True,
        parametric_eq_config=[
            PEQ_CONFIGS['4_PEAKING_WITH_LOW_SHELF'],
            PEQ_CONFIGS['4_PEAKING_WITH_HIGH_SHELF'],
        ],
        fs=[44100, 48000],
        thread_count=0,
    )
    # Caller-supplied values win over the defaults above
    job_kwargs.update(override_kwargs)
    batch_processing(**job_kwargs)

### oratory1990

In [None]:
update_results('oratory1990', 'over-ear', new_only=True)

In [None]:
update_results('oratory1990', 'in-ear', new_only=True)

In [None]:
update_results('oratory1990', 'earbud', new_only=True)

### crinacle

In [None]:
update_results('crinacle', 'over-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'EARS + 711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'EARS + 711 over-ear'),
    target=TARGETS_PATH.joinpath('crinacle EARS + 711 Harman over-ear 2018 without bass.csv'))

In [None]:
update_results('crinacle', 'over-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'GRAS 43AG-7'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'GRAS 43AG-7 over-ear'))

In [None]:
update_results('crinacle', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', 'Bruel & Kjaer 4620'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'Bruel & Kjaer 4620 in-ear'),
    target=TARGETS_PATH.joinpath('JM-1 without bass.csv'),
    bass_boost_gain=6.5)

In [None]:
update_results('crinacle', 'in-ear', new_only=True,
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', '711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', '711 in-ear'))

### Rtings

In [None]:
update_results('Rtings', 'over-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'))

In [None]:
update_results('Rtings', 'in-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

In [None]:
update_results('Rtings', 'earbud', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

### Innerfidelity

In [None]:
# Innerfidelity over-ear results, equalized against the HMS II.3 over-ear target file
update_results('Innerfidelity', 'over-ear', target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'), new_only=True)

In [None]:
# Innerfidelity in-ear results, equalized against the HMS II.3 in-ear target file
update_results('Innerfidelity', 'in-ear', target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'), new_only=True)

In [None]:
# Innerfidelity earbud results, equalized against the HMS II.3 in-ear target file
update_results('Innerfidelity', 'earbud', target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'), new_only=True)

### Headphone.com Legacy

In [None]:
# Headphone.com Legacy over-ear results, equalized against the HMS II.3 over-ear target file
update_results('Headphone.com Legacy', 'over-ear', target=TARGETS_PATH.joinpath('HMS II.3 Harman over-ear 2018 without bass.csv'), new_only=True)

In [None]:
update_results('Headphone.com Legacy', 'in-ear', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

In [None]:
update_results('Headphone.com Legacy', 'earbud', new_only=True, target=TARGETS_PATH.joinpath('HMS II.3 Autoeq in-ear.csv'))

### squig.link

In [None]:
update_results('Auriculares Argentina', 'over-ear', new_only=True)

In [None]:
update_results('Auriculares Argentina', 'in-ear', new_only=True)

In [None]:
update_results('Bakkwatan', 'in-ear', new_only=True)

In [None]:
update_results('DHRME', 'in-ear', new_only=True)

In [None]:
update_results('Fahryst', 'in-ear', new_only=True)

In [None]:
update_results('Filk', 'in-ear', new_only=True)

In [None]:
update_results('Filk', 'over-ear', new_only=True)

In [None]:
update_results('freeryder05', 'in-ear', new_only=True)

In [None]:
update_results('Harpo', 'in-ear', new_only=True)

In [None]:
update_results('Hi End Portable', 'in-ear', new_only=True)

In [None]:
update_results('Jaytiss', 'in-ear', new_only=True)

In [None]:
update_results('Kazi', 'in-ear', new_only=True)

In [None]:
update_results('Kazi', 'earbud', new_only=True)

In [35]:
update_results('kr0mka', 'over-ear', new_only=True)

  0%|          | 0/3 [00:00<?, ?it/s]

In [43]:
update_results('kr0mka', 'in-ear', new_only=True)

  0%|          | 0/4 [00:00<?, ?it/s]

In [36]:
update_results('kr0mka', 'earbud', new_only=True)

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
update_results('Kuulokenurkka', 'over-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'over-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'in-ear', new_only=True)

In [None]:
update_results('Regan Cipher', 'earbud', new_only=True)

In [None]:
update_results('RikudouGoku', 'over-ear', new_only=True)

In [None]:
update_results('RikudouGoku', 'in-ear', new_only=True)

In [None]:
update_results('Super Review', 'over-ear', new_only=True)

In [None]:
update_results('Super Review', 'in-ear', new_only=True)

In [None]:
update_results('Super Review', 'earbud', new_only=True)

In [None]:
update_results('Ted\'s Squig Hoard', 'in-ear', new_only=True)

In [None]:
update_results('ToneDeafMonk', 'in-ear', new_only=True)

## Update Indexes
Updates recommended results, full results, DB specific results, HeSuVi results and ranking table.

In [None]:
update_all_indexes()

Creating ranking index...


  0%|          | 0/4821 [00:00<?, ?it/s]

Creating recommendations index...
Creating full index...
Creating source indices...
Creating HeSuVi ZIP archive...


  0%|          | 0/4821 [00:00<?, ?it/s]

Creating webapp data...


  0%|          | 0/4821 [00:00<?, ?it/s]

## Deploy
1. Add files to Git, commit and push
2. Upload webapp data to server

# Sandbox
Don't run these! Random exploration while developing.

In [46]:
from pathlib import Path
from tqdm.auto import tqdm
import re
import requests
from selenium.webdriver.common.by import By
import json
from bs4 import BeautifulSoup
import numpy as np
import json
from autoeq.frequency_response import FrequencyResponse

In [None]:
# Crawl the Crinacle data, then bring source names in the name index in line with the book maps
crawler = CrinacleCrawler()
crawler.crawl()

for item in crawler.crawl_index:
    index_item = crawler.name_index.find_one(url=item.url)
    # The last two URL segments select the book map and the file name within it
    url_parts = item.url.split('/')
    book = crawler.book_maps[url_parts[-2]]
    normalized_file_name = crawler.normalize_file_name(url_parts[-1])
    source_name = None
    if normalized_file_name in book:
        source_name = book[normalized_file_name]
    # Nothing to compare when either side of the mapping is missing
    if index_item is None or source_name is None:
        continue
    if index_item.source_name != source_name:
        print(f'{index_item.source_name} --> {source_name}')
        index_item.source_name = source_name
crawler.write_name_index()

In [None]:
print(crawler.crawl_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [None]:
print(crawler.name_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

In [48]:
# Scan every CSV file under MEASUREMENTS_PATH for responses whose raw data is effectively all zeros
measurements = list(MEASUREMENTS_PATH.glob('**/*.csv'))
for fp in tqdm(measurements):
    fr = FrequencyResponse.read_csv(fp)
    # Total absolute magnitude of the raw data below 0.01 is treated as an all-zero measurement
    if np.sum(np.abs(fr.raw)) < 0.01:
        # NOTE(review): the message says "Removed" but nothing is deleted in this cell —
        # confirm whether an unlink step is missing or this is a deliberate dry run.
        print(f'Removed "{fp}"')

  0%|          | 0/6759 [00:00<?, ?it/s]