In [1]:
# Load IPython's autoreload extension; mode 2 reloads modules before each
# cell execution, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

# Parent of the notebook's working directory. It is put on sys.path so that
# the `autoeq` and `dbtools` packages imported by the next cell can be found
# (assumes the notebook is run from a direct subfolder of the repository).
ROOT_PATH = Path().resolve().parent
if str(ROOT_PATH) not in sys.path:
    # list.insert takes (index, object), and sys.path entries must be strings:
    # non-string entries are ignored by the import system.
    sys.path.insert(1, str(ROOT_PATH))

In [3]:
import ipywidgets as widgets
from IPython.display import display
from autoeq.constants import PEQ_CONFIGS
from autoeq.batch_processing import batch_processing
ROOT_PATH = Path().resolve().parent
from dbtools.rtings_crawler import RtingsCrawler
from dbtools.crinacle_crawler import CrinacleCrawler
from dbtools.oratory1990_crawler import Oratory1990Crawler
from dbtools.innerfidelity_crawler import InnerfidelityCrawler
from dbtools.headphonecom_crawler import HeadphonecomCrawler
from dbtools.squig_crawler import SquigCrawler, SquigCrawlerManager
from dbtools.prune_results import prune_results
from dbtools.update_result_indexes import update_all_indexes
from dbtools.create_webapp_data import write_entries_and_measurements, write_targets
from dbtools.constants import TARGETS_PATH, MEASUREMENTS_PATH, RESULTS_PATH

## Crawling and Parsing
Additional Python packages are required for processing the measurements:
```bash
python -m pip install -U -r dbtools/requirements.txt
```

This notebook uses IPyWidgets
```bash
jupyter nbextension enable --py widgetsnbextension --sys-prefix
```

Finally install IPython kernel
```bash
python -m ipykernel install --user --name="autoeq"
```

Measurement crawlers require [Google Chrome](https://www.google.com/chrome/) installed and
[ChromeDriver](https://googlechromelabs.github.io/chrome-for-testing/) binary in the measurements folder (or anywhere
in the PATH).

Measurement crawlers also require C++. This should be installed by default on Linux but on Windows you need to install
Microsoft Visual Studio build tools for this. https://visualstudio.microsoft.com/downloads/ ->
"Tools for Visual Studio 2019" -> "Build Tools for Visual Studio 2019".

oratory1990 crawler requires Ghostscript installed: https://www.ghostscript.com/download/gsdnld.html

### Crinacle
Download measurement data from Drive folder to `measurements/crinacle/raw_data/` before running this!

* `IEM Measurements/IEC60318-4 IEM Measurements (TSV txt)` into `AutoEq/measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)`
* `IEM Measurements/4620 IEM Measurements` into `AutoEq/measurements/crinacle/raw_data/4620 IEM Measurements`
* `HP Measurements/EARS + 711 (TSV txt) (Legacy)` into `AutoEq/measurements/crinacle/raw_data/EARS + 711 (TSV txt) (Legacy)`
* `GRAS 43AG-7` into `AutoEq/measurements/crinacle/raw_data/GRAS 43AG-7`

In [None]:
# Create the Crinacle crawler, run it and show its widget in the cell output.
# The raw data listed above must already be in measurements/crinacle/raw_data/.
crawler = CrinacleCrawler()
crawler.run()
display(crawler.widget)

In [8]:
# Pruning check with dry_run=True — presumably only reports what would be
# pruned without deleting anything (confirm in prune_measurements).
crawler.prune_measurements(dry_run=True)

No measurements need to be pruned


In [None]:
# Process the crawled measurements; new_only=True presumably skips items that
# were already processed — confirm against the crawler implementation.
crawler.process(new_only=True)

### oratory1990
oratory1990 crawler fetches all measurements from https://www.reddit.com/r/oratory1990/wiki/index/list_of_presets/, downloads PDFs and reads the frequency response measurement data from the PDFs. Parsing the PDFs requires [Ghostscript](https://www.ghostscript.com/download/gsdnld.html) to be installed on the system.

In [34]:
# Rebinds `crawler` to the oratory1990 crawler; the following cells act on it.
crawler = Oratory1990Crawler()

In [None]:
# Run the oratory1990 crawler and show its widget in the cell output.
crawler.run()
display(crawler.widget)

In [36]:
# dry_run=False: this actually prunes — the recorded output shows a
# measurement CSV being removed from disk.
crawler.prune_measurements(dry_run=False)

Removed "C:\Users\jaakko\code\AutoEq\measurements\oratory1990\data\over-ear\AKG K712 (Dekoni sheepskin Earpads).csv"


In [41]:
# Process the oratory1990 measurements; new_only=True presumably skips items
# that were already processed — confirm against the crawler implementation.
crawler.process(new_only=True)

  0%|          | 0/612 [00:00<?, ?it/s]

### Rtings
Rtings crawler fetches all measurements from https://www.rtings.com/headphones/1-[2,4,5]/graph and downloads raw FR JSON files and parses them.

In [44]:
# Rebinds `crawler` to the Rtings crawler; the following cells act on it.
crawler = RtingsCrawler()

In [None]:
# Run the Rtings crawler and show its widget in the cell output.
crawler.run()
display(crawler.widget)

In [45]:
# Pruning check with dry_run=True — presumably only reports what would be
# pruned without deleting anything (confirm in prune_measurements).
crawler.prune_measurements(dry_run=True)

No measurements need to be pruned


In [53]:
# Process the Rtings measurements; new_only=True presumably skips items that
# were already processed — confirm against the crawler implementation.
crawler.process(new_only=True)

  0%|          | 0/723 [00:00<?, ?it/s]

### Squig.link
Several databases included in squig.link

**TODO:** rig information for crawlers, somehow

In [5]:
# Build the squig.link crawler manager and list the usernames of all crawlers
# it knows about, alphabetically, as one comma-separated line.
manager = SquigCrawlerManager()
print(', '.join(sorted(crawler.username for crawler in manager.crawlers)))

achoreviews, adri-n, aftersound, animagus, ankramutt, arn, auricularesargentina, aydn, bakkwatan, bedrock, bryaudioreviews, bukanaudiophile, cammyfi, cqtek, dchpgall, dhrme, dl, eliseaudio, eplv, fahryst, filk, freeryder05, gadgetrytech, harpo, hbb, hobbytalk, hore, ianfann, iemworld, ish, jacstone, jaytiss, kazi, kr0mka, kuulokenurkka, listener, melatonin, mmagtech, musicafe, nymz, practiphile, precog, pw, ragnarok, recode, regancipher, rg, sai, seanwee, smirk, soundcheck39, superreview, suporsalad, tanchjim, tedthepraimortis, tgx78, therollo9, timmyv, tonedeafmonk, vortexreviews, vsg, yanyin


In [None]:
# Run the squig.link crawler registered as 'auricularesargentina' and display its widget.
display(manager.run('auricularesargentina').widget)

In [None]:
# Run the squig.link crawler registered as 'bakkwatan' and display its widget.
display(manager.run('bakkwatan').widget)

In [None]:
# Run the squig.link crawler registered as 'cqtek' and display its widget.
display(manager.run('cqtek').widget)

In [None]:
# Run the squig.link crawler registered as 'dhrme' and display its widget.
display(manager.run('dhrme').widget)

In [None]:
# Run the squig.link crawler registered as 'fahryst' and display its widget.
display(manager.run('fahryst').widget)

In [None]:
# Run the squig.link crawler registered as 'filk' and display its widget.
display(manager.run('filk').widget)

In [None]:
# Run the squig.link crawler registered as 'freeryder05' and display its widget.
display(manager.run('freeryder05').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('gadgetgenerations').widget)

In [None]:
# Run the squig.link crawler registered as 'harpo' and display its widget.
display(manager.run('harpo').widget)

In [None]:
# Rig not known yet
#display(manager.run('ish').widget)

In [None]:
# Rig not known yet
#display(manager.run('jacstone').widget)

In [None]:
# Run the squig.link crawler registered as 'jaytiss' and display its widget.
display(manager.run('jaytiss').widget)

In [None]:
# Run the squig.link crawler registered as 'kazi' and display its widget.
display(manager.run('kazi').widget)

In [None]:
# Run the squig.link crawler registered as 'kr0mka' and display its widget.
display(manager.run('kr0mka').widget)

In [None]:
# Run the squig.link crawler registered as 'kuulokenurkka' and display its widget.
display(manager.run('kuulokenurkka').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('mini').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('psykano').widget)

In [None]:
# Run the squig.link crawler registered as 'regancipher' and display its widget.
display(manager.run('regancipher').widget)

In [None]:
# Run the squig.link crawler registered as 'rg' and display its widget.
display(manager.run('rg').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('sdlib').widget)

In [None]:
# Almost all measurements are with 3rd party eartips
#display(manager.run('smirk').widget)

In [None]:
# Rig not known yet
#display(manager.run('soundcheck39').widget)

In [None]:
# Run the squig.link crawler registered as 'superreview' and display its widget.
display(manager.run('superreview').widget)

In [None]:
# Run the squig.link crawler registered as 'tedthepraimortis' and display its widget.
display(manager.run('tedthepraimortis').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('therollo9').widget)

In [None]:
# Run the squig.link crawler registered as 'tonedeafmonk' and display its widget.
display(manager.run('tonedeafmonk').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('wdym').widget)

In [25]:
# Only 3 models
#display(manager.run('yanyin').widget)

In [None]:
# Not in squigsites.json
#display(manager.run('zamo').widget)

In [27]:
# Process the measurements of the 'auricularesargentina' squig.link crawler
# (the recorded output shows a progress bar over 47 items).
manager.process('auricularesargentina')

  0%|          | 0/47 [00:00<?, ?it/s]

## Rename Measurements
Sometimes a measurement is named incorrectly. In other cases only one sample existed previously and multiple samples have since been measured, so the original one needs to be renamed to `<name> (sample 1)`.

In [None]:
# One crawler instance per database, so that a rename entry below can target
# any combination of them. The Rtings crawler is handed the oratory1990
# crawler's `driver` instead of creating its own.
crinacle = CrinacleCrawler()
headphonecom = HeadphonecomCrawler()
innerfidelity = InnerfidelityCrawler()
oratory1990 = Oratory1990Crawler()
rtings = RtingsCrawler(driver=oratory1990.driver)

# Each entry: the current name, the corrected name, and the crawlers whose
# measurements should be renamed.
renames = [
    {
        'old_name': 'SeeAudio x Crincale Yume Midnight',
        'new_name': 'SeeAudio x Crinacle Yume Midnight',
        'crawlers': [crinacle],
    },
]

# dry_run=False: the renames are applied for real.
for rename in renames:
    for crawler in rename['crawlers']:
        crawler.rename_measurement(
            old_name=rename['old_name'],
            new_name=rename['new_name'],
            dry_run=False
        )

## Prune Results
Check if obsolete results (e.g. because of renaming) exist and remove them

In [61]:
# Check the listed databases for obsolete results. dry_run=True presumably
# only reports them without removing anything — confirm in prune_results.
prune_results(databases=['crinacle', 'oratory1990', 'Headphone.com Legacy', 'Innerfidelity', 'Rtings'], dry_run=True)

## Update Results
Creates new results from the measurements. `eq_kwargs` are parameters shared by all jobs.

In [5]:
# Keyword arguments passed unchanged to every batch_processing call below.
eq_kwargs = {
    # Equalizer output types to produce
    'parametric_eq': True,
    'ten_band_eq': True,
    'convolution_eq': True,
    # Two parametric EQ configurations taken from autoeq.constants.PEQ_CONFIGS
    'parametric_eq_config': [
        PEQ_CONFIGS['4_PEAKING_WITH_LOW_SHELF'],
        PEQ_CONFIGS['4_PEAKING_WITH_HIGH_SHELF'],
    ],
    # presumably sampling rates in Hz — confirm against batch_processing
    'fs': [44100, 48000],
    # NOTE(review): meaning of 0 is defined by batch_processing — confirm
    'thread_count': 0,
}

#### oratory1990 Over-ear

In [42]:
# oratory1990 over-ear: create results from the measurement folder against
# the "Harman over-ear 2018 without bass" target file.
# The return value is bound to `_` so the cell shows no output value.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('oratory1990', 'data', 'over-ear'),
    output_dir=RESULTS_PATH.joinpath('oratory1990', 'over-ear'),
    target=TARGETS_PATH.joinpath('Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=True,
    **eq_kwargs
)

  0%|          | 0/1 [00:00<?, ?it/s]

#### oratory1990 In-ear

In [7]:
# oratory1990 in-ear: create results from the measurement folder against the
# "AutoEq in-ear" target file. new_only=False here, unlike the over-ear cell.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('oratory1990', 'data', 'in-ear'),
    output_dir=RESULTS_PATH.joinpath('oratory1990', 'in-ear'),
    target=TARGETS_PATH.joinpath('AutoEq in-ear.csv'),
    bass_boost_gain=9.5,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/188 [00:00<?, ?it/s]

#### oratory1990 Earbud

In [8]:
# oratory1990 earbud: same "AutoEq in-ear" target file as the in-ear cell,
# but with bass_boost_gain set to 0.0.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('oratory1990', 'data', 'earbud'),
    output_dir=RESULTS_PATH.joinpath('oratory1990', 'earbud'),
    target=TARGETS_PATH.joinpath('AutoEq in-ear.csv'),
    bass_boost_gain=0.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/1 [00:00<?, ?it/s]

#### crinacle GRAS 43AG-7 Over-ear

In [9]:
# crinacle GRAS 43AG-7: create results from the over-ear measurement folder
# against the "Harman over-ear 2018 without bass" target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'GRAS 43AG-7'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'GRAS 43AG-7 over-ear'),
    target=TARGETS_PATH.joinpath('Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/331 [00:00<?, ?it/s]

#### crinacle EARS+711 Over-ear

In [10]:
# crinacle EARS + 711: create results from the over-ear measurement folder
# against the crinacle-specific Harman over-ear 2018 target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'over-ear', 'EARS + 711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'EARS + 711 over-ear'),
    target=TARGETS_PATH.joinpath('crinacle EARS + 711 Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/65 [00:00<?, ?it/s]

#### crinacle 4620 In-ear

In [11]:
# crinacle Bruel & Kjaer 4620 in-ear: uses the "Diffuse field 5128 -1dB per
# octave" target file with bass_boost_gain set to 0.0.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', 'Bruel & Kjaer 4620'),
    output_dir=RESULTS_PATH.joinpath('crinacle', 'Bruel & Kjaer 4620 in-ear'),
    target=TARGETS_PATH.joinpath('Diffuse field 5128 -1dB per octave.csv'),
    bass_boost_gain=0.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/169 [00:00<?, ?it/s]

#### crinacle 711 In-ear

In [6]:
# crinacle 711 in-ear: create results from the measurement folder against the
# "AutoEq in-ear" target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('crinacle', 'data', 'in-ear', '711'),
    output_dir=RESULTS_PATH.joinpath('crinacle', '711 in-ear'),
    target=TARGETS_PATH.joinpath('AutoEq in-ear.csv'),
    bass_boost_gain=9.5,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=True,
    **eq_kwargs
)

  0%|          | 0/1 [00:00<?, ?it/s]

#### Rtings Over-ear

In [54]:
# Rtings over-ear: create results from the measurement folder against the
# Rtings-specific Harman over-ear 2018 target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Rtings', 'data', 'over-ear'),
    output_dir=RESULTS_PATH.joinpath('Rtings', 'over-ear'),
    target=TARGETS_PATH.joinpath('Rtings Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=True,
    **eq_kwargs
)

  0%|          | 0/6 [00:00<?, ?it/s]

#### Rtings In-ear

In [49]:
# Rtings in-ear: create results from the measurement folder against the
# "Rtings AutoEq in-ear" target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Rtings', 'data', 'in-ear'),
    output_dir=RESULTS_PATH.joinpath('Rtings', 'in-ear'),
    target=TARGETS_PATH.joinpath('Rtings AutoEq in-ear.csv'),
    bass_boost_gain=9.5,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=True,
    **eq_kwargs
)

  0%|          | 0/15 [00:00<?, ?it/s]

#### Rtings Earbud

In [17]:
# Rtings earbud: create results from the measurement folder against the same
# "Rtings AutoEq in-ear" target file the Rtings in-ear cell uses, with
# bass_boost_gain set to 0.0.
# The file name is spelled "AutoEq" (not "Autoeq") to match the other target
# references in this notebook; a wrong-case name only resolves on
# case-insensitive file systems.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Rtings', 'data', 'earbud'),
    output_dir=RESULTS_PATH.joinpath('Rtings', 'earbud'),
    target=TARGETS_PATH.joinpath('Rtings AutoEq in-ear.csv'),
    bass_boost_gain=0.0, bass_boost_fc=105, bass_boost_q=0.7,
    new_only=False, **eq_kwargs)

  0%|          | 0/23 [00:00<?, ?it/s]

#### Innerfidelity Over-ear

In [18]:
# Innerfidelity over-ear: create results from the measurement folder against
# the Innerfidelity-specific Harman over-ear 2018 target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Innerfidelity', 'data', 'over-ear'),
    output_dir=RESULTS_PATH.joinpath('Innerfidelity', 'over-ear'),
    target=TARGETS_PATH.joinpath('Innerfidelity Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/612 [00:00<?, ?it/s]

#### Innerfidelity In-ear

In [19]:
# Innerfidelity in-ear: create results from the measurement folder against
# the "Innerfidelity AutoEq in-ear" target file.
# NOTE(review): bass_boost_gain is 6.0 here, while the oratory1990,
# crinacle 711, Rtings and Headphone.com Legacy in-ear cells use 9.5 —
# confirm this difference is intentional.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Innerfidelity', 'data', 'in-ear'),
    output_dir=RESULTS_PATH.joinpath('Innerfidelity', 'in-ear'),
    target=TARGETS_PATH.joinpath('Innerfidelity AutoEq in-ear.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/307 [00:00<?, ?it/s]

#### Innerfidelity Earbud

In [20]:
# Innerfidelity earbud: same "Innerfidelity AutoEq in-ear" target file as the
# in-ear cell, but with bass_boost_gain set to 0.0.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Innerfidelity', 'data', 'earbud'),
    output_dir=RESULTS_PATH.joinpath('Innerfidelity', 'earbud'),
    target=TARGETS_PATH.joinpath('Innerfidelity AutoEq in-ear.csv'),
    bass_boost_gain=0.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/15 [00:00<?, ?it/s]

#### Headphone.com Legacy Over-ear

In [21]:
# Headphone.com Legacy over-ear: create results from the measurement folder
# against the Headphone.com Legacy-specific Harman over-ear 2018 target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Headphone.com Legacy', 'data', 'over-ear'),
    output_dir=RESULTS_PATH.joinpath('Headphone.com Legacy', 'over-ear'),
    target=TARGETS_PATH.joinpath('Headphone.com Legacy Harman over-ear 2018 without bass.csv'),
    bass_boost_gain=6.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/225 [00:00<?, ?it/s]

#### Headphone.com Legacy In-ear

In [22]:
# Headphone.com Legacy in-ear: create results from the measurement folder
# against the "Headphone.com Legacy AutoEq in-ear" target file.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Headphone.com Legacy', 'data', 'in-ear'),
    output_dir=RESULTS_PATH.joinpath('Headphone.com Legacy', 'in-ear'),
    target=TARGETS_PATH.joinpath('Headphone.com Legacy AutoEq in-ear.csv'),
    bass_boost_gain=9.5,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/94 [00:00<?, ?it/s]

#### Headphone.com Legacy Earbud

In [23]:
# Headphone.com Legacy earbud: same "Headphone.com Legacy AutoEq in-ear"
# target file as the in-ear cell, but with bass_boost_gain set to 0.0.
_ = batch_processing(
    input_dir=MEASUREMENTS_PATH.joinpath('Headphone.com Legacy', 'data', 'earbud'),
    output_dir=RESULTS_PATH.joinpath('Headphone.com Legacy', 'earbud'),
    target=TARGETS_PATH.joinpath('Headphone.com Legacy AutoEq in-ear.csv'),
    bass_boost_gain=0.0,
    bass_boost_fc=105,
    bass_boost_q=0.7,
    new_only=False,
    **eq_kwargs
)

  0%|          | 0/10 [00:00<?, ?it/s]

## Update Indexes
Updates recommended results, full results, DB specific results, HeSuVi results and ranking table.

In [4]:
# Rebuild every index; the recorded output lists the ranking, recommendations,
# full and source indices plus the HeSuVi ZIP archive.
update_all_indexes()

Creating ranking index...


  0%|          | 0/3664 [00:00<?, ?it/s]

Creating recommendations index...
Creating full index...
Creating source indices...
Creating HeSuVi ZIP archive...


  0%|          | 0/3664 [00:00<?, ?it/s]

#### Data for webapp

In [4]:
# Write the entries and measurements data used by the webapp
# (the recorded output shows a progress bar over 4599 items).
write_entries_and_measurements()

  0%|          | 0/4599 [00:00<?, ?it/s]

In [27]:
# Write the targets data used by the webapp.
write_targets()

## Deploy
1. Add files to Git, commit and push
2. Upload webapp data to server

# Sandbox
Don't run these! Random exploration while developing.

In [12]:
from pathlib import Path
from tqdm.auto import tqdm
import re
import requests
from selenium.webdriver.common.by import By
import json
from bs4 import BeautifulSoup
import numpy as np
import json
from autoeq.frequency_response import FrequencyResponse

In [26]:
# Sandbox: sync `source_name` in the Crinacle name index with the names found
# in the crawler's book maps, printing every change, then save the name index.
crawler = CrinacleCrawler()
crawler.crawl()

for item in crawler.crawl_index:
    index_item = crawler.name_index.find_one(url=item.url)
    # The second-to-last URL segment selects the book map; the last one is
    # the file name that gets normalized and looked up in it.
    book = crawler.book_maps[item.url.split('/')[-2]]
    normalized_file_name = crawler.normalize_file_name(item.url.split('/')[-1])
    source_name = None
    if normalized_file_name in book:
        source_name = book[normalized_file_name]
    # Nothing to update without both an index entry and a book name
    if index_item is None or source_name is None:
        continue
    if index_item.source_name == source_name:
        continue
    print(f'{index_item.source_name} --> {source_name}')
    index_item.source_name = source_name
crawler.write_name_index()

Annotation --> BLON Annotation
Annotation --> BLON Annotation
Aria --> Moondrop Aria
Aria --> Moondrop Aria
None --> BLON BL-03
None --> BLON BL-03
Blessing 3 --> Moondrop Blessing 3
Blessing 3 --> Moondrop Blessing 3
Cencibel --> Tripowin Cencibel
Cencibel --> Tripowin Cencibel
None --> oBravo Cupid
None --> oBravo Cupid
Dawn --> Fearless Dawn
Dawn --> Fearless Dawn
7Hz-Salnotes Dioko Prototype --> 7Hz-Salnotes Dioko
7Hz-Salnotes Dioko Prototype --> 7Hz-Salnotes Dioko
Dusk --> Moondrop Blessing2:Dusk
Dusk --> Moondrop Blessing2:Dusk
ERX --> Etymotic ERX
ERX --> Etymotic ERX
F1 --> KBEar F1
F1 --> KBEar F1
FHE Eclipse --> FiiO FHE:Eclipse
FHE Eclipse --> FiiO FHE:Eclipse
Galileo --> LetShuoer Galileo
Galileo --> LetShuoer Galileo
GR-I --> Guide Ray GR-I
GR-I --> Guide Ray GR-I
H27 --> Shuoer H27
H27 --> Shuoer H27
H3 --> Kinera H3
H3 --> Kinera H3
i4 TRI --> TRI i4
i4 TRI --> TRI i4
IE200 --> Sennheiser IE200
IE200 --> Sennheiser IE200
IER-Z1R --> Sony IER-Z1R
IER-Z1R --> Sony IER-Z1R


In [22]:
# Sandbox: look up one raw Crinacle file by URL in the crawl index and print the entry.
print(crawler.crawl_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

"file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt"	"1Custom Dual"	"1Custom SA02"	"in-ear"	"711"


In [23]:
# Sandbox: look up the same URL in the name index and print the entry for comparison.
print(crawler.name_index.find_one(url='file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt'))

"file://measurements/crinacle/raw_data/IEC60318-4 IEM Measurements (TSV txt)/1Custom Dual L.txt"	"1Custom Dual"	"1Custom SA02"	"in-ear"	"711"
