#### Clone repo and install requirements:

In [None]:
!git clone https://github.com/dariadiatlova/russian_speech_denoiser.git
cd russian_speech_denoiser/denoiser/
!git submodule update --init --recursive

In [14]:
# Switch the repository in the current working directory (set by the earlier
# cd) to its `develop` branch; all following cells run against that branch.
!git checkout develop

Previous HEAD position was 16cc32f Test commit
Branch 'develop' set up to track remote branch 'develop' from 'origin'.
Switched to a new branch 'develop'


In [None]:
!pip install -r requirements.txt
!pip install -r requirements_cuda.txt

#### Fine-tune the model on a Russian speech toy dataset

In [22]:
!python3 -m denoiser.audio dataset/russian_debug/noisy > $path/noisy.json
!python3 -m denoiser.audio dataset/russian_debug/clean > $path/clean.json



In [None]:
# Run train.py with two key=value overrides: `continue_pretrained=dns64`
# (presumably starts from the pre-trained dns64 weights — confirm in the
# training config) and `demucs.hidden=64`.
!python train.py continue_pretrained=dns64 demucs.hidden=64

#### Evaluation

In [27]:
# Compute PESQ / STOI for the data referenced by egs/debug/tr.
# NOTE(review): `--dns64` selects the pre-trained model (the log below confirms
# "Loading pre-trained ... model"), so these scores do not appear to reflect the
# checkpoint produced by train.py above — confirm that this is intended.
# The absolute /content/... path assumes the repository was cloned under /content.
!python -m denoiser.evaluate --dns64 --data_dir '/content/russian_speech_denoiser/denoiser/egs/debug/tr'

INFO:denoiser.pretrained:Loading pre-trained real time H=64 model trained on DNS.
INFO:__main__:Eval estimates | 2/10 | 13.5 it/sec
INFO:__main__:Eval estimates | 4/10 | 20.7 it/sec
INFO:__main__:Eval estimates | 6/10 | 25.8 it/sec
INFO:__main__:Eval estimates | 8/10 | 30.7 it/sec
INFO:__main__:Eval estimates | 10/10 | 33.7 it/sec
INFO:__main__:Eval metrics | 2/10 | 0.2 it/sec
INFO:__main__:Eval metrics | 4/10 | 0.3 it/sec
INFO:__main__:Eval metrics | 6/10 | 0.4 it/sec
INFO:__main__:Eval metrics | 8/10 | 0.5 it/sec
INFO:__main__:Eval metrics | 10/10 | 0.7 it/sec
INFO:__main__:[1mTest set performance:PESQ=1.2861518144607544, STOI=0.27915504441529393.[0m
{"pesq": 1.2861518144607544, "stoi": 0.27915504441529393}


#### Enhancement

In [29]:
# Run the enhancer over every file in dataset/russian_debug/noisy and write the
# results to dataset/russian_debug_enhanced.
# NOTE(review): `--dns64` again selects the pre-trained model rather than the
# fine-tuned checkpoint — confirm that this is intended.
# The absolute /content/... paths assume the repository was cloned under /content.
!python -m denoiser.enhance --dns64 --noisy_dir '/content/russian_speech_denoiser/denoiser/dataset/russian_debug/noisy' --out_dir '/content/russian_speech_denoiser/denoiser/dataset/russian_debug_enhanced'

INFO:denoiser.pretrained:Loading pre-trained real time H=64 model trained on DNS.
INFO:__main__:Generate enhanced files | 2/10 | 47.8 it/sec
INFO:__main__:Generate enhanced files | 4/10 | 69.1 it/sec
INFO:__main__:Generate enhanced files | 6/10 | 51.6 it/sec
INFO:__main__:Generate enhanced files | 8/10 | 51.6 it/sec
INFO:__main__:Generate enhanced files | 10/10 | 53.7 it/sec
Waiting for pending jobs...
INFO:__main__:Generate enhanced files | 2/10 | 0.3 it/sec
INFO:__main__:Generate enhanced files | 4/10 | 0.4 it/sec
INFO:__main__:Generate enhanced files | 6/10 | 0.5 it/sec
INFO:__main__:Generate enhanced files | 8/10 | 0.6 it/sec
INFO:__main__:Generate enhanced files | 10/10 | 0.8 it/sec


#### Load Samples

In [87]:
from torchaudio import load
from IPython import display
from os import walk, listdir
from os.path import join
import random

In [95]:
def get_audio_paths(root_dir):
  """Return the sorted paths of the files located directly in ``root_dir``.

  Files inside sub-directories are not included.

  Args:
    root_dir: Directory to list.

  Returns:
    A list of ``join(root_dir, filename)`` strings in ascending order. The
    list is empty when ``root_dir`` contains no files or cannot be listed
    (e.g. it does not exist).
  """
  # Only the first tuple yielded by walk() describes root_dir itself, so stop
  # there instead of materialising the whole tree. walk() yields nothing for a
  # directory it cannot list, hence the empty default.
  _, _, filenames = next(walk(root_dir), (root_dir, [], []))
  return sorted(join(root_dir, filename) for filename in filenames)

In [96]:
# Sorted list of every file written to the enhancer's output directory.
# display_random_triple() indexes this list as consecutive pairs: even index is
# treated as the enhanced file and the following odd index as its noisy
# counterpart (presumably relying on the output file naming — verify).
enhanced_audios = get_audio_paths("/content/russian_speech_denoiser/denoiser/dataset/russian_debug_enhanced")

In [108]:
def display_random_triple():
  """Play one randomly chosen noisy / enhanced pair from ``enhanced_audios``.

  The module-level list ``enhanced_audios`` is read as consecutive pairs:
  element ``2 * k`` is shown as the enhanced file and element ``2 * k + 1`` as
  the noisy one. Despite the name, two players (not three) are displayed.

  Raises:
    ValueError: If ``enhanced_audios`` holds fewer than two paths.
  """
  pair_count = len(enhanced_audios) // 2
  if pair_count == 0:
    # randint(0, -1) would fail with an opaque "empty range" message.
    raise ValueError("enhanced_audios does not contain a complete noisy/enhanced pair")
  idx = random.randint(0, pair_count - 1)
  labelled_paths = (
      ("Noisy audio:", enhanced_audios[idx * 2 + 1]),
      ("Enhanced audio:", enhanced_audios[idx * 2]),
  )
  for label, audio_path in labelled_paths:
    print(label)
    # load() returns (waveform, sample_rate); play back at the rate stored in
    # the file instead of assuming 16 kHz.
    waveform, sample_rate = load(audio_path)
    display.display(display.Audio(waveform, rate=sample_rate))

In [120]:
display_random_triple()

Noisy audio:


Enhanced audio:
