# Finetune on Wav Files

Need a few (text, wav) pairs. The prepare_mels.sh script just calls the following for train/valid/test:

```bash
python preprocess_audio2mel.py --wav-files "$TRAINLIST" --mel-files "$TRAINLIST_MEL"
```

Each filelist passed for `wav-files` has lines with the form:

```
LJSpeech-1.1/wavs/LJ049-0022.wav|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.
```

Each line in the associated mels file seems identical but has a different (output?) path:

```
LJSpeech-1.1/mels/LJ049-0022.pt|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.
```

In [10]:
from scipy.io.wavfile import read
import numpy as np

def load_wav_to_torch(full_path):
    sampling_rate, data = read(full_path)
    print(data.shape)
    return torch.FloatTensor(data.astype(np.float32)), sampling_rate

import re
import os
import sys
import torch
import librosa
import librosa.display

import numpy as np
import IPython.display as ipd
import shutil


from pprint import pprint

tacotron_dir = '/root/Tacotron2'
sys.path.append(tacotron_dir)

def move_file(src_path, dst_path):
    shutil.move(src_path, dst_path)

def copy_file(src_path, dst_path):
    shutil.copy(src_path, dst_path)

def subdir(name):
    return os.path.join(tacotron_dir, name)

def ojoin(*args):
    return os.path.join(*args)

def read_lines(path):
    with open(path) as f:
        return [l.strip() for l in f]
    
def assert_equal(a, b):
    assert a == b, f'{a} != {b}'
    

In [7]:
load_wav_to_torch(ojoin(wavs_dir, 'LJSpeech_370.wav'))

(90112,)


(tensor([ 2.2914e-04,  2.9328e-04,  3.0282e-04,  ...,  2.1688e-04,
         -1.9058e-05,  4.4397e-05]), 22050)

In [8]:
load_wav_to_torch(ojoin(tacotron_dir, 'LJSpeech-1.1', 'wavs','LJ013-0227.wav'))

(149917,)


(tensor([-7., -9., -3.,  ..., 18., 11.,  9.]), 22050)

In [23]:
import librosa
import IPython.display as ipd

def fix_sr_and_trim(wav_file, top_db=27, norm=False, hop_length=512):
    y_orig, sr = librosa.load(wav_file, sr=22050)
    y, index = librosa.effects.trim(y_orig, hop_length=hop_length, top_db=top_db)
    # print(f'trimmed {len(y_orig[index[0]:index[1]])} samples')
    librosa.output.write_wav(wav_file, y, 22050, norm=norm)

In [14]:
wav_file = ojoin(wavs_dir, 'LJSpeech_10.wav')
dst_path = '/tmp/hifiveson.wav'
copy_file(wav_file, dst_path)

print('before fix_sr_and_trim')
ipd.Audio(dst_path, rate=22050, autoplay=True)

before fix_sr_and_trim


In [15]:
fixed_dst_path = '/tmp/hifiveson_fixed.wav'
copy_file(dst_path, fixed_dst_path)
fix_sr_and_trim(fixed_dst_path, top_db=25, norm=True)

print('after fix_sr_and_trim')
ipd.Audio(fixed_dst_path, rate=22050, autoplay=True)

after fix_sr_and_trim


In [25]:
fixed_dst_path = '/tmp/hifiveson_fixed.wav'
copy_file(dst_path, fixed_dst_path)
fix_sr_and_trim(fixed_dst_path, top_db=25, hop_length=256, norm=True)

print('after fix_sr_and_trim with 256 hop')
ipd.Audio(fixed_dst_path, rate=22050,  autoplay=True)

after fix_sr_and_trim with 256 hop


In [16]:
import os
import sys
import torch

tacotron_dir = '/root/Tacotron2'
sys.path.append(tacotron_dir)

def subdir(name):
    return os.path.join(tacotron_dir, name)

def ojoin(*args):
    return os.path.join(*args)

def read_lines(path):
    with open(path) as f:
        return [l.strip() for l in f]

scripts_dir = subdir('scripts')
notebooks_dir = subdir('notebooks')
data_dir = subdir('data')
my_voice_dir = subdir('data/my_voice')
wavs_dir = os.path.join(my_voice_dir, 'wavs')
mels_dir = os.path.join(my_voice_dir, 'mels')

print(f'ls {wavs_dir}:')
os.listdir(wavs_dir)

ls /root/Tacotron2/data/my_voice/wavs:


['I_really_like_dogs_8.wav',
 'LJSpeech_22.wav',
 'LJSpeech_213.wav',
 'LJSpeech_47.wav',
 'LJSpeech_27.wav',
 'LJSpeech_203.wav',
 'LJSpeech_105.wav',
 'LJSpeech_220.wav',
 'LJSpeech_126.wav',
 'LJSpeech_287.wav',
 'LJSpeech_389.wav',
 'LJSpeech_244.wav',
 'LJSpeech_318.wav',
 'LJSpeech_266.wav',
 'LJSpeech_68.wav',
 'LJSpeech_76.wav',
 'LJSpeech_70.wav',
 'LJSpeech_112.wav',
 'LJSpeech_191.wav',
 'LJSpeech_281.wav',
 'LJSpeech_387.wav',
 'LJSpeech_28.wav',
 'LJSpeech_351.wav',
 'LJSpeech_299.wav',
 'LJSpeech_211.wav',
 'LJSpeech_134.wav',
 'LJSpeech_122.wav',
 'LJSpeech_388.wav',
 'LJSpeech_8.wav',
 'LJSpeech_94.wav',
 'I_really_like_dogs_7.wav',
 'LJSpeech_305.wav',
 'LJSpeech_3.wav',
 'LJSpeech_304.wav',
 'LJSpeech_118.wav',
 'LJSpeech_184.wav',
 'LJSpeech_91.wav',
 'LJSpeech_156.wav',
 'LJSpeech_312.wav',
 'LJSpeech_308.wav',
 'LJSpeech_137.wav',
 'LJSpeech_380.wav',
 'LJSpeech_289.wav',
 'LJSpeech_311.wav',
 'LJSpeech_342.wav',
 'LJSpeech_83.wav',
 'LJSpeech_335.wav',
 'LJSpeech_

In [17]:
raw_audio_base_dir = ojoin(my_voice_dir, 'raw_audio_recordings')
print(f'raw_audio_base_dir={raw_audio_base_dir}')
os.listdir(raw_audio_base_dir)

raw_audio_base_dir=/root/Tacotron2/data/my_voice/raw_audio_recordings


['I_like_dogs',
 'I_like_dogs._I_really_like_dogs',
 'I_really_like_dogs',
 '.Trash',
 '.DS_Store',
 'LJSpeech']

In [18]:
audio_dirs = [x for x in os.listdir(raw_audio_base_dir) if not x.startswith('.')]
print(f'Using audio_dirs={audio_dirs}')

dirname_to_text = {d: d.replace('_', ' ') + '.' for d in audio_dirs}
print(f'Using dirname_to_text={dirname_to_text}')

Using audio_dirs=['I_like_dogs', 'I_like_dogs._I_really_like_dogs', 'I_really_like_dogs', 'LJSpeech']
Using dirname_to_text={'I_like_dogs': 'I like dogs.', 'I_like_dogs._I_really_like_dogs': 'I like dogs. I really like dogs.', 'I_really_like_dogs': 'I really like dogs.', 'LJSpeech': 'LJSpeech.'}


Build up the filelists. These also define what text is associated with each audio file.

In [19]:
wavs_train_list_path = ojoin(my_voice_dir, 'my_voice_train_filelist.txt')
ljspeech_train_list_path = ojoin(tacotron_dir, 'filelists', 'ljs_audio_text_train_filelist.txt')

In [20]:
NUM_LJSPEECH_RECORDINGS = 400
with open(ljspeech_train_list_path) as f:
    ljspeech_text = [l.strip().split('|')[-1] for l in f][:NUM_LJSPEECH_RECORDINGS]


from pprint import pprint
pprint(ljspeech_text)

['It has used other Treasury law enforcement agents on special experiments in '
 'building and route surveys in places to which the President frequently '
 'travels.',
 'to avail himself of his powers, as it was difficult to bring home the '
 'derelictions of duties and evasion of the acts. Too much was left to the '
 'inspectors.',
 'Although Chief Rowley does not complain about the pay scale for Secret '
 'Service agents,',
 'The three officers confirm that their primary concern was crowd and traffic '
 'control,',
 'The tried and the untried, young and old, were herded together',
 'According to Marina Oswald, he thought that would help him when he got to '
 'Cuba.',
 'The proposal made was to purchase some fifty thousand square feet between '
 'Newgate, Warwick Lane, and the Sessions House,',
 'I shall seek assurances of the making and maintenance of agreements, which '
 'can be mutually relied upon,',
 'who took a carving-knife from the sideboard in the dining-room, went '
 "upstai

In [26]:
import shutil

def move_file(src_path, dst_path):
    shutil.move(src_path, dst_path)


def copy_file(src_path, dst_path):
    shutil.copy(src_path, dst_path)
    
    
wav_filelist = []
for subdir_name, text in dirname_to_text.items():
    if subdir_name == 'LJSpeech':
        print('Skipping LJSpeech...')
        continue
        
    print(f'Prepping for subdir_name={subdir_name}, text={text}')
    full_subdir_path = ojoin(raw_audio_base_dir, subdir_name)
    for i, filename in enumerate(os.listdir(full_subdir_path)):
        
        in_fpath = ojoin(full_subdir_path, filename)
        out_fname = f'{subdir_name}_{i}.wav'
        out_fpath = ojoin(wavs_dir, out_fname)
        
        fix_sr_and_trim(in_fpath, norm=True, hop_length=256)
        copy_file(in_fpath, out_fpath)
        
        wav_filelist.append(f'my_voice/wavs/{out_fname}|{text}')
        print(f'[i={i}] filename={filename}, out_fpath={out_fpath}')
        

Prepping for subdir_name=I_like_dogs, text=I like dogs.
[i=0] filename=November 25, 2019 at 3_03_33 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_0.wav
[i=1] filename=November 25, 2019 at 4_25_31 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_1.wav
[i=2] filename=November 26, 2019 at 9_03_06 AM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_2.wav
[i=3] filename=November 25, 2019 at 4_25_24 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_3.wav
[i=4] filename=November 26, 2019 at 9_03_12 AM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_4.wav
[i=5] filename=November 26, 2019 at 9_03_02 AM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_5.wav
[i=6] filename=November 25, 2019 at 3_03_36 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_6.wav
[i=7] filename=November 26, 2019 at 9_03_09 AM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/I_like_dogs_7.wav
[i=8] filename=November 

In [27]:
import re
from datetime import datetime

print(f'Prepping for subdir_name=LJSpeech')
subdir_name = 'LJSpeech'
full_subdir_path = ojoin(raw_audio_base_dir, 'LJSpeech')
files_sorted_by_datetime = sorted(
    os.listdir(full_subdir_path), 
    key=lambda x: datetime.strptime(os.path.splitext(x)[0], '%B %d, %Y at %I_%M_%S %p'))
for i, filename in enumerate(files_sorted_by_datetime):

    in_fpath = ojoin(full_subdir_path, filename)
    
    out_fname = f'{subdir_name}_{i}.wav'
    out_fpath = ojoin(wavs_dir, out_fname)

    fix_sr_and_trim(in_fpath, norm=True, hop_length=256)
    copy_file(in_fpath, out_fpath)

    wav_filelist.append(f'my_voice/wavs/{out_fname}|{ljspeech_text[i]}')
    print(f'[i={i}] filename={filename}, out_fpath={out_fpath}')        
        
wav_filelist

Prepping for subdir_name=LJSpeech
[i=0] filename=November 25, 2019 at 4_21_21 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_0.wav
[i=1] filename=November 25, 2019 at 4_21_33 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_1.wav
[i=2] filename=November 25, 2019 at 4_22_38 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_2.wav
[i=3] filename=November 25, 2019 at 4_23_09 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_3.wav
[i=4] filename=November 25, 2019 at 4_23_24 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_4.wav
[i=5] filename=November 25, 2019 at 4_23_31 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_5.wav
[i=6] filename=November 25, 2019 at 4_23_39 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_6.wav
[i=7] filename=November 25, 2019 at 4_23_51 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_7.wav
[i=8] filename=November 25, 2019 at 4_24_24 PM.wav, out_fpath=/root/Ta

[i=94] filename=November 26, 2019 at 3_15_19 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_94.wav
[i=95] filename=November 26, 2019 at 3_15_30 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_95.wav
[i=96] filename=November 26, 2019 at 3_15_42 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_96.wav
[i=97] filename=November 26, 2019 at 3_15_53 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_97.wav
[i=98] filename=November 26, 2019 at 3_16_06 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_98.wav
[i=99] filename=November 26, 2019 at 3_16_38 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_99.wav
[i=100] filename=November 26, 2019 at 3_17_23 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_100.wav
[i=101] filename=November 26, 2019 at 3_17_41 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_101.wav
[i=102] filename=November 26, 2019 at 3_17_54 PM.wav, out_fpath=/root/Tacotron2/data

[i=186] filename=November 26, 2019 at 3_35_34 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_186.wav
[i=187] filename=November 26, 2019 at 3_35_47 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_187.wav
[i=188] filename=November 26, 2019 at 3_35_59 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_188.wav
[i=189] filename=November 26, 2019 at 3_36_09 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_189.wav
[i=190] filename=November 26, 2019 at 3_36_17 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_190.wav
[i=191] filename=November 26, 2019 at 3_36_31 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_191.wav
[i=192] filename=November 26, 2019 at 3_36_45 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_192.wav
[i=193] filename=November 26, 2019 at 3_36_55 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_193.wav
[i=194] filename=November 26, 2019 at 3_37_09 PM.wav, out_fpath=/root/Ta

[i=284] filename=November 28, 2019 at 3_20_25 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_284.wav
[i=285] filename=November 28, 2019 at 3_20_44 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_285.wav
[i=286] filename=November 28, 2019 at 3_21_01 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_286.wav
[i=287] filename=November 28, 2019 at 3_21_11 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_287.wav
[i=288] filename=November 28, 2019 at 3_21_22 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_288.wav
[i=289] filename=November 28, 2019 at 3_21_31 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_289.wav
[i=290] filename=November 28, 2019 at 3_21_47 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_290.wav
[i=291] filename=November 28, 2019 at 3_21_59 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_291.wav
[i=292] filename=November 28, 2019 at 3_22_11 PM.wav, out_fpath=/root/Ta

[i=380] filename=November 28, 2019 at 3_42_42 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_380.wav
[i=381] filename=November 28, 2019 at 3_42_49 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_381.wav
[i=382] filename=November 28, 2019 at 3_42_59 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_382.wav
[i=383] filename=November 28, 2019 at 3_43_08 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_383.wav
[i=384] filename=November 28, 2019 at 3_43_20 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_384.wav
[i=385] filename=November 28, 2019 at 3_43_31 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_385.wav
[i=386] filename=November 28, 2019 at 3_43_44 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_386.wav
[i=387] filename=November 28, 2019 at 3_43_57 PM.wav, out_fpath=/root/Tacotron2/data/my_voice/wavs/LJSpeech_387.wav
[i=388] filename=November 28, 2019 at 3_44_05 PM.wav, out_fpath=/root/Ta

['my_voice/wavs/I_like_dogs_0.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_1.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_2.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_3.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_4.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_5.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_6.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_7.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs_8.wav|I like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_0.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_1.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_2.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_3.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_4.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_dogs._I_really_like_dogs_5.wav|I like dogs. I really like dogs.',
 'my_voice/wavs/I_like_do

In [33]:
import IPython.display as ipd
import numpy as np


def lookup_entry(index):
    for entry in wav_filelist:
        if f'LJSpeech_{index}.wav' in entry:
            return entry
    else:
        raise RuntimeError('this should be impossible')
        

index = np.random.randint(NUM_LJSPEECH_RECORDINGS)
print(f'index={index}')
print('expected_text:', ljspeech_text[index])
print('Entry saved in filelist:', lookup_entry(index))
ipd.Audio(ojoin(wavs_dir, f'LJSpeech_{index}.wav'), rate=22050)

index=36
expected_text: In spite of the throng, owing to the excellent arrangements made by the sheriffs,
Entry saved in filelist: my_voice/wavs/LJSpeech_36.wav|In spite of the throng, owing to the excellent arrangements made by the sheriffs,


In [34]:

def lookup_entry(index):
    for entry in wav_filelist:
        if f'LJSpeech_{index}.wav' in entry:
            return entry
    else:
        raise RuntimeError('this should be impossible')
        
def yield_ordered(step):
    for index in range(0, NUM_LJSPEECH_RECORDINGS, step):
        print(f'index={index}')
        print('expected_text:', ljspeech_text[index])
        print('Entry saved in filelist:', lookup_entry(index))
        yield ipd.Audio(ojoin(wavs_dir, f'LJSpeech_{index}.wav'), rate=22050)
        
gen = yield_ordered(step=13)

In [39]:
next(gen)

index=52
expected_text: Barber and Fletcher were both transported for life, although Fletcher declared that Barber was innocent, and had no guilty knowledge of what was being done.
Entry saved in filelist: my_voice/wavs/LJSpeech_52.wav|Barber and Fletcher were both transported for life, although Fletcher declared that Barber was innocent, and had no guilty knowledge of what was being done.


Save the filelists.

In [41]:
wavs_train_list_path = ojoin(my_voice_dir, 'my_voice_train_filelist.txt')
wavs_valid_list_path = ojoin(my_voice_dir, 'my_voice_valid_filelist.txt')

with open(wavs_train_list_path, 'w+') as f:
    f.write('\n'.join(wav_filelist[20:]) + '\n')

with open(wavs_valid_list_path, 'w+') as f:
    f.write('\n'.join(wav_filelist[:20]) + '\n')

In [42]:
len(wav_filelist)

437

# Graveyard

In [None]:
import shutil

def move_file(src_path, dst_path):
    shutil.move(src_path, dst_path)


def copy_file(src_path, dst_path):
    shutil.copy(src_path, dst_path)


wav_files = ['my_voice/wavs/i_like_dogs_0.wav|I like dogs.']
mel_files = ['my_voice/mels/i_like_dogs_0.pt|I like dogs.']
for i in range(1, 100):
    copy_file(ojoin(wavs_dir, 'i_like_dogs_0.wav'), ojoin(wavs_dir, f'i_like_dogs_{i}.wav'))
    wav_files.append(f'my_voice/wavs/i_like_dogs_{i}.wav|I like dogs.')
    
for i in range(1, 100):
    ildirld = 'i_like_dogs_i_really_like_dogs'
    copy_file(ojoin(wavs_dir, f'{ildirld}_0.wav'), ojoin(wavs_dir, f'{ildirld}_{i}.wav'))
    wav_files.append(f'my_voice/wavs/{ildirld}_{i}.wav|I like dogs. I really like dogs.')
    
for i in range(1, 100):
    ildirld = 'ildirld'
    copy_file(ojoin(wavs_dir, f'{ildirld}_0.wav'), ojoin(wavs_dir, f'{ildirld}_{i}.wav'))
    wav_files.append(f'my_voice/wavs/{ildirld}_{i}.wav|I like dogs. I really like dogs.')

with open(wavs_train_list_path, 'w+') as f:
    f.write('\n'.join(wav_files))
    
# with open(mels_train_list_path, 'w+') as f:
#     f.write('\n'.join(mel_files))

In [None]:
t, sr = load_wav_to_torch(ojoin(wavs_dir, 'I_like_dogs_0.wav'))
print(t.shape, sr)

In [None]:
p = ojoin(wavs_dir, 'I_like_dogs_0.wav')
librosa.load(p, sr=22050)[0].shape

In [None]:
fix_sample_rate(ojoin(wavs_dir, 'ildirld_0.wav'))
fix_sample_rate(ojoin(wavs_dir, 'i_like_dogs_0.wav'))
fix_sample_rate(ojoin(wavs_dir, 'i_like_dogs_i_really_like_dogs_0.wav'))

In [None]:
fix_sample_rate(ojoin(wavs_dir, 'ildirld_0.wav'))
fix_sample_rate(ojoin(wavs_dir, 'i_like_dogs_0.wav'))
fix_sample_rate(ojoin(wavs_dir, 'i_like_dogs_i_really_like_dogs_0.wav'))