In [38]:
import os
import numpy as np
import torch
import scipy.io.wavfile
import cupy as cp
import cusignal

from IPython.display import Audio

In [33]:
class data_from_dir(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, file_dir):
        'Initialization'
        self.file_dir = file_dir
        self.file_filenames = [ x for x in os.listdir(file_dir) ]
        
    def __len__(self):
        'Denotes the total number of samples'
        return len(self.file_filenames)

    def __getitem__(self, index):
        'Load one sample of data'
        # Select sample
        sr, wave = scipy.io.wavfile.read(os.path.join(self.file_dir, self.file_filenames[index]))
        
        # Let's resample and scale:
        target_length = 5 # seconds
        target_sr = 48000 # sampling rate, samples per second
        wave_gpu = cp.asarray(wave)
        resampled_wave_gpu = cusignal.resample(wave_gpu, target_sr*target_length)
        resampled_wave = cp.asnumpy(resampled_wave_gpu).astype(int)
        scaled_wave = np.int16(resampled_wave/np.max(np.abs(resampled_wave)) * 32767)

        return scaled_wave

In [73]:
dataset = data_from_dir("../data/dataset/wav")
len(dataset)

78581

In [89]:
%%time
# check one file in dataset to be sure everything is good:
file_number=7
wave = dataset[file_number]
sr = 48000

print('Type:              ', type(wave))
print('Data:              ', wave)
print('Sampling rate:     ', sr)
print('Samples:           ', len(wave))
print('Audio length:      ', round(wave.size/sr, 4), 'seconds')
print('Lowest amplitude:  ', min(wave))
print('Highest amplitude: ', max(wave))
print()
print("Scaled from dataset:")
Audio(wave, rate=sr)


Type:               <class 'numpy.ndarray'>
Data:               [-1748 -1502 -2013 ... -5373 -4104 -2723]
Sampling rate:      48000
Samples:            240000
Audio length:       5.0 seconds
Lowest amplitude:   -31948
Highest amplitude:  32767

Scaled from dataset:
CPU times: user 111 ms, sys: 7.89 ms, total: 119 ms
Wall time: 117 ms


In [90]:
print("Original source:")
sr, wave = scipy.io.wavfile.read(os.path.join("../data/sliced", dataset.file_filenames[file_number]))
Audio(wave, rate=sr)

Original source:
