#### Creating Audio Item from Spectrogram Input

##### Single Spectrogram Image

In [7]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
from cac.utils.io import read_yml
from os.path import join
from glob import glob
import matplotlib.pyplot as plt
import numpy as np

from cac.data.audio import AudioItem
from cac.data.transforms import DataProcessor
from cac.utils.viz import plot_raw_audio_signal, plot_spectrogram_image

In [2]:
# Root directory of the dataset; the .wav clips are looked up under
# <DATA_DIR>/processed/audio/.
DATA_DIR = "/data/wiai-facility/"
# glob() returns matches in a filesystem-dependent order. Sort them so that
# positional lookups such as all_audio_files[0] select the same clip on
# every run and on every machine.
all_audio_files = sorted(glob(join(DATA_DIR, "processed", "audio", "*.wav")))

In [3]:
# Sanity check: number of .wav paths collected in all_audio_files.
len(all_audio_files)

83838

#### Transform to convert audio to spectrogram

In [4]:
# The sample rates are named once so that the Resample target and the rate
# given to MelScale cannot drift apart when one of them is edited.
ORIG_SAMPLE_RATE = 44100    # rate Resample is told to convert from
TARGET_SAMPLE_RATE = 16000  # rate Resample converts to; also passed to MelScale

# List of transform specs handed to DataProcessor. Each entry gives the
# transform's name and its keyword parameters.
# NOTE(review): presumably the transforms are applied in list order — confirm
# against DataProcessor.
transforms_cfg = [
    {
        "name": "ToTensor",
        "params": {"device": "cpu"}
    },
    {
        "name": "Resample",
        "params": {
            "orig_freq": ORIG_SAMPLE_RATE,
            "new_freq": TARGET_SAMPLE_RATE
        }
    },
    {
        # Noise augmentation configured with the "esc-50" dataset and a
        # noise scale range of [0.4, 0.75].
        "name": "BackgroundNoise",
        "params": {
            "dataset_config": [
                {
                    "name": "esc-50",
                    "version": "default",
                    "mode": "all"
                }
            ],
            "min_noise_scale": 0.4,
            "max_noise_scale": 0.75
        }
    },
    {
        # NOTE(review): if hop_length is measured in samples, 160 samples at
        # 16000 Hz is a 10 ms hop (100 frames per second) — confirm.
        "name": "Spectrogram",
        "params": {
            "n_fft": 512,
            "win_length": 512,
            "hop_length": 160
        }
    },
    {
        "name": "MelScale",
        "params": {
            "n_mels": 64,
            "sample_rate": TARGET_SAMPLE_RATE,
            "f_min": 125,
            "f_max": 7500
        }
    },
    {
        "name": "AmplitudeToDB",
        "params": {}
    },
    {
        "name": "ToNumpy",
        "params": {}
    },
]

In [5]:
# Build the processing pipeline from the transform specs in transforms_cfg.
# NOTE(review): the "Loading items" progress bar printed by this cell
# presumably comes from the BackgroundNoise step loading its noise dataset —
# confirm.
signal_transform = DataProcessor(transforms_cfg)

Loading items: 100%|██████████| 2000/2000 [00:00<00:00, 338810.45it/s]


In [6]:
# Wrap the first path in all_audio_files in an AudioItem.
item = AudioItem(path=all_audio_files[0])

In [7]:
# load() returns an object indexed by key; its "signal" entry is the data
# passed to the pipeline.
signal = item.load()["signal"]
# Run the configured transform pipeline on the loaded signal.
transformed_signal = signal_transform(signal)

In [8]:
# Destination for the transformed signal. np.save does not create parent
# directories, so /data/tmp must already exist.
path = '/data/tmp/temp_spec.npy'
# Persist the pipeline output as a .npy file for the AudioItem demo below.
np.save(path, transformed_signal)

#### Create AudioItem from npy file

In [9]:
# Read the saved array back from disk and check its shape. The first
# dimension shown in the output (64) equals the configured n_mels.
spec = np.load(path)
spec.shape

(64, 2032)

In [19]:
from cac.data.audio import AudioItem

In [20]:
# Path of the saved spectrogram; identical to the value assigned before the
# np.save call earlier in the notebook.
path = '/data/tmp/temp_spec.npy'

In [27]:
# Build an AudioItem over the saved .npy file instead of a .wav file, with
# raw_waveform disabled and a start/end window of 1 to 2.
# NOTE(review): start/end look like seconds (the loaded window has 100
# columns, which would match a 160-sample hop at 16 kHz) — confirm against
# AudioItem.
spec_item = AudioItem(
    path=path,
    raw_waveform=False,
    start=1,
    end=2,
)

In [28]:
# Load the item and keep only its "signal" entry.
spec_signal = spec_item.load()['signal']

In [29]:
# Check the shape of the loaded window: the output shows the first dimension
# unchanged (64) and the second reduced to 100 for the start/end window.
spec_signal.shape

(64, 100)

In [17]:
# Scratch list used by the next cell to check slicing behaviour.
# NOTE(review): this cell's execution count is out of order; it looks like a
# leftover experiment — consider removing it.
a = [1, 2, 3]

In [18]:
# Scratch check: a stop index of -1 drops the final element of the list.
a[:-1]

[1, 2]