In [1]:
import wave
import numpy as np
import python_speech_features as ps
import os
import glob
import pickle
from scipy.io import wavfile

# Small positive constant. It is not referenced by any other visible cell;
# presumably intended as a numerical-stability guard (e.g. against log(0) or
# division by zero) -- TODO confirm it is still needed, otherwise remove.
epsilon = 1e-5

def wgn(x, snr):
    """Generate white Gaussian noise scaled to a target SNR relative to ``x``.

    Args:
        x: 1-D signal (NumPy array or any sequence of numbers). The values are
            promoted to float64 before squaring, so integer PCM data such as
            int16 no longer overflows while the signal power is computed.
        snr: Desired signal-to-noise ratio in decibels.

    Returns:
        A float64 array of ``len(x)`` noise samples drawn from
        ``np.random.randn`` and scaled so that their expected power equals
        ``mean(x**2) / 10**(snr / 10)``. Add it to ``x`` to obtain the noisy
        signal.
    """
    # Squaring in the input dtype silently wraps around for int16/int32 audio,
    # which would yield a wrong (far too small) noise level.
    signal = np.asarray(x, dtype=np.float64)
    snr_linear = 10 ** (snr / 10.0)  # dB -> linear power ratio
    signal_power = np.sum(signal ** 2) / len(signal)
    noise_power = signal_power / snr_linear
    return np.random.randn(len(signal)) * np.sqrt(noise_power)

def read_file(filename):
    """Load a WAV file and return its samples, a time axis and its sample rate.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A tuple ``(samples, time, framerate)`` where
        ``samples`` is the data array returned by ``scipy.io.wavfile.read``,
        ``time`` is a float array with one entry per frame holding the frame's
        offset in seconds (``frame_index / framerate``), and
        ``framerate`` is the sample rate read from the WAV header.
    """
    # The header is read through `wave`; the context manager guarantees the
    # handle is released even if a later step raises.
    with wave.open(filename, 'r') as wav:
        framerate = wav.getframerate()
        wav_length = wav.getnframes()

    # The samples themselves come from scipy. The previous implementation also
    # decoded the raw frames with the deprecated `np.fromstring` into a
    # variable that was never used; that dead work has been removed.
    _, samples = wavfile.read(filename)

    time = np.arange(0, wav_length) * (1.0 / framerate)
    return samples, time, framerate

def generate_label(control):
    """Translate a class-folder name into its integer label.

    ``'cd'`` maps to 1; ``'cc'`` and every other value map to 0.
    """
    return 1 if control == 'cd' else 0


# Feature-extraction settings: `filter_num` is the number of filterbank
# channels requested from logfbank, `_t` is the number of frames per segment.
filter_num, _t = 40, 300

# Dataset root; the extraction loop treats each sub-folder name as a class.
rootdir = 'train/Full_wave_enhanced_audio'

# Accumulators filled by the extraction loop (one entry per segment).
train_label, train_data = [], []

In [2]:
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
import numpy as np

# Random waveform-augmentation pipeline. Every transform is configured with
# p=0.5 (per the audiomentations API this is the probability of applying it).
# No random seed is set anywhere in the visible cells, so the augmented
# dataset differs between runs.
# NOTE(review): the AddGaussianNoise amplitude bounds look calibrated for
# float audio in [-1, 1]; the extraction loop feeds raw `wavfile.read` samples
# cast to float64 -- if those are int16 PCM the added noise is negligible.
# Confirm the intended scale.
# NOTE(review): the keyword names accepted by `Shift` differ between
# audiomentations releases -- confirm against the installed version.
augment = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
])

# Generate 2 seconds of dummy audio for the sake of example
# samples = np.random.uniform(low=-0.2, high=0.2, size=(32000,)).astype(np.float32)

# Augment/transform/perturb the audio data
# augmented_samples = augment(samples=samples, sample_rate=16000)

In [3]:
# Nominal sample rate of the corpus. Kept so that any other cell reading
# `rate` still works; feature extraction below uses each file's own rate as
# reported by `wavfile.read` instead of assuming this value.
rate = 44100

# Make the cell idempotent: re-running it must not append a second copy of
# the dataset to the accumulators.
train_label.clear()
train_data.clear()

# Sorted listings give a deterministic traversal order (os.listdir order is
# arbitrary).
for control in sorted(os.listdir(rootdir)):
    sub_dir = os.path.join(rootdir, control)
    if not os.path.isdir(sub_dir):
        # Stray files in the dataset root (e.g. .DS_Store) are not classes.
        continue

    label = generate_label(control)

    for sample in sorted(os.listdir(sub_dir)):
        if not sample.lower().endswith('.wav'):
            continue

        file_rate, data = wavfile.read(os.path.join(sub_dir, sample))
        if data.ndim > 1:
            # logfbank expects a 1-D signal; down-mix multi-channel audio.
            data = data.mean(axis=1)

        # NOTE(review): samples are passed at their native scale (raw PCM
        # values when the file is integer PCM) -- confirm this matches the
        # amplitude range the augmentation pipeline was tuned for.
        augmented_samples = augment(samples=np.array(data, np.float64),
                                    sample_rate=file_rate)

        # logfbank's default FFT size (512) is shorter than a 25 ms analysis
        # window at high sample rates (1103 samples at 44.1 kHz), in which
        # case every frame is truncated. Use the next power of two that holds
        # a whole frame, never going below the library default.
        frame_len = int(np.ceil(0.025 * file_rate))
        nfft = max(512, 1 << (frame_len - 1).bit_length())
        mel_spec = ps.logfbank(augmented_samples, file_rate,
                               nfilt=filter_num, nfft=nfft)

        # Keep only whole segments of `_t` frames.
        n_segments = mel_spec.shape[0] // _t
        if n_segments == 0:
            # Clip shorter than one segment: nothing to emit, and computing
            # deltas on an empty feature matrix would fail.
            continue
        mel_spec = mel_spec[:n_segments * _t, :]

        # First- and second-order deltas, computed on the trimmed features.
        delta1 = ps.delta(mel_spec, 2)
        delta2 = ps.delta(delta1, 2)

        for i in range(n_segments):
            begin = _t * i
            end = begin + _t

            # Channels: 0 = log-mel, 1 = delta, 2 = delta-delta;
            # resulting shape is (3, _t, filter_num).
            _data = np.stack([
                mel_spec[begin:end, :],
                delta1[begin:end, :],
                delta2[begin:end, :],
            ]).astype(np.float32)

            train_label.append(label)
            train_data.append(_data)






In [4]:
# Sanity check of the stacked feature tensor: each list entry has shape
# (3, _t, filter_num), so the result is (n_segments, 3, _t, filter_num).
np.array(train_data).shape

(2541, 3, 300, 40)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the segments for validation. A fixed random_state makes the
# split reproducible across Restart & Run All.
# NOTE(review): the split is done per segment, so segments cut from the same
# recording can land in both train and validation sets -- consider a
# group-wise split by recording if the validation score must be leak-free.
X_train, X_valid, y_train, y_valid = train_test_split(
    np.array(train_data),
    np.array(train_label),
    train_size=0.95,
    random_state=42,
)

# Persist the split in the order (X_train, y_train, X_valid, y_valid).
# The context manager closes the file even if pickling fails.
output = './adress_pssat.pkl'
with open(output, 'wb') as f:
    pickle.dump((X_train, y_train, X_valid, y_valid), f)