*Importing necessary libraries*

In [314]:
import librosa # open wav files
import librosa.display
import soundfile as sf
import matplotlib.pyplot as plt
from IPython.display import Audio
import os # operating system via python
import numpy as np  # linear algebra
import pandas as pd  # CSV file
import scipy.io.wavfile as sci_wav  # Open wav files
import re # Using regex to match

*Loading the wavesounds*

In [315]:
# Defining the global path to wav files
PATH = './cats_dogs/'

In [316]:
# Reading all the list of files in the directory path
def read_file_names(PATH):
    file_name_cats = []
    file_name_dogs = []
    for file_path in os.listdir(PATH):
        if os.path.isfile(os.path.join(PATH, file_path)):
            if 'cat' in file_path:
                file_name_cats.append(file_path)
            elif 'dog' in file_path:
                file_name_dogs.append(file_path)
    return file_name_cats, file_name_dogs

In [317]:
cats_files, dogs_files = read_file_names(PATH)

In [352]:
def read_wav_files(wav_files):
    '''Returns a list of audio waves
    Params:
        wav_files: List of .wav paths

    Returns:
        List of audio signals
    '''
    if not isinstance(wav_files, list):
        wav_files = [wav_files]
    return [librosa.load(PATH + f) for f in wav_files]
    # return [sci_wav.read(PATH + f)[1] for f in wav_files]

In [322]:
def train_test_split(file_names, split_porcentage=0.2, seed=42):
     np.random.seed(seed) # Set a seed for reproducibility
     n = len(file_names)
     k = int(n * split_porcentage)

     random_permutation = np.random.permutation(n)     
     train = [file_names[i] for i in random_permutation]

     train_file_names = train[:n - k]
     train_wav_file = read_wav_files(train_file_names)

     test_file_names = train[n - k:]
     test_wav_files = read_wav_files(test_file_names)

     return train_file_names, train_wav_file, test_file_names, test_wav_files

In [334]:
# Split train wav's from test wav's
train_cat_names, train_cat_wav, test_cat_names, test_cat_wav = train_test_split(cats_files)
train_dog_names, train_dog_wav, test_dog_names, test_dog_wav = train_test_split(dogs_files)

In [372]:
dataframe = {
     'train_cat': train_cat_wav,
     'train_dog': train_dog_wav,
     'test_cat': test_cat_wav,
     'test_dog': test_dog_wav,
}

In [373]:
def load_dataset(dataframe):
    df = dataframe
    dataset = {}
    for k in ['train_cat', 'train_dog', 'test_cat', 'test_dog']:
        v = np.concatenate([item[0] for item in df[k]], axis=0).astype('float32')

        # Compute mean and variance
        if k == 'train_cat':
            dog_std = dog_mean = 0
            cat_std, cat_mean = v.std(), v.mean()
        elif k == 'train_dog':
            dog_std, dog_mean = v.std(), v.mean()

        # Mean and variance suppression
        std, mean = (cat_std, cat_mean) if 'cat' in k else (dog_std, dog_mean)
        v = (v - mean) / std
        dataset[k] = v

        print('loaded {} with {} sec of audio'.format(k, len(v) / 16000))

    return dataset

In [374]:
load_dataset(dataframe)

loaded train_cat with 1431.456125 sec of audio
loaded train_dog with 660.626 sec of audio
loaded test_cat with 393.0434375 sec of audio
loaded test_dog with 164.100125 sec of audio


{'train_cat': array([-0.00340083, -0.00340083, -0.00340083, ..., -0.07006193,
        -0.36248338, -0.26494327], dtype=float32),
 'train_dog': array([-0.02417978, -0.02697763, -0.02313133, ...,  0.00651677,
         0.01868594,  0.0135517 ], dtype=float32),
 'test_cat': array([-0.00340083, -0.00340083, -0.00340083, ..., -0.00161592,
        -0.00312608, -0.00340083], dtype=float32),
 'test_dog': array([-0.01475155, -0.01616455, -0.01420115, ..., -0.00322242,
        -0.0046466 , -0.00214317], dtype=float32)}

In [394]:
cat_train_df = pd.DataFrame({
     'train_cat_name': train_cat_names,
     'train_cat_wav': [item[0] for item in train_cat_wav],
     'sample_rate': [item[1] for item in train_cat_wav] 
})
cat_test_df = pd.DataFrame({
     'test_cat_name': test_cat_names,
     'test_cat_wav': [item[0] for item in test_cat_wav],
     'sample_rate': [item[1] for item in test_cat_wav] 
})
dog_train_df = pd.DataFrame({
     'train_dog_name': train_dog_names,
     'train_dog_wav': [item[0] for item in train_dog_wav],
     'sample_rate': [item[1] for item in train_dog_wav] 
})
dog_test_df = pd.DataFrame({
     'test_dog_name': test_dog_names,
     'test_dog_wav': [item[0] for item in test_dog_wav],
     'sample_rate': [item[1] for item in test_dog_wav] 
})

In [407]:
cat_train_df.head()
# cat_train_df['train_cat_wav'][0]

Unnamed: 0,train_cat_name,train_cat_wav,sample_rate
0,cat_84.wav,"[1.3545656e-10, -2.2184848e-11, -2.6804926e-11...",22050
1,cat_68.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050
2,cat_1.wav,"[0.0036967827, 0.0040170797, 0.0015610287, 6.8...",22050
3,cat_147.wav,"[0.024267724, 0.029048927, 0.027480468, 0.0294...",22050
4,cat_33.wav,"[-0.0055221445, -0.0021317091, 0.0021610605, 0...",22050


In [396]:
dog_test_df.head()

Unnamed: 0,test_dog_name,test_dog_wav,sample_rate
0,dog_barking_11.wav,"[-0.0017590539, -0.0019561895, -0.001682266, -...",22050
1,dog_barking_9.wav,"[-2.9603589e-05, -3.141236e-05, -2.9355077e-05...",22050
2,dog_barking_92.wav,"[-0.00032149986, -0.0005554668, -0.00056589936...",22050
3,dog_barking_77.wav,"[-1.9376983e-05, -3.2515894e-05, -0.0001455399...",22050
4,dog_barking_33.wav,"[-3.1116426e-05, 4.7645976e-06, 6.317729e-05, ...",22050


*To plot some of this waves*

In [409]:
# Cat wave
librosa.display.waveshow(cat_train_df['train_cat_wav'][10], sr = cat_train_df['sample_rate'][10])

<librosa.display.AdaptiveWaveplot at 0x28335fa90>