In [1]:
import numpy as np
import librosa
import librosa.display
import librosa.feature
import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import matplotlib.pyplot as plt
import IPython.display as ipd
import glob
import os
import pandas as pd
import scipy
from scipy import stats
from sklearn.cross_validation import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import validation_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import seaborn as sns
import jams
import matplotlib.lines as mlines
import pickle
import pprint

Using TensorFlow backend.


<font size = 4> <b> Reading all files

In [2]:
def path_gen(audio_paths, audio_type):
    all_paths = []
    for path in audio_paths:
        audio = path.split('/')[3].split('_')[2]
        if audio == audio_type:
            all_paths.append(path)
    return all_paths

def save_pkl(data, output_filename):   
    output = open(output_filename, 'wb')
    pickle.dump(data, output)
    output.close()

def read_pkl(filename):
    pkl_file = open(filename, 'rb')
    data = pickle.load(pkl_file)
    pkl_file.close()
    return data

In [3]:
# Generating paths to read all types of files
audio_paths = glob.glob('GuitarSet/audio/audio_mic/*.wav')
solo_paths = path_gen(audio_paths, 'solo')
comp_paths = path_gen(audio_paths, 'comp')

In [4]:
print (len(solo_paths))
print (len(comp_paths))

180
180


In [5]:
# Reading solo files separately (single mic, not hex pickup)
solo_audio = []
for path in solo_paths:
    audio, sr = librosa.load(path)
    solo_audio.append(audio)
solo_audio = np.array(solo_audio)

In [6]:
# Reading comp files separately (single mic, not hex pickup)
comp_audio = []
for path in comp_paths:
    audio, sr = librosa.load(path)
    comp_audio.append(audio)
comp_audio = np.array(comp_audio)

In [26]:
path = '/Volumes/Saksham - HD/Project/'
save_pkl(solo_audio, path + 'solo_audio.pkl')

In [27]:
data = read_pkl(path + 'comp_audio.pkl')

<font size = 4> <b> Clipping audio files

In [12]:
def clip_audio(audio, dur, sr, start):
    all_clips = []
    clip_len = int(dur * sr)
    pin = int(start * sr)
    for clip in audio:
        num_chunks = (len(clip)-pin)//clip_len
        for i in np.arange(num_chunks):
            clip_temp = clip[i*clip_len + pin:(i+1)*clip_len + pin]
            all_clips.append(clip_temp)
    all_clips = np.array(all_clips)
    return all_clips

def clip_QC(audio, dur, sr):
    all_clips = np.array([])
    clip_len = dur*sr
    for clip in audio:
        duration = len(clip)//clip_len
        all_clips = np.append(all_clips, duration)
    return int(np.sum(all_clips))

def gen_labels(paths, audio, dur, sr, start):
    clip_len = int(dur * sr)
    pin = int(start * sr)
    genres = np.array([])
    i = 0
    for clip in audio:
        num_chunks = (len(clip)-pin)//clip_len
        genre = paths[i].split('/')[-1].split('_')[1].split('-')[0][:-1]
        for chunk in np.arange(num_chunks):
            genres = np.append(genres, genre)
        i += 1
    return genres

In [13]:
def gen_dataset(paths, audio, dur, num_sets, sr):
    stride = float(dur)/num_sets
    pin = np.arange(num_sets) * stride
    clips_all = []
    labels_all = []
#     clips_all = np.array([])
#     labels_all = np.array([])
    for start in pin:
        clips = clip_audio(audio, dur, sr, start)
        clips_all.append(clips)
#         clips_all = np.append(clips_all, clips)
        labels = gen_labels(paths, audio, dur, sr, start)
        labels_all.append(labels)
#         labels_all = np.append(labels_all, labels)
    
    clips_all = np.array(clips_all)
    labels_all = np.array(labels_all)
    
    all_clips = []
    for clips in clips_all:
        for clip in clips:
            all_clips.append(clip)
    all_clips = np.array(all_clips)
    
    all_labels = []
    for labels in labels_all:
        for label in labels:
            all_labels.append(label)
    all_labels = np.array(all_labels)    
    
    return all_clips, all_labels

In [14]:
def gen_STFT(clips, n_fft, w, hop_length):
    X = []
    for clip in clips:
        STFT = np.abs(librosa.stft(clip, n_fft=n_fft, hop_length=hop_length, window = w))
        X.append(STFT)
    X = np.array(X)
    return X

def gen_MelSpectrogram(clips, n_fft, sr, hop_length):
    X = []
    for clip in clips:
        spect = librosa.feature.melspectrogram(y=clip, sr=sr,n_fft=n_fft, hop_length=hop_length, power = 2)
        spect = librosa.power_to_db(spect, ref=np.max)
        X.append(spect)
    X = np.array(X)

In [15]:
"""
Setting up duration and num_sets.
Duration is in seconds.
num_sets is the number of times clip-cutting process should take place. (Finding more sets of 't' second clips in the
audio by striding the starting point of clip)
"""

dur_1 = 1
dur_2 = 2
dur_3 = 3
dur_4 = 4
dur_5 = 5

num_sets_1 = 5
num_sets_2 = 5
num_sets_3 = 5
num_sets_4 = 5
num_sets_5 = 5

In [16]:
# Generating solo clips of various durations

solo_clips_1, solo_labels_1 = gen_dataset(solo_paths, solo_audio, dur_1, num_sets_1, sr)
# solo_clips_2, solo_labels_2 = gen_dataset(solo_paths, solo_audio, dur_2, num_sets_2, sr)
# solo_clips_3, solo_labels_3 = gen_dataset(solo_paths, solo_audio, dur_3, num_sets_3, sr)
# solo_clips_4, solo_labels_4 = gen_dataset(solo_paths, solo_audio, dur_4, num_sets_4, sr)
# solo_clips_5, solo_labels_5 = gen_dataset(solo_paths, solo_audio, dur_5, num_sets_5, sr)

In [19]:
# Generating comp clips of various durations

comp_clips_1, comp_labels_1 = gen_dataset(comp_paths, comp_audio, dur_1, num_sets_1, sr)
comp_clips_2, comp_labels_2 = gen_dataset(comp_paths, comp_audio, dur_2, num_sets_2, sr)
comp_clips_3, comp_labels_3 = gen_dataset(comp_paths, comp_audio, dur_3, num_sets_3, sr)
comp_clips_4, comp_labels_4 = gen_dataset(comp_paths, comp_audio, dur_4, num_sets_4, sr)
comp_clips_5, comp_labels_5 = gen_dataset(comp_paths, comp_audio, dur_5, num_sets_5, sr)

<font size = 4> <b> Generating spectogram and visual representation of audio

In [17]:
# Parameters
n_fft_1 = 2048
n_fft_2 = 1024
w_1 = np.hamming(n_fft_1)
w_2 = np.hamming(n_fft_2)
hop_length = int(0.01 * sr)

<font size = 3> STFT (Solo)

In [18]:
# STFT for 1s solo clips
X_solo_100msSTFT_1 = gen_STFT(solo_clips_1, n_fft_1, w_1, hop_length)
X_solo_50msSTFT_1 = gen_STFT(solo_clips_1, n_fft_2, w_2, hop_length)

In [None]:
X_solo_100msSTFT_1_v1 = X_solo_100msSTFT_1[:X_solo_100msSTFT_1.shape[0]//3, :, :]
X_solo_100msSTFT_1_v2 = X_solo_100msSTFT_1[X_solo_100msSTFT_1.shape[0]//3:X_solo_100msSTFT_1.shape[0] * 2//3, :, :]
X_solo_100msSTFT_1_v3 = X_solo_100msSTFT_1[X_solo_100msSTFT_1.shape[0] * 2//3:, :, :]

In [48]:
X_solo_100msSTFT_1_v1.shape

(8872, 1025, 101)

In [54]:
def save_txt(data, filename):

    # Write the array to disk
    with open(filename, 'w') as outfile:
        outfile.write('# Array shape: {0}\n'.format(data.shape))

        # Iterating through a ndimensional array produces slices along
        # the last axis. This is equivalent to data[i,:,:] in this case
        for data_slice in data:

            # The formatting string indicates that I'm writing out
            # the values in left-justified columns 7 characters in width
            # with 2 decimal places.  
            np.savetxt(outfile, data_slice, fmt='%-7.2f')

            # Writing out a break to indicate different slices...
            outfile.write('# New slice\n')

In [56]:
path = '/Volumes/Saksham - HD/Project/CNN_Pkl/X_solo_100msSTFT_1/'
save_txt(X_solo_100msSTFT_1_v1, path + 'X_solo_100msSTFT_1_v1.txt')

In [61]:
path = '/Volumes/Saksham - HD/Project/CNN_Pkl/X_solo_100msSTFT_1/'
save_pkl(X_solo_100msSTFT_1_v1,path + 'X_solo_100msSTFT_1_v1.pkl')

OSError: [Errno 22] Invalid argument

In [57]:
new_data = np.loadtxt(path + 'X_solo_100msSTFT_1_v1.txt')

In [58]:
print (new_data.shape)

(9093800, 101)


In [59]:
data = read_pkl(path + 'X_solo_100msSTFT_1_v1.pkl')

UnpicklingError: invalid load key, '#'.

In [None]:
data.shape

In [None]:
print (len(data))
print (len(data[0]))

In [None]:
# STFT for 2s solo clips
X_solo_100msSTFT_2 = gen_STFT(solo_clips_2, n_fft_1, w_1, hop_length)
X_solo_50msSTFT_2 = gen_STFT(solo_clips_2, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 3s solo clips
X_solo_100msSTFT_3 = gen_STFT(solo_clips_3, n_fft_1, w_1, hop_length)
X_solo_50msSTFT_3 = gen_STFT(solo_clips_3, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 4s solo clips
X_solo_100msSTFT_4 = gen_STFT(solo_clips_4, n_fft_1, w_1, hop_length)
X_solo_50msSTFT_4 = gen_STFT(solo_clips_4, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 5s solo clips
X_solo_100msSTFT_5 = gen_STFT(solo_clips_5, n_fft_1, w_1, hop_length)
X_solo_50msSTFT_5 = gen_STFT(solo_clips_5, n_fft_2, w_2, hop_length)

<font size = 3> MelSpectrogram (Solo)

In [None]:
# MelSpectrogram for 1s solo clips
X_solo_100msMelSpect_1 = gen_MelSpectrogram(solo_clips_1, n_fft_1, sr, hop_length)
X_solo_50msMelSpect_1 = gen_MelSpectrogram(solo_clips_1, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 2s solo clips
X_solo_100msMelSpect_2 = gen_MelSpectrogram(solo_clips_2, n_fft_1, sr, hop_length)
X_solo_50msMelSpect_2 = gen_MelSpectrogram(solo_clips_2, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 3s solo clips
X_solo_100msMelSpect_3 = gen_MelSpectrogram(solo_clips_3, n_fft_1, sr, hop_length)
X_solo_50msMelSpect_3 = gen_MelSpectrogram(solo_clips_3, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 4s solo clips
X_solo_100msMelSpect_4 = gen_MelSpectrogram(solo_clips_4, n_fft_1, sr, hop_length)
X_solo_50msMelSpect_4 = gen_MelSpectrogram(solo_clips_4, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 5s solo clips
X_solo_100msMelSpect_5 = gen_MelSpectrogram(solo_clips_5, n_fft_1, sr, hop_length)
X_solo_50msMelSpect_5 = gen_MelSpectrogram(solo_clips_5, n_fft_2, sr, hop_length)

<font size = 3> STFT (Comp)

In [None]:
# STFT for 1s comp clips
X_comp_100msSTFT_1 = gen_STFT(comp_clips_1, n_fft_1, w_1, hop_length)
X_comp_50msSTFT_1 = gen_STFT(comp_clips_1, n_fft_2, w_2, hop_length)

In [None]:
save_pkl(X_comp_100msSTFT_1, 'X_comp_100msSTFT_1.pkl')
save_pkl(X_comp_50msSTFT_1, 'X_comp_50msSTFT_1.pkl')

In [None]:
# STFT for 2s comp clips
X_comp_100msSTFT_2 = gen_STFT(comp_clips_2, n_fft_1, w_1, hop_length)
X_comp_50msSTFT_2 = gen_STFT(comp_clips_2, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 3s comp clips
X_comp_100msSTFT_3 = gen_STFT(comp_clips_3, n_fft_1, w_1, hop_length)
X_comp_50msSTFT_3 = gen_STFT(comp_clips_3, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 4s comp clips
X_comp_100msSTFT_4 = gen_STFT(comp_clips_4, n_fft_1, w_1, hop_length)
X_comp_50msSTFT_4 = gen_STFT(comp_clips_4, n_fft_2, w_2, hop_length)

In [None]:
# STFT for 5s comp clips
X_comp_100msSTFT_5 = gen_STFT(comp_clips_5, n_fft_1, w_1, hop_length)
X_comp_50msSTFT_5 = gen_STFT(comp_clips_5, n_fft_2, w_2, hop_length)

<font size = 3> MelSpectrogram (Comp)

In [None]:
# MelSpectrogram for 1s comp clips
X_comp_100msMelSpect_1 = gen_MelSpectrogram(comp_clips_1, n_fft_1, sr, hop_length)
X_comp_50msMelSpect_1 = gen_MelSpectrogram(comp_clips_1, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 2s comp clips
X_comp_100msMelSpect_2 = gen_MelSpectrogram(comp_clips_2, n_fft_1, sr, hop_length)
X_comp_50msMelSpect_2 = gen_MelSpectrogram(comp_clips_2, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 3s comp clips
X_comp_100msMelSpect_3 = gen_MelSpectrogram(comp_clips_3, n_fft_1, sr, hop_length)
X_comp_50msMelSpect_3 = gen_MelSpectrogram(comp_clips_3, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 4s comp clips
X_comp_100msMelSpect_4 = gen_MelSpectrogram(comp_clips_4, n_fft_1, sr, hop_length)
X_comp_50msMelSpect_4 = gen_MelSpectrogram(comp_clips_4, n_fft_2, sr, hop_length)

In [None]:
# MelSpectrogram for 5s comp clips
X_comp_100msMelSpect_5 = gen_MelSpectrogram(comp_clips_5, n_fft_1, sr, hop_length)
X_comp_50msMelSpect_5 = gen_MelSpectrogram(comp_clips_5, n_fft_2, sr, hop_length)

<font size = 4> <b> Data Preparation

In [None]:
print (X_solo_100msSTFT_1.shape)
print (X_solo_50msSTFT_1.shape)

In [None]:
len(solo_labels_1)

In [None]:
# pkl_file = open('data.pkl', 'rb')

# data1 = pickle.load(pkl_file)
# pprint.pprint(data1)

# pkl_file.close()

In [None]:
save_pkl(X_solo_100msSTFT_1, 'X_solo_100msSTFT_1.pkl')

In [None]:
data = read_pkl('X_solo_100msSTFT_1.pkl')

In [None]:
data.shape