## Feature Extraction

In this version I create 10 CQT images per 20ms audio segment

In [1]:
import librosa as _librosa
import librosa.display as _display
from presets import Preset

import numpy as np
import matplotlib.pyplot as plt

from os import listdir, mkdir, makedirs, rmdir
from os.path import isfile, join, isdir
from shutil import rmtree

import pickle

import pandas as pd

import jams

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

librosa = Preset(_librosa)
librosa.display = _display

duration= 0.20
strings = [ 'e string', 'A string', 'D string', 'G string', 'B string', 'E string']

notesInString = [['e string OFF', 'E2', 'F2', 'F#2', 'G2', 'G#2', 'A2', 'A#2', 'B2', 
                  'C3', 'C#3', 'D3', 'D#3', 'E3', 'F3', 'F#3', 'G3', 'G#3', 'A3', 'A#3', 'B3'],
                 ['A string OFF', 'A2', 'A#2', 'B2', 'C3', 'C#3', 'D3', 'D#3', 'E3', 
                  'F3', 'F#3', 'G3', 'G#3', 'A3', 'A#3', 'B3', 'C4', 'C#4', 'D4', 'D#4', 'E4'],
                 ['D string OFF', 'D3', 'D#3', 'E3', 'F3', 'F#3', 'G3', 'G#3', 'A3', 
                  'A#3', 'B3', 'C4', 'C#4', 'D4', 'D#4', 'E4', 'F4', 'F#4', 'G4', 'G#4', 'A4'],
                 ['G string OFF', 'G3', 'G#3', 'A3', 'A#3', 'B3', 'C4', 'C#4', 'D4', 
                  'D#4', 'E4', 'F4', 'F#4', 'G4', 'G#4', 'A4', 'A#4', 'B4', 'C5', 'C#5', 'D5'],
                 ['B string OFF', 'B3', 'C4', 'C#4', 'D4', 'D#4', 'E4', 'F4', 'F#4', 
                  'G4', 'G#4', 'A4', 'A#4', 'B4', 'C5', 'C#5', 'D5', 'D#5', 'E5', 'F5', 'F#5'],
                 ['E string OFF', 'E4', 'F4', 'F#4', 'G4', 'G#4', 'A4', 'A#4', 'B4', 
                  'C5', 'C#5', 'D5', 'D#5', 'E5', 'F5', 'F#5', 'G5', 'G#5', 'A5', 'A#5', 'B5']]

In [2]:
dataFolder = 'data/GuitarSet/audio/audio_mic'
annotationFolder = 'data/GuitarSet/annotation'

ds = pd.DataFrame({'CQT' : [], 'e_String': [], 'A_String': [], 'D_String': []
                  , 'G_String': [], 'B_String': [], 'E_String': []})

print('Creating Dataset')

def getNoteClassification(note, string):    
    if note in notesInString[string]:
        return notesInString[string].index(note)
    raise ValueError('Exception Note', note, 'not in string', string)
    
def getNextAnnotation(observations):
    # Retrieve next annotation time for each string
    annotationTime = [-1 for i in range(6)]
    annotationFreq = [-1 for i in range(6)]
    for i in range(len(annotationTime)):
        if len(observations[i]) > 0:
            annotationTime[i] = observations[i][0].time
            annotationFreq[i] = observations[i][0].value['frequency']
#     print(annotationTime)
    
    # Select next annotation, detect if there are
    # multiple annotations in this observation.
    minTime = min(annotationTime)
    for i in range(len(annotationTime)):
        if annotationTime[i] != -1:
            while len(observations[i]) > 0 and observations[i][0].time < minTime:
                observations[i].pop(0)
            
            if len(observations[i]) == 0:
                annotationTime[i] = -1
                annotationFreq[i] = -1
            else:
                if observations[i][0].time < minTime or observations[i][0].time > (minTime+duration):
                    annotationTime[i] = -1
                    annotationFreq[i] = -1
#     print(annotationTime)
    
    # Remove the annotation we just processed until next audio chunk
    for i in range(len(annotationTime)):
        while len(observations[i]) > 0 and observations[i][0].time < minTime + (duration/10):
            observations[i].pop(0)

            
#     print(annotationFreq)
    # Replace frequency value with note name
    for i in range(len(annotationFreq)):
        if annotationFreq[i] == -1 or annotationFreq[i] == 0:
            annotationFreq[i] = getNoteClassification(strings[i] + ' OFF', i)
        else:
            annotationFreq[i] = getNoteClassification(librosa.hz_to_note(annotationFreq[i]), i)
#     print(annotationFreq)
    
    if all(v == -1 for v in annotationTime):
        return None
    else:
        return [minTime, annotationFreq]

dirs = listdir(dataFolder)
dirs.sort()
for f in dirs:
    print('Processing', f)

    y, sr = librosa.load(join(dataFolder, f))
    chunk = int(sr*duration)    


    annotationFile = f.replace('_mic.wav', '.jams')
    with open(join(annotationFolder, annotationFile)) as jams_file:
        data = jams.load(jams_file)

        observations = [[] for i in range (6)]
        indexes = [0 for i in range(6)]
        for a in data['annotations']:            
            if a['namespace'] == 'pitch_contour':
                currentIndex = int(a['annotation_metadata']['data_source'])
                observations[currentIndex] = a['data']
        
        
        annotation = getNextAnnotation(observations)           
            
        while annotation:            
            curTime = annotation[0]
            offset = int(curTime*sr)
            noteData = annotation[1]

            
            noteData.insert(0, np.abs(librosa.cqt(y[offset:offset+chunk], sr=sr, 
                                       n_bins=7*12, fmin=librosa.note_to_hz('C2'),
                                       bins_per_octave=12)))

            # Filter out misshaped CQTs
            if noteData[0].shape == (84, 9):                            
                ds = ds.append(pd.DataFrame([noteData], columns=ds.columns))
            else:
                print('Dropping misshaped CQT', noteData[0].shape)
            
            nextAnnotationOK = False
            while not nextAnnotationOK:
                try: 
                    annotation = getNextAnnotation(observations)
                    nextAnnotationOK = True
                except ValueError as e:
                    print(e)

print('Done, dumping dataset...')
pickle.dump(ds, open('data/dataset5', 'wb'))
print('Data dumped.')






Creating Dataset
Processing 00_BN1-129-Eb_comp_mic.wav
Processing 00_BN1-129-Eb_solo_mic.wav
Processing 00_BN1-147-Gb_comp_mic.wav
('Exception Note', 'D2', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'C2', 'not in string', 0)
('Exception Note', 'D2', 'not in string', 0)
('Exception Note', 'A3', 'not in string', 4)
('Exception Note', 'F3', 'not in string', 3)
('Exception Note', 'D2', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'D2', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'D2', 'not in string', 0)
('Exception Note', 'C2', 'not in string', 0)
('Exception Note', 'B1', 'not in string', 0)
('Exception Note', 'C#2', 'not in string', 0)
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'C2', 'not in string', 0)
('Exception

('Exception Note', 'G3', 'not in string', 4)
Processing 00_Jazz1-200-B_comp_mic.wav
Processing 00_Jazz1-200-B_solo_mic.wav
Processing 00_Jazz2-110-Bb_comp_mic.wav
('Exception Note', 'A3', 'not in string', 4)
('Exception Note', 'F#3', 'not in string', 4)
('Exception Note', 'C#4', 'not in string', 5)
Dropping misshaped CQT (84, 8)
Processing 00_Jazz2-110-Bb_solo_mic.wav
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2

Processing 00_SS2-88-F_comp_mic.wav
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'F#3', 'not in string', 3)
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'A3', 'not in string', 4)
('Exception Note', 'F3', 'not in string', 3)
('Exception Note', 'D#3', 'not in string', 3)
('Exception Note', 'G#2', 'not in string', 1)
Processing 00_SS2-88-F_solo_mic.wav
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'F2

('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'F2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
Processing 01_Rock3-117-Bb_solo_mic.wav
Processing 01_Rock3-148-C_comp_mic.wav
Processing 01_Rock3-148-C_solo_mic.wav
Processing 01_SS1-100-C#_comp_mic.wav
Processing 01_SS1-100-C#_solo_mic.wav
Processing 01_SS1-68-E_comp_mic.wav
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'A3', 'not in string', 5)
('Exception Note', 'A3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 5)
('Exception Note', 'B3', 'not in strin

('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'D#2', 'not in string', 0)
('Exception Note', 'C2', 'not in string', 0)
('Exception Note', 'C3', 'not in string', 2)
('Exception Note', 'D3', 'not in string', 3)
('Exception Note', 'D3', 'not in string', 3)
('Exception Note', 'D3', 'not in string', 3)
('Exception Note', 'D3', 'not in string', 3)
('Except

('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'G#2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2)


Processing 03_Jazz3-137-Eb_solo_mic.wav
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
('Exception Note', 'C#3', 'not in string', 2)
('Exception Note', 'D#4', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 4)
Processing 03_Jazz3-150-C_comp_mic.wav
Processing 03_Jazz3-150-C_solo_mic.wav
Processing 03_Rock1-130-A_comp_mic.wav
('Exception Note', 'B2', 'not in string', 2)
('Exception Note', 'A3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'D#3', 'not in string', 4)
('Exception Note', 'C3', 'not in string', 4)
('Exception Note', 'C3', 'not in string', 2)
Processing 03_Rock1-130-A_solo_mic.wav
Processing 03_Rock1-90-C#_comp_mic.wav
Processing 03_Rock1-90-C#_solo_mic.wav
Processing 03_Rock2-142-D_comp_mic.wav
('Exception Note', 'D#4', 'not in string', 5)
('Exception Note', 'A#3', 'not in string', 4)
('Exception Note', 'D#4', 'not in string', 5)
('Exception Note', 'C4', 'not in string', 5)
('E

Processing 04_Rock3-117-Bb_solo_mic.wav
Processing 04_Rock3-148-C_comp_mic.wav
('Exception Note', 'E3', 'not in string', 3)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A2', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
('Exception Note', 'A#2', 'not in string', 2)
Processing 04_Rock3

Processing 05_Rock2-142-D_comp_mic.wav
('Exception Note', 'G2', 'not in string', 1)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
('Exception Note', 'G3', 'not in string', 4)
Processing 05_Rock2-142-D_solo_mic.wav
('Exception Note', 'G#2', 'not in string', 1)
Processing 05_Rock2-85-F_comp_mic.wav
('Exception Note', 'C3', 'not in string', 2)
Processing 05_Rock2-85-F_solo_mic.wav
Processing 05_Rock3-117-Bb_comp_mic.wav
('Exception Note', 'A#3', 'not in string', 4)
('Exception Note', 'C3', 'not in string', 2)
Processing 05_Rock3-117-Bb_solo_mic.wav
Processing 05_Rock3-148-C_comp_mic.wav
Processing 05_Rock3-148-C_solo_mic.wav
Processing 05_SS1-100-C#_comp_mic.wav
('Exception Note', 'E3', 'not in string', 3)
Processing 05_SS1-100-C#_solo_mic.wav
('Exception Note', 'F#3', 'not in string', 3)
Processing 05_SS1-68-E_c