In [1]:
%run utils.ipynb

In [2]:
import re
import glob
import math
import numpy as np
from scipy.io import wavfile

In [3]:
def default_params(params=None):
    """Return a complete encoding-parameter dict, filling in defaults.

    Args:
        params: Optional mapping with any of the keys 'bin_size',
            'threshold' and 'max_frequency'. ``None`` (the default) is
            treated like an empty mapping. Keys other than these three are
            not copied into the result.

    Returns:
        A new dict with exactly the keys 'bin_size' (default 10),
        'threshold' (default 3) and 'max_frequency' (default 5000).
    """
    # Use None as the sentinel instead of a shared mutable `{}` default, and
    # tolerate callers that pass None explicitly.
    if params is None:
        params = {}

    return {
        'bin_size': params.get('bin_size', 10),
        'threshold': params.get('threshold', 3),
        'max_frequency': params.get('max_frequency', 5000)
    }

# Data Classes

In [4]:
class DataPoint:
    """One encoded sample: a note label plus the data vector built for it."""

    def __init__(self, source_name, note, data, params):
        """Store the origin, the note label, the encoded data and the params used."""
        self.source_name, self.note = source_name, note
        self.data, self.params = data, params

    def __str__(self):
        """Short human-readable description of the point."""
        template = "<Note {} from {}>"
        return template.format(self.note, self.source_name)

    def frequencies(self):
        """Return the start frequency of every bin as a numpy array.

        The values run from 0 up to (but excluding) params['max_frequency']
        in steps of params['bin_size'].
        """
        upper_bound = self.params['max_frequency']
        step = self.params['bin_size']
        return np.arange(0, upper_bound, step)

class DataSet:
    """A named train/test split of data points, with feature and label lists."""

    def __init__(self, source_name, train_data, test_data):
        """Keep the raw points and derive X_*/y_* lists from them.

        X_train / X_test hold each point's ``data`` attribute and
        y_train / y_test hold each point's ``note`` attribute, in the same
        order as the corresponding input sequence.
        """
        def column(points, attribute):
            # Collect one attribute from every point, preserving order.
            return [getattr(point, attribute) for point in points]

        self.source_name = source_name
        self.train_data = train_data
        self.test_data = test_data

        self.X_train = column(train_data, 'data')
        self.y_train = column(train_data, 'note')
        self.X_test = column(test_data, 'data')
        self.y_test = column(test_data, 'note')

# Encoding Functions

In [5]:
def encode_wav(source_name, note, wav_data, sampling_rate, params = {}):
    """Encode audio samples as a thresholded, normalised, binned spectrum.

    The samples are handed to ``fourier_transform`` (not defined in this
    cell; presumably loaded by ``%run utils.ipynb``), which returns parallel
    sequences of frequencies and amplitudes. Every pair whose amplitude is at
    least ``mean + params['threshold'] * stdev`` of all amplitudes is scaled
    by the largest amplitude and written to bin ``freq // bin_size``; a bin
    keeps the largest value written to it.

    Args:
        source_name: Name of the data source, stored on the DataPoint.
        note: Note label, stored on the DataPoint.
        wav_data: Audio samples forwarded to ``fourier_transform``.
        sampling_rate: Sampling rate forwarded to ``fourier_transform``.
        params: Optional overrides for 'bin_size', 'threshold' and
            'max_frequency'; completed by ``default_params``.

    Returns:
        A DataPoint whose ``data`` is a numpy array with
        ``ceil(max_frequency / bin_size)`` entries.
    """
    params = default_params(params)
    frequencies, amplitudes = fourier_transform(wav_data, sampling_rate)

    bin_size = params['bin_size']
    max_frequency = params['max_frequency']

    mean = np.mean(amplitudes)
    stdev = np.std(amplitudes)
    threshold = mean + (params['threshold'] * stdev)

    data = np.zeros(math.ceil(max_frequency / bin_size))

    max_amplitude = max(amplitudes)
    if max_amplitude == 0:
        # A silent window would otherwise divide by zero during
        # normalisation; there is nothing to encode, so return the empty bins.
        return DataPoint(source_name, note, data, params)

    for (freq, ampl) in zip(frequencies, amplitudes):
        if ampl < threshold:
            continue

        bin_idx = int(freq // bin_size)

        # Stop at the first peak outside the encoded range. The bin check
        # covers freq == max_frequency when max_frequency is a multiple of
        # bin_size, which would index one past the end of `data`.
        # NOTE(review): `break` assumes frequencies are in ascending order --
        # confirm against fourier_transform.
        if freq > max_frequency or bin_idx >= len(data):
            break

        # Normalize amplitude to be in the range [0, 1]
        data[bin_idx] = max(ampl / max_amplitude, data[bin_idx])

    return DataPoint(source_name, note, data, params)

def encode_file(input_file, source_name, note, params = {}, start_tick=0, num_ticks=44100):
    """Read a WAV file and encode one window of its first channel.

    Args:
        input_file: Path of the WAV file to read.
        source_name: Name of the data source, forwarded to ``encode_wav``.
        note: Note label, forwarded to ``encode_wav``.
        params: Optional encoding parameters, forwarded to ``encode_wav``.
        start_tick: Index of the first sample of the window.
        num_ticks: Number of samples in the window.

    Returns:
        Whatever ``encode_wav`` returns for the selected window.
    """
    sampling_rate, wav_data = wavfile.read(input_file)

    # wavfile.read yields a 1-D array for mono files and a
    # (samples, channels) array otherwise; only slice out channel 0 when a
    # channel axis exists so mono files no longer raise IndexError.
    if wav_data.ndim > 1:
        wav_data = wav_data[:, 0]

    wav_data = wav_data[start_tick:(start_tick+num_ticks)]
    return encode_wav(source_name, note, wav_data, sampling_rate, params)

In [6]:
# Manual smoke test for encode_file; disabled by default. Set TestCode to True
# to encode one window of a single sample file and plot the encoded bins.
TestCode = False

if TestCode:
    # presumably view_frequencies.ipynb provides plot_frequency_range -- TODO confirm
    %run view_frequencies.ipynb
    %matplotlib notebook
    import matplotlib.pyplot as plt
    
    # NOTE(review): this path starts with './data' while the dataset encoders
    # in this notebook read from '../data' -- confirm which one is current.
    input_file = './data/UprightPiano-FreePats/F#4.wav'
    # Encode 7000 samples starting at sample 70_000 with the default params.
    data = encode_file(input_file, "UprightPiano-FreePats", ('F#', 4), {}, start_tick=70_000, num_ticks=7000)
    
    _ = plot_frequency_range(input_file, data.params['threshold'])
    
    # Plot the encoded value of every bin against the bin's start frequency.
    plt.plot(data.frequencies(), data.data)
    plt.xlim(0, data.params['max_frequency'])
    plt.show()

# Encoder for UprightPiano-FreePats dataset

In [7]:
def upright_piano_note_encoder(note):
    """Join a (name, octave) pair into one label, e.g. ('F#', 4) -> 'F#4'."""
    name = note[0]
    octave_text = str(note[1])
    return name + octave_text

def upright_piano_note_decoder(note):
    """Split a label such as 'F#4' into ('F#', 4).

    The last character is read as the octave number; everything before it
    is returned unchanged as the note name.
    """
    name, octave_char = note[:-1], note[-1]
    return (name, int(octave_char))

def encode_upright_piano_data():
    """Encode the UprightPiano-FreePats WAV files into a train/test DataSet.

    Every ``*.wav`` file under ``../data/UprightPiano-FreePats/`` is cut into
    overlapping windows of ``num_ticks`` samples, taken from at most the first
    three seconds of the file, and each window is encoded with ``encode_wav``
    using the default parameters. The note label comes from the file name
    (name + one octave digit before ``.wav``). Within each file, every
    ``train_to_test_ratio``-th window (starting with the first) goes to the
    test set and the rest to the training set.

    Returns:
        A tuple ``(DataSet, note_encoder, note_decoder)``.
    """
    params = default_params()
    source_name = 'UprightPiano-FreePats'

    print('Extracting data from {}...'.format(source_name))

    # Declare constants
    num_ticks = 7000          # samples per encoded window
    tick_jumps = 3500         # distance between consecutive window starts
    train_to_test_ratio = 4   # one window in every 4 is held out for testing

    # Prepare data containers
    train_data = []
    test_data = []

    # glob returns paths in arbitrary order; sort so the resulting data set
    # is ordered the same way on every run and platform.
    file_paths = sorted(glob.glob('../data/UprightPiano-FreePats/*.wav'))
    for file in file_paths:

        # Raw string: the pattern contains regex escapes that are not valid
        # Python string escapes.
        m = re.search(r'([\w#]+)(\d)\.wav$', file)
        if m is None:
            print('Skipping {}: cannot parse a note from the file name'.format(file))
            continue
        note = (m.group(1), m.group(2))
        encoded_note = upright_piano_note_encoder(note)

        sampling_rate, wav_data = wavfile.read(file)
        # Keep only the first channel; mono files are already 1-D.
        if wav_data.ndim > 1:
            wav_data = wav_data[:, 0]

        # Last admissible window start within the first three seconds.
        max_tick = max(0, min(3*sampling_rate, len(wav_data)) - num_ticks)

        for counter, start_tick in enumerate(range(0, max_tick, tick_jumps)):
            data_point = encode_wav(source_name,
                                    encoded_note,
                                    wav_data[start_tick:(start_tick+num_ticks)],
                                    sampling_rate,
                                    params)

            if counter % train_to_test_ratio == 0:
                test_data.append(data_point)
            else:
                train_data.append(data_point)

    print("Extracted {} + {} data points from {}!"
          .format(len(train_data), len(test_data), source_name))

    return DataSet(source_name, train_data, test_data), upright_piano_note_encoder, upright_piano_note_decoder

# Encoder for Salamander Grand Piano dataset

In [8]:
def salamander_piano_note_encoder(note):
    """Build the label string for a (name, octave) pair, e.g. ('C', 3) -> 'C3'."""
    pitch_name = note[0]
    octave_part = str(note[1])
    return pitch_name + octave_part

def salamander_piano_note_decoder(note):
    """Turn a label such as 'D#5' back into ('D#', 5).

    Only the final character is interpreted as the octave number.
    """
    pitch_name, octave_digit = note[:-1], note[-1]
    return (pitch_name, int(octave_digit))

def encode_salamander_piano_data():
    """Encode the SalamanderGrandPiano-FreePats WAV files into a DataSet.

    Every ``*.wav`` file under ``../data/SalamanderGrandPiano-FreePats/`` is
    cut into overlapping windows of ``num_ticks`` samples, taken from at most
    the first second of the file, and each window is encoded with
    ``encode_wav`` using the default parameters. File names are expected to
    end in ``<name><octave digit>v<number>.wav``; the number after ``v`` is
    called ``volume`` here. Octave-8 files are skipped. The split is per
    file: all windows of a file go to the test set when ``volume + 1`` is a
    multiple of ``train_to_test_ratio``, otherwise to the training set.

    Returns:
        A tuple ``(DataSet, note_encoder, note_decoder)``.
    """
    params = default_params()
    source_name = 'SalamanderGrandPiano-FreePats'

    print('Extracting data from {}...'.format(source_name))

    # Declare constants
    num_ticks = 7000          # samples per encoded window
    tick_jumps = 3500         # distance between consecutive window starts
    train_to_test_ratio = 5   # files with (volume + 1) % 5 == 0 are held out

    # Prepare data containers
    train_data = []
    test_data = []

    # glob returns paths in arbitrary order; sort so the resulting data set
    # is ordered the same way on every run and platform.
    file_paths = sorted(glob.glob('../data/SalamanderGrandPiano-FreePats/*.wav'))
    for file in file_paths:

        # Parse the file name first so skipped files are never read from disk.
        # Raw string: the pattern contains regex escapes that are not valid
        # Python string escapes.
        m = re.search(r'([\w#]+)(\d)v(\d+)\.wav$', file)
        if m is None:
            print('Skipping {}: cannot parse a note from the file name'.format(file))
            continue
        note = (m.group(1), m.group(2))
        volume = int(m.group(3))

        # Skip octave 8 because the data is not good enough
        if note[1] == '8':
            continue

        encoded_note = salamander_piano_note_encoder(note)

        # The train/test decision depends only on the file, so pick the
        # destination list once per file.
        if (volume+1) % train_to_test_ratio == 0:
            destination = test_data
        else:
            destination = train_data

        sampling_rate, wav_data = wavfile.read(file)
        # Keep only the first channel; mono files are already 1-D.
        if wav_data.ndim > 1:
            wav_data = wav_data[:, 0]

        # Last admissible window start within the first second.
        max_tick = max(0, min(sampling_rate, len(wav_data)) - num_ticks)

        for start_tick in range(0, max_tick, tick_jumps):
            destination.append(encode_wav(source_name,
                                          encoded_note,
                                          wav_data[start_tick:(start_tick+num_ticks)],
                                          sampling_rate,
                                          params))

    print("Extracted {} + {} data points from {}!"
          .format(len(train_data), len(test_data), source_name))

    return DataSet(source_name, train_data, test_data), salamander_piano_note_encoder, salamander_piano_note_decoder

# Encoder for Spanish Classical Guitar dataset

In [9]:
def classical_guitar_note_encoder(note):
    """Concatenate a (name, octave) pair into a label, e.g. ('E', 2) -> 'E2'."""
    letter = note[0]
    octave_suffix = str(note[1])
    return letter + octave_suffix

def classical_guitar_note_decoder(note):
    """Convert a label such as 'G#3' into ('G#', 3).

    The trailing character is parsed as the octave; the preceding text is
    the note name.
    """
    letter, octave_suffix = note[:-1], note[-1]
    return (letter, int(octave_suffix))

def encode_classical_guitar_data():
    """Encode the SpanishClassicalGuitar-FreePats WAV files into a DataSet.

    Every ``*.wav`` file under ``../data/SpanishClassicalGuitar-FreePats/`` is
    cut into overlapping windows of ``num_ticks`` samples, taken from at most
    the first second of the file, and each window is encoded with
    ``encode_wav`` using the default parameters. The note label comes from
    the file name (name + one octave digit before ``.wav``). Within each
    file, every ``train_to_test_ratio``-th window (starting with the first)
    goes to the test set and the rest to the training set.

    Returns:
        A tuple ``(DataSet, note_encoder, note_decoder)``.
    """
    params = default_params()
    source_name = 'SpanishClassicalGuitar-FreePats'

    print('Extracting data from {}...'.format(source_name))

    # Declare constants
    num_ticks = 7000          # samples per encoded window
    tick_jumps = 3500         # distance between consecutive window starts
    train_to_test_ratio = 5   # one window in every 5 is held out for testing

    # Prepare data containers
    train_data = []
    test_data = []

    # glob returns paths in arbitrary order; sort so the resulting data set
    # is ordered the same way on every run and platform.
    file_paths = sorted(glob.glob('../data/SpanishClassicalGuitar-FreePats/*.wav'))
    for file in file_paths:

        # Raw string: the pattern contains regex escapes that are not valid
        # Python string escapes.
        m = re.search(r'([\w#]+)(\d)\.wav$', file)
        if m is None:
            print('Skipping {}: cannot parse a note from the file name'.format(file))
            continue
        note = (m.group(1), m.group(2))
        # Use this data set's own encoder (the one returned below) rather
        # than the Salamander one, so labels and the returned encoder agree
        # and this cell does not depend on the Salamander cell having run.
        encoded_note = classical_guitar_note_encoder(note)

        sampling_rate, wav_data = wavfile.read(file)
        # Mono files are used as-is; if a file has a channel axis, keep only
        # the first channel so encode_wav always receives 1-D samples.
        if wav_data.ndim > 1:
            wav_data = wav_data[:, 0]

        # Last admissible window start within the first second.
        max_tick = max(0, min(sampling_rate, len(wav_data)) - num_ticks)

        for counter, start_tick in enumerate(range(0, max_tick, tick_jumps)):
            data_point = encode_wav(source_name,
                                    encoded_note,
                                    wav_data[start_tick:(start_tick+num_ticks)],
                                    sampling_rate,
                                    params)

            if counter % train_to_test_ratio == 0:
                test_data.append(data_point)
            else:
                train_data.append(data_point)

    print("Extracted {} + {} data points from {}!"
          .format(len(train_data), len(test_data), source_name))

    return DataSet(source_name, train_data, test_data), classical_guitar_note_encoder, classical_guitar_note_decoder