In [6]:
import librosa
import tensorflow
import numpy as np
import parse_samples

In [7]:
def get_feature_vector(audio_path, n_mels=256):
    """Return a peak-aligned, normalised mel-band energy profile of an audio file.

    The file is loaded with ``librosa.load`` and turned into a mel power
    spectrogram expressed in dB relative to its own maximum.  The dB values
    are summed over time, giving one number per mel band.  That per-band
    profile is then:

    * cut so that the strongest band becomes index 0 (weaker bands below it
      are dropped),
    * lifted by ``80 * timesteps`` so that a band sitting at the dB floor in
      every frame maps to 0,
    * divided by the strongest band's value so that index 0 equals 1,
    * zero-filled at the end so the length is always ``n_mels``.

    Parameters
    ----------
    audio_path : str
        Path handed to ``librosa.load``.
    n_mels : int, optional
        Number of mel bands requested from the spectrogram; also the length
        of the returned vector.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with ``n_mels`` entries.
    """
    # Dynamic range kept by the dB conversion.  It is passed explicitly so the
    # lift applied below can never drift away from the actual floor.
    floor_db = 80

    y, sr = librosa.load(audio_path)

    # Mel-scaled power spectrogram.  `y` is passed by keyword because newer
    # librosa releases no longer accept it positionally.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    timesteps = S.shape[1]

    # Convert to dB using the peak power as reference.  `logamplitude` was
    # removed from librosa in favour of `power_to_db`; fall back to the old
    # name only when running against a release that predates the new one.
    to_db = getattr(librosa, "power_to_db", None)
    if to_db is not None:
        log_S = to_db(S, ref=np.max, top_db=floor_db)
    else:
        log_S = librosa.logamplitude(S, ref_power=np.max, top_db=floor_db)

    band_energy = np.sum(log_S, axis=1)
    peak = np.argmax(band_energy)

    # Keep the strongest band and everything above it, then lift so that the
    # lowest possible time-summed value (-floor_db in every frame) becomes 0.
    lifted = band_energy[peak:] + floor_db * timesteps

    # Scale so the strongest band is exactly 1.
    profile = lifted / lifted[0]

    # Zero-fill the tail so every file yields a vector of the same length.
    return np.pad(profile, (0, peak), 'constant', constant_values=0)

In [11]:
flute_samples = parse_samples.get_flute_samples()
print(len(flute_samples))
violin_samples = parse_samples.get_violin_samples()
print(len(violin_samples))

# Accumulates (feature_vector, label) pairs: flute recordings are labelled -1,
# violin recordings +1.
data = []

for banner, samples, label in (
    ("Getting flute samples...", flute_samples, -1),
    ("Getting violin samples...", violin_samples, 1),
):
    print(banner)
    for sample in samples:
        data.append((get_feature_vector(sample.filename), label))
        # Report progress each time the running total reaches a multiple of 50.
        if len(data) % 50 == 0:
            print(len(data))

print("Done.")

878
1502
Getting flute samples...
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
Getting violin samples...
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
Done.


In [12]:
# Seed for the shuffle below, so that a fresh "Restart & Run All" yields the
# same train/test split (provided `data` is built in the same order each run).
SPLIT_SEED = 0
# Number of shuffled samples used for training; the rest is held out for testing.
N_TRAIN = 1600

# A private generator is used so NumPy's global random state is left untouched.
# NOTE: the shuffle is in place, so re-running only this cell reshuffles the
# already shuffled list; re-run the data-loading cell first to get the same split.
np.random.RandomState(SPLIT_SEED).shuffle(data)
train_data = data[:N_TRAIN]
test_data = data[N_TRAIN:]

In [15]:
# Split every (feature_vector, label) pair into parallel feature and label
# lists, separately for the training and the test portion.
x_train = [pair[0] for pair in train_data]
y_train = [pair[1] for pair in train_data]
x_test = [pair[0] for pair in test_data]
y_test = [pair[1] for pair in test_data]

In [17]:
import sklearn
from sklearn.svm import SVC

# Fit a support vector classifier with its default settings on the training
# split; fit() returns the estimator itself, so it can be bound directly.
model = SVC().fit(x_train, y_train)
# Show the fitted estimator as the cell's output.
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [19]:
# Score the held-out set with a single batched predict() call instead of one
# call per sample.  Both counters stay floats so the printed values keep
# their "N.0" form.
total = float(len(y_test))
if total:
    predictions = model.predict(x_test)
    # Count the positions where the predicted label equals the true label.
    correct = float(np.count_nonzero(predictions == np.asarray(y_test)))
else:
    # predict() rejects an empty batch; with nothing to score, nothing is correct.
    correct = 0.
print(correct)
print(total)

576.0
780.0
