# Listening to the audio
This code enables you to listen to a sample sine wave audio.

In [1]:
%matplotlib inline
import re
import numpy as np
import pandas as pd
import string
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd
import numpy
import torch
import csv
from sklearn.decomposition import PCA
import itertools
from sklearn.ensemble import RandomForestClassifier

In [2]:
SAMPLE_RATE = 22050

In [3]:
T = 4.0    # seconds
t = np.linspace(0, T, int(T*SAMPLE_RATE), endpoint=False) # time variable
freq = 440
signal = np.sin(2*np.pi*freq*t)
#print len(signal)
ipd.Audio(signal, rate=SAMPLE_RATE)

# Creating spectrograms
Spectrograms provide a 2D feature space to analyze signals in the joint time-frequency domain.

This code generates the spectrogram files for use with the CNN example. It makes files for non-noised data!

In [4]:


f = open('train.csv', 'r')
reader = csv.reader(f)

# Get desired columns and rows from csv, 
# Each row is a sublist inside of lst
# Right now it is taking rows 0 to 70
# and columns 0:10
train = []
for row in itertools.islice(reader, 0, 20):
    train.append(map(float, row[0:88201]))
train=np.array(train)
#Just some re-shaping and dimension finding
#N = 1
#print "N:",N
# train = signal[np.newaxis,:]
print "Train shape",train.shape
N_train = train.shape[0]
NUM_SAMPLES = train.shape[1]-1

X_train = train[:,:-1]
y_train = train[:,-1]
y_train = y_train.reshape(N_train,1)

print X_train.shape
print y_train

Train shape (20, 88201)
(20, 88200)
[[ 5.]
 [ 0.]
 [ 8.]
 [ 8.]
 [ 5.]
 [ 0.]
 [ 4.]
 [ 9.]
 [ 9.]
 [ 0.]
 [ 9.]
 [ 2.]
 [ 9.]
 [ 0.]
 [ 8.]
 [ 5.]
 [ 0.]
 [ 9.]
 [ 7.]
 [ 0.]]


In [5]:
# JUST SOME FOURIER TRANSFORM PARAMETERS
BINS_OCTAVE = 12*2
N_OCTAVES = 7
NUM_BINS = BINS_OCTAVE * N_OCTAVES

In [6]:
# Given a wav time series, makes a mel spectrogram
# which is a short-time fourier transform with
# frequencies on the mel (log) scale.
def mel_spec(y):
    Q = librosa.cqt(y=y, sr=SAMPLE_RATE, bins_per_octave=BINS_OCTAVE,n_bins=NUM_BINS)
    Q_db = librosa.amplitude_to_db(Q,ref=np.max)
    return Q_db

In [7]:
# This means that the spectrograms are 168 rows (frequencies)
# By 173 columns (time frames)
i=19
song = X_train[i]
print y_train[i]


#print len(song)
#ipd.Audio(song, rate=SAMPLE_RATE)
#print(song)
#test_spec = mel_spec(song)
test_spec = librosa.feature.mfcc(song)
print test_spec
FEATS = test_spec.shape[0]
FRAMES = test_spec.shape[1]
print FEATS
print FRAMES

[ 0.]
[[-264.43353185 -262.19031393 -267.36055129 ..., -265.9187     -261.88793023
  -263.07349524]
 [ 136.19503887  135.43527643  129.18106832 ...,  127.70202003
   130.88045522  136.15870314]
 [ -32.94150493  -37.6044777   -34.96458227 ...,  -38.10535231
   -39.66040806  -33.65282637]
 ..., 
 [   9.37160854   11.23073764   10.66960619 ...,   10.54576475
    10.31790495    6.6092075 ]
 [  13.03338839   13.69946622    9.79281914 ...,    8.79209023
     8.81637504    7.88480787]
 [  -2.55063297    1.54369719    3.02408025 ...,   -8.33781475   -3.6540764
    -2.43447167]]
20
173


In [8]:
tmp_train = np.zeros((N_train,FEATS*FRAMES))

for i in range(N_train):
    test_spec = librosa.feature.mfcc(X_train[i])
    tmp_train[i,:] = [item for sublist in test_spec for item in sublist] #mel_spec(X_train[i])
#np.save('Data/xtrain_spec', tmp_train)


In [9]:
rf = RandomForestClassifier()
rf.fit(tmp_train,y_train)

  


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [10]:
rf.score(tmp_train, y_train)

1.0

In [11]:
f = open('train.csv', 'r')
reader = csv.reader(f)
test = []
for row in itertools.islice(reader, 100, 120):
    test.append(map(float, row[0:88201]))
test=np.array(test)


In [12]:
X_test = test[:,:-1]
y_test = test[:,-1]

In [13]:
N_test = test.shape[0]
tmp_test = np.zeros((N_test,FEATS*FRAMES))

for i in range(N_test):
    test_spec = librosa.feature.mfcc(X_test[i])
    tmp_test[i,:] = [item for sublist in test_spec for item in sublist] #mel_spec(X_train[i])

In [14]:
print tmp_test
print y_test
rf.score(tmp_test, y_test)

[[ -5.95986695e+02  -6.42503963e+02  -7.76417221e+02 ...,  -2.58987150e+00
   -4.01390748e-01  -4.93373174e-01]
 [ -9.33555991e+01  -1.00584341e+02  -1.18378927e+02 ...,   5.85014276e+00
    1.31588422e+00   2.97276621e+00]
 [ -8.61101639e+01  -8.58970280e+01  -9.07657677e+01 ...,  -5.94993920e+00
   -9.62539339e+00  -7.06046291e+00]
 ..., 
 [ -2.60929588e+02  -2.77144478e+02  -2.77196644e+02 ...,   7.57679663e+00
   -4.00908377e+00  -5.40595642e+00]
 [ -5.95986695e+02  -6.42503963e+02  -7.76417221e+02 ...,  -2.58987150e+00
   -4.01390748e-01  -4.93373174e-01]
 [ -2.19277859e+02  -2.16972481e+02  -2.29623064e+02 ...,   7.88263151e+00
    1.02938274e+00  -2.14844823e+00]]
[ 0.  7.  7.  0.  1.  7.  0.  0.  0.  0.  7.  7.  7.  4.  7.  0.  0.  9.
  0.  3.]


0.5

In [15]:
librosa.display.specshow(tmp_train[1], y_axis='hz')

IndexError: tuple index out of range