In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import IPython
from scipy.spatial.distance import euclidean,mahalanobis
from dtw import dtw

In [2]:
audiofile = "..\\data\\live-hip-hop-loop-81bpm-131102.mp3"

## Playing Audio inside Jupyter Notebook

In [3]:
IPython.display.Audio(audiofile)

ValueError: rate must be specified when data is a numpy array or list of audio samples.

## Create Spectrogram

**Sampling**

Sound is a continuous wave. We can digitise sound by breaking the continuous wave into discrete signals. This process is called sampling. Sampling converts a sound wave into a sequence of samples or a discrete-time signal.

The load functions loads the audio file and converts it into an array of values which represent the amplitude if a sample at a given point of time.

**Sampling Rate**

The sampling rate is the number of samples per second. Hz or Hertz is the unit of the sampling rate. 20 kHz is the audible range for human beings.

In [None]:
data, sr = librosa.load(audiofile)#, sr=22050, mono=True, offset=0.0, duration=50, res_type='kaiser_best')

In [None]:
plt.figure(figsize=(15,4))
librosa.display.waveshow(data,sr=sr, max_points=5000, axis='time', transpose=False)
# plt.xlim(0,10)

## librosa beat extraction

In [None]:
audiofile = librosa.example('nutcracker')

y, sr = librosa.load(audiofile)

# Run the default beat tracker
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

print('Estimated tempo: {:.2f} beats per minute'.format(tempo))

# Convert the frame indices of beat events into timestamps
beat_times = librosa.frames_to_time(beat_frames, sr=sr)

In [None]:
audio_len = len(y) / sr
timestamps = np.linspace(0, audio_len, num=len(y))

plt.figure(figsize=(20, 4))
plt.plot(timestamps, y)
plt.ylabel('Signal Value')
plt.xlabel('Time (s)')
plt.xlim(20, 30)
plt.vlines([beat_times], ymin=-0.5, ymax=0.5, linestyles="dashed", colors="black", linewidth=0.5)
plt.title("Beat Tracker Example: Nutcracker")
# plt.savefig("beat_extraction_sample.png")
plt.show()

In [None]:
#https://github.com/d4r3topk/comparing-audio-files-python/blob/master/mfcc.py

#Loading audio files
y1, sr1 = librosa.load('..\\data\\smooth-ac-guitar-loop-93bpm-137706.mp3') 
y2, sr2 = librosa.load('..\\data\\acoustic-guitar-loop-f-91bpm-132687.mp3') 

#Showing multiple plots using subplot
plt.subplot(1, 2, 1) 
mfcc1 = librosa.feature.mfcc(y=y1,sr=sr1)   #Computing MFCC values
librosa.display.specshow(mfcc1)

plt.subplot(1, 2, 2)
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
librosa.display.specshow(mfcc2)

dist, _, cost, path = dtw(mfcc1.T, mfcc2.T, dist=euclidean)
print("The normalized distance between the two : ",dist)   # 0 for similar audios 

plt.imshow(cost.T, origin='lower', cmap=plt.get_cmap('gray'), interpolation='nearest')
plt.plot(path[0], path[1], 'w')   #creating plot for DTW

plt.show()  #To display the plots graphically