In [1]:
# Smoke test: bind a greeting string and echo it to stdout.
msg = "Hello world"
print(msg)

Hello world


In [21]:
# Beat tracking example
import librosa
import numpy as np

# IPython.display provides the Audio widget and the display() helper used to play the audio
import IPython.display as ipd

# 1. Get the file path to an included audio example
filename = librosa.example('nutcracker')

# 2. Load the audio as a time series, waveform 'y', represented as a one-dimensional NumPy floating point array.
#   Store the sampling rate as 'sr' of y, that is, the number of samples per second of audio.
#   By default, all audio is mixed to mono and resampled to 22050 Hz at load time. This behavior can be overridden
#   by supplying additional arguments to librosa.load
y, sr = librosa.load(filename)

# Render the audio player explicitly. A bare `ipd.Audio(...)` expression is only shown by Jupyter
# when it is the last expression of a cell; in the middle of a cell it is silently discarded.
ipd.display(ipd.Audio(filename))

# 3. Run the default beat tracker
#   Output is an estimate of the tempo (in beats per minute)
#   and an array of frame numbers corresponding to detected beat events
#   Frames correspond to short windows of the signal y, each separated by hop_length = 512
#   samples. Librosa uses centered frames, so that the kth frame is centered around sample
#   k * hop_length
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

# Depending on the librosa version, tempo may be a Python float or a one-element NumPy array;
# an array cannot be formatted with '{:.2f}', so coerce it to a scalar float for printing only.
print('Estimated tempo: {:.2f} beats per minute'.format(float(np.atleast_1d(tempo)[0])))

# 4. Convert the frame indices of beat events into timestamps
#   beat_times will be an array of timestamps (in seconds) corresponding to detected beat events
beat_times = librosa.frames_to_time(beat_frames, sr=sr)

Estimated tempo: 107.67 beats per minute


In [20]:
# Feature extraction sample
import numpy as np
import librosa

# Load the example clip
y, sr = librosa.load(librosa.example('nutcracker'))

# Set the hop length; at 22050 Hz, 512 samples ~= 23ms
# This single value is passed to every frame-based call below so that beat frames and
# feature columns always refer to the same time grid.
hop_length = 512

# Using effects module, separate harmonics(tonal) and percussives(transient) into two waveforms.
# y is separated into these two time series and each have the same shape and duration as y.
# Motivation(s):
#   1. Percussive elements tend to be stronger indicators of rhythmic content, can help provide
#       stable beat tracking results
#   2. Percussive elements can pollute tonal feature representations (such as chroma) by contributing
#       energy across all frequency bands, so we'd be better off without them.
y_harmonic, y_percussive = librosa.effects.hpss(y)

# Beat track on the percussive signal
# hop_length is passed explicitly so beat_frames stay aligned with the feature columns
# even if hop_length above is changed.
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr, hop_length=hop_length)

# From feature module, extract the Mel-frequency cepstral coefficients from the raw signal y
# Output of this function is the matrix mfcc, which is a numpy.ndarray of shape (n_mfcc, T)
# where T denotes the track duration in frames. Note that we use the same hop_length here as
# in the beat tracker, so the detected beat_frames values correspond to columns of mfcc
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

# Use feature manipulation, delta, which computes (smoothed) first-order differences among columns
# of its input. Result matrix mfcc_delta has the same shape as the input mfcc.
mfcc_delta = librosa.feature.delta(mfcc)

# Feature manipulation, sync, which aggregates columns of its input between sample indices (e.g. beat frames)
# Here, we've vertically stacked the mfcc and mfcc_delta matrices together. The result of this operation is
# a matrix beat_mfcc_delta with the same number of rows as its input, but the number of columns depends on beat_frames.
# Each column beat_mfcc_delta[:, k] will be the average of input columns between beat_frames[k] and beat_frames[k+1].
# beat_frames will be expanded to span the full range [0, T] so that all data is accounted for.
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

# Compute chroma features from the harmonic signal
# Chromagram will be a numpy.ndarray of shape (12, T), and each row corresponds to a pitch class (e.g., C, C#, etc)
# Each column of chromagram is normalized by its peak value, though this behavior can be overridden by setting the
# norm parameter
# The same hop_length is used here so that chroma columns line up with beat_frames.
chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, hop_length=hop_length)

# Synchronize chroma between beat events
# We've replaced the default aggregate operation(average, as used above for MFCCs), with the median
# In general, any statistical summarization function can be supplied here, including np.max(), np.min(), np.std()
beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)

# Finally, stack all beat-synchronous features together
# Resulting in a feature matrix, beat_features, of shape (12+13+13, #beat intervals)
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])




beat_mfcc_delta:  [[-5.96049988e+02 -4.71975800e+02 -4.18740723e+02 ... -2.30209213e+02
  -1.81633041e+02 -4.22515228e+02]
 [ 5.29752111e+00  1.19491829e+02  1.19481026e+02 ...  6.63411026e+01
   6.66919479e+01  7.45729675e+01]
 [ 1.79374278e+00  5.01075478e+01  1.31605749e+01 ... -6.48235626e+01
  -6.93393021e+01 -2.06293182e+01]
 ...
 [-6.54889792e-02 -1.46449924e-01  5.33926859e-02 ... -1.69989929e-01
   6.22177720e-01  1.29084542e-01]
 [-2.32196786e-02 -1.37379631e-01 -1.14387721e-02 ... -2.49835059e-01
   2.49134541e-01  8.30471702e-03]
 [ 2.86480542e-02 -2.01024517e-01 -5.54223433e-02 ...  7.50200152e-02
  -6.15619838e-01  7.62829632e-02]]


In [1]:
# Print the interpreter's version string (sys.version) so the runtime used for this notebook is recorded.
import sys
print(sys.version)


3.8.2 (default, Nov  4 2020, 21:23:28) 
[Clang 12.0.0 (clang-1200.0.32.28)]
