<a href="https://colab.research.google.com/github/jcdevaney/pyAMPACTtutorials/blob/main/04_pyAMPACT_audio_Performance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1>pyAMPACT Performance Data Calculations</h1>

In [1]:
# Fetch the tutorial repository; the audio/score test files used below live in it.
!git clone https://github.com/jcdevaney/pyAMPACTtutorials.git
from IPython.utils import io
print('Importing libraries...')
# capture_output() swallows everything written inside the block (the pip logs
# and anything emitted while importing pyampact) so the cell output stays short.
with io.capture_output() as captured:
    !pip install --upgrade pandas
    # -i points pip at the TestPyPI index; --no-deps installs pyampact only,
    # without resolving its dependencies.
    !pip install -i https://test.pypi.org/simple/ --no-deps pyampact
    import pyampact
import pandas as pd
import numpy as np
import librosa

Cloning into 'pyAMPACTtutorials'...
remote: Enumerating objects: 433, done.[K
remote: Counting objects: 100% (256/256), done.[K
remote: Compressing objects: 100% (149/149), done.[K
remote: Total 433 (delta 155), reused 182 (delta 106), pack-reused 177[K
Receiving objects: 100% (433/433), 19.45 MiB | 16.31 MiB/s, done.
Resolving deltas: 100% (216/216), done.
Importing libraries...


## Import and align symbolic and audio files (see 01-pyAMPACT_introduction.ipynb for details)

In [2]:
# Audio recording and the matching symbolic score (MEI) from the cloned
# tutorial repository.
audio_file = '/content/pyAMPACTtutorials/test_files/aveMaria_seg.wav'
score_file = '/content/pyAMPACTtutorials/test_files/aveMaria_seg.mei'

# NOTE(review): librosa.load resamples to 22050 Hz unless sr=None is passed, so
# `original_sr` is the rate of the loaded signal, not necessarily the file's
# native rate - confirm this is intended.
y, original_sr = librosa.load(audio_file)

# Parse the score into a pyampact Score object.
piece = pyampact.Score(score_file)


# Alignment settings handed to run_alignment below. Their meanings are
# inferred from the names only - confirm against the pyampact documentation:
target_sr = 4000   # presumably the sample rate the alignment works at
win_ms = 100       # presumably the analysis window length in milliseconds
hop_length = 32    # presumably the hop between analysis frames
width = 3          # TODO confirm meaning
n_harm = 3         # presumably the number of harmonics considered

# Align the audio to the score. The first element of the result (`res`) is
# later indexed with 'on' and 'off' to obtain note onsets and offsets.
res, dtw, spec, nmat = pyampact.run_alignment(
    y, original_sr, piece, piece.nmats(), width, target_sr, n_harm, win_ms, hop_length)

### Let's examine the contents of data_compilation.py

In [3]:
from pyampact.alignmentUtils import f0_est_weighted_sum_spec
from pyampact.performance import estimate_perceptual_parameters

# For every part in the score, walk through the aligned notes, estimate the
# frame-wise f0 and power for each note's stretch of audio, and then derive
# the note-level performance parameters with estimate_perceptual_parameters.
# (This uses code copied from the data_compilation function.)

nmat = piece.nmats()

# Same load call as in the alignment cell above.
y, original_sr = librosa.load(audio_file)

# Note onsets and offsets from the alignment result. They are the same for
# every part, so look them up once instead of on every pass through the loop.
ons = res['on']
offs = res['off']

all_note_vals = []  # one list per part, holding one parameter mapping per note
all_note_ids = []   # one list per part, holding the matching nmat index labels

for key, df in nmat.items():

    midiList = np.array(df['MIDI'])

    note_vals = []
    note_ids = []

    for loc in range(len(ons)):
        # Estimate f0 for a matrix (or vector) of amplitudes and frequencies,
        # restricted to the audio between this note's onset and offset.
        f0, pwr, t, M, xf = f0_est_weighted_sum_spec(
            audio_file, ons[loc], offs[loc], midiList[loc], y, original_sr)
        # Estimate note-wise perceptual values. The trailing 256 and 1 are
        # passed through unchanged from the original call - see the
        # estimate_perceptual_parameters documentation for their meaning.
        note_vals.append(estimate_perceptual_parameters(f0, pwr, M, original_sr, 256, 1))
        note_ids.append(df.index[loc])

    all_note_vals.append(note_vals)
    all_note_ids.append(note_ids)

# Calculate parameters for the first note of the first part.
i = 0    # position of the note within the part
loc = 0  # position of the part within all_note_vals

# all_note_vals was filled in nmat iteration order, so part `loc` is the
# loc-th DataFrame in nmat. Select it explicitly rather than relying on
# whatever `df` was left bound to by the loop above (that is the last part,
# which only matches part 0 when the score has a single part).
df = list(nmat.values())[loc]

# Parameter mapping for the chosen note.
note = all_note_vals[loc][i]

# Address the row by its real index label. Writing df.loc[i, ...] with the
# bare position would append a new row labelled `i` whenever the index holds
# note identifiers rather than 0..n-1.
row = df.index[i]

# Frame-wise f0 estimates, stored as a string so they fit in a single cell.
df.loc[row, 'f0Vals'] = str(note['f0_vals'])

# Mean f0
df.loc[row, 'meanf0'] = np.mean(note['f0_vals'])

# Perceived pitch based on
# H. Gockel, B. C. Moore, and R. P. Carlyon, "Influence
# of rate of change of frequency on the overall pitch of
# frequency-modulated tones," The Journal of the Acoustical
# Society of America, vol. 109, no. 2, pp. 701-712, 2001
df.loc[row, 'ppitch1'] = note['ppitch'][0]
df.loc[row, 'ppitch2'] = note['ppitch'][1]

# Jitter is calculated from the difference between sequential
# frame-wise f0 estimates.
df.loc[row, 'jitter'] = note['jitter']

# Vibrato descriptors are calculated by first computing the spectrum of the
# note-segmented f0 trace with an FFT.

# Vibrato depth is estimated by doubling the maximum absolute value in the FFT.
df.loc[row, 'vibratoDepth'] = note['vibrato_depth']

# Vibrato rate is estimated by finding the position of the maximum
# absolute value in the FFT.
df.loc[row, 'vibratoRate'] = note['vibrato_rate']

# Frame-wise power estimates, stored as a string so they fit in a single cell.
df.loc[row, 'pwrVals'] = str(note['pwr_vals'])

# Mean power is calculated from the note-level frame-wise power
# estimates using the arithmetic mean.
df.loc[row, 'meanPwr'] = np.mean(note['pwr_vals'])

# Shimmer is approximated in an analogous manner to jitter, but by
# calculating the difference between sequential frame-wise power estimates.
df.loc[row, 'shimmer'] = note['shimmer']

# Timbre is estimated from the spectral representation used to calculate
# the frame-wise f0 and power estimates for each note. According to the
# original note, the relevant librosa functions are run on these
# spectrogram-like representations; the descriptors stored here are spectral
# centroid, bandwidth, contrast, flatness, and rolloff. For each one the
# frame-wise values are kept as a string alongside their mean.
df.loc[row, 'specCentVals'] = str(note['spec_centroid'])
df.loc[row, 'meanSpecCent'] = np.mean(note['spec_centroid'])
df.loc[row, 'specBandwidthVals'] = str(note['spec_bandwidth'])
df.loc[row, 'meanSpecBandwidth'] = np.mean(note['spec_bandwidth'])
df.loc[row, 'specContrastVals'] = str(note['spec_contrast'])
df.loc[row, 'meanSpecContrast'] = np.mean(note['spec_contrast'])
df.loc[row, 'specFlatnessVals'] = str(note['spec_flatness'])
df.loc[row, 'meanSpecFlatness'] = np.mean(note['spec_flatness'])
df.loc[row, 'specRolloffVals'] = str(note['spec_rolloff'])
df.loc[row, 'meanSpecRolloff'] = np.mean(note['spec_rolloff'])

# You can calculate other parameters from the f0, power, or
# magnitude spectrum chunks here.