# What Type of Music Do You Like?

## Takes a user-selected set of songs as input and reports the similarities and differences across them

### Importing Libraries

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import IPython
import IPython.display as ipd
import scipy
import librosa
import mirdata
import statistics as st
import tensorflow as tf
import jams
import madmom
import io
from pathlib import Path
import select
from shutil import rmtree
import subprocess as sp
import sys
from typing import Dict, Tuple, Optional, IO
import os
from chord_extractor.extractors import Chordino
import crepe


import basic_pitch
import pedalboard

In [None]:
# Print versions of the packages
print("Numpy version:", np.__version__)
print("Matplotlib version:", matplotlib.__version__)  # Use matplotlib directly for version
print("IPython version:", IPython.__version__)
print("Scipy version:", scipy.__version__)
print("Librosa version:", librosa.__version__)
# mirdata and mir_eval don't have a __version__ attribute in all versions
try:
    print("Mirdata version:", mirdata.__version__)
except AttributeError:
    print("Mirdata version: Version not available")
# mir_eval is not imported by the import cell, so the lookup can also fail
# with NameError; catch it so the rest of the report is still printed.
try:
    print("Mir_eval version:", mir_eval.__version__)
except (NameError, AttributeError):
    print("Mir_eval version: Version not available")
print("TensorFlow version:", tf.__version__)
# jams might not have a __version__ attribute
try:
    print("JAMS version:", jams.__version__)
except AttributeError:
    print("JAMS version: Version not available")

# essentia is not imported by the import cell either (NameError), and the
# fallback message must name Essentia rather than JAMS.
try:
    print("Essentia version:", essentia.__version__)
except (NameError, AttributeError):
    print("Essentia version: Version not available")


### Load the Audio Files

In [None]:
from IPython.display import Audio
# Folders used by the notebook: where the songs are read from and where the
# separated stems are written to.
# (alternative input folder: '/Users/bcarone/PycharmProjects/WTOMDYL/uploaded_files')
in_path = '/Users/bcarone/PycharmProjects/WTOMDYL/songs/'
out_path = '/Users/bcarone/PycharmProjects/WTOMDYL/songs_separated/'

# File suffixes (without the dot) that count as audio files.
extensions = ["mp3", "wav", "ogg", "flac"]

# Per-song raw data, keyed by song path.
y_dict, sr_dict = {}, {}
stft_dict, magnitude_spectrogram_dict = {}, {}

# Per-song extracted features, keyed by song path.
chroma_dict, tempo_dict, pulse_clarity_dict = {}, {}, {}
spectral_centroids_dict, spectral_bandwidth_dict = {}, {}
spectral_flux_dict, rms_energy_dict = {}, {}

def find_files(in_path, exts=None):
    """Return the audio files located directly inside ``in_path``.

    Parameters
    ----------
    in_path : str or Path
        Folder to scan (not recursive).
    exts : iterable of str, optional
        Accepted file suffixes, with or without the leading dot,
        case-insensitive. Defaults to the module-level ``extensions`` list.

    Returns
    -------
    list of Path
        Matching regular files in sorted order.
    """
    wanted = {
        ext.lower().lstrip(".")
        for ext in (extensions if exts is None else exts)
    }
    # iterdir() yields entries in arbitrary order; sorting keeps positional
    # lookups such as songs[0] / songs[2] reproducible between runs.
    # is_file() skips folders whose name merely ends with an audio suffix.
    return sorted(
        entry
        for entry in Path(in_path).iterdir()
        if entry.is_file() and entry.suffix.lower().lstrip(".") in wanted
    )

songs = [str(f) for f in find_files(in_path)]
if not songs:
    print(f"No valid audio files in {in_path}")


# Load every song, extract its features and show an audio player for it
for song_path in songs:
    # Decode the audio (no sample rate is passed, so librosa's default applies)
    y_song, sr_song = librosa.load(song_path)
    y_dict[song_path], sr_dict[song_path] = y_song, sr_song

    # Compute the Short-Time Fourier Transform (STFT) once; the magnitude
    # spectrogram below is derived from it instead of running a second STFT.
    stft_dict[song_path] = librosa.stft(y_song)
    magnitude_spectrogram_dict[song_path] = np.abs(stft_dict[song_path])

    # Compute the chroma features
    chroma_dict[song_path] = librosa.feature.chroma_cqt(y=y_song, sr=sr_song)

    # Compute the tempo
    tempo, _ = librosa.beat.beat_track(y=y_song, sr=sr_song)
    tempo_dict[song_path] = tempo

    # Compute onset envelope; pulse clarity is taken as its standard deviation
    onset_env = librosa.onset.onset_strength(y=y_song, sr=sr_song)
    pulse_clarity_dict[song_path] = np.std(onset_env)

    # Compute timbre-related features
    spectral_centroids_dict[song_path] = librosa.feature.spectral_centroid(y=y_song, sr=sr_song)
    spectral_bandwidth_dict[song_path] = librosa.feature.spectral_bandwidth(y=y_song, sr=sr_song)

    # Compute the spectral flux: L2 norm of the frame-to-frame magnitude change
    spectral_flux_dict[song_path] = np.sqrt(
        np.sum(np.diff(magnitude_spectrogram_dict[song_path], axis=1)**2, axis=0)
    )

    # Compute RMS energy
    rms_energy_dict[song_path] = librosa.feature.rms(y=y_song)

    # Print extracted features for each song
    print(f"Song: {song_path}")
    print(f"Tempo: {tempo_dict[song_path]} BPM")
    print(f"Pulse Clarity (std of onset strength): {pulse_clarity_dict[song_path]}")
    print(f"Average Spectral Centroid: {np.mean(spectral_centroids_dict[song_path])}")
    print(f"Average Spectral Bandwidth: {np.mean(spectral_bandwidth_dict[song_path])}")
    print(f"Spectral Flux: {np.mean(spectral_flux_dict[song_path])}")
    print(f"Average RMS Energy: {np.mean(rms_energy_dict[song_path])}")
    print("------")
    print("------")

    # Embed a player for the original file
    audio_widget = IPython.display.Audio(song_path, rate=sr_dict[song_path])
    IPython.display.display(audio_widget)

In [None]:

# Track analysed in this cell (key into the per-song dictionaries)
track_path = '/Users/bcarone/PycharmProjects/WTOMDYL/songs/Five_BrandonCarone.mp3'
track_y = y_dict[track_path]
track_sr = sr_dict[track_path]

# Compute the short-time Fourier transform (STFT)
S = np.abs(librosa.stft(track_y))

# Compute the spectral centroid
spectral_centroid = librosa.feature.spectral_centroid(S=S, sr=track_sr)

# Compute the fluctuation centroid
# First, compute the envelope of the audio signal
hop_length = 512
envelope = np.abs(librosa.onset.onset_strength(y=track_y, sr=track_sr, hop_length=hop_length))

# Compute the modulation spectrum.
# The envelope holds one value per hop, so it is sampled at sr / hop_length,
# not at the audio rate; the frequency axis must use that spacing.
envelope_rate = track_sr / hop_length
modulation_spectrum = np.abs(np.fft.fft(envelope))
frequencies = np.fft.fftfreq(len(envelope), d=1 / envelope_rate)

# Compute the fluctuation centroid.
# The spectrum of a real signal is symmetric, so weighting by signed
# frequencies cancels out to ~0; weight by |f| instead.
fluctuation_centroid = np.sum(np.abs(frequencies) * modulation_spectrum) / np.sum(modulation_spectrum)
print(f"Fluctuation Centroid: {fluctuation_centroid}")

# Attack time: from the first frame reaching 10% of the envelope peak to the peak
peak_idx = np.argmax(envelope)
threshold = 0.1 * envelope[peak_idx]
attack_start_idx = np.where(envelope >= threshold)[0][0]
# Convert frames to seconds with this track's actual hop size and sample rate
attack_time = (peak_idx - attack_start_idx) * hop_length / track_sr
print(attack_time)

In [None]:
def shannon_entropy(signal):
    """Return the Shannon entropy (in bits) of a signal's magnitude distribution.

    The absolute values of ``signal`` are normalised to sum to one and treated
    as a probability distribution.

    Parameters
    ----------
    signal : array_like
        Real or complex samples of any shape.

    Returns
    -------
    float
        Entropy in bits. ``0.0`` for an empty or all-zero signal, where no
        distribution can be formed.
    """
    magnitudes = np.abs(np.asarray(signal)).ravel()
    total = magnitudes.sum()
    if magnitudes.size == 0 or total == 0:
        # Nothing to normalise: avoid 0/0 producing NaN.
        return 0.0
    probabilities = magnitudes / total
    # Zero-probability bins contribute nothing (p*log p -> 0); dropping them
    # avoids log(0) without biasing the result with an epsilon.
    nonzero = probabilities[probabilities > 0]
    return float(-np.sum(nonzero * np.log2(nonzero)))

# Entropy of the raw waveform
five_waveform = y_dict['/Users/bcarone/PycharmProjects/WTOMDYL/songs/Five_BrandonCarone.mp3']
entropy = shannon_entropy(five_waveform)
print(f"Entropy: {entropy}")

# Entropy of the modulation spectrum (computed in the fluctuation-centroid cell)
fluctuation_entropy = shannon_entropy(modulation_spectrum)
print(f"Fluctuation Entropy: {fluctuation_entropy}")

# NOTE(review): flatness is measured on a different track than the entropy
# values above - confirm this is intended.
you_waveform = y_dict['/Users/bcarone/PycharmProjects/WTOMDYL/songs/You_DJRegard,TroySivan.mp3']
spectral_flatness = librosa.feature.spectral_flatness(y=you_waveform)

# Report the flatness averaged over all frames
print(f"Spectral Flatness: {np.mean(spectral_flatness)}")

In [None]:



# Run CREPE pitch tracking on one track (viterbi=True is passed through to crepe.predict)
crepe_track = '/Users/bcarone/PycharmProjects/WTOMDYL/songs/Five_BrandonCarone.mp3'
time, frequency, confidence, activation = crepe.predict(
    y_dict[crepe_track],
    sr_dict[crepe_track],
    viterbi=True,
)

In [None]:
# Convert the CREPE frequency estimates (Hz) to note names
notess = librosa.hz_to_note(frequency)

In [None]:
# Pair every note name with its timestamp; the set drops exact duplicates
freqss = set(zip(notess, time))
# Print the pairs in chronological order (sorted once; the earlier
# stand-alone sorted() call discarded its result)
print(sorted(freqss, key=lambda x: x[1]))

In [None]:
# Display the frame timestamps returned by crepe.predict
time

In [None]:
import scipy.ndimage

# Track analysed by the sub-band flux cell
filename = '/Users/bcarone/PycharmProjects/WTOMDYL/songs/You_DJRegard,TroySivan.mp3'
# sr=None is passed so librosa does not resample to its default rate
y, sr = librosa.load(filename, sr=None)

# Magnitude spectrogram of the track
S = np.abs(librosa.stft(y))

# Frequency bands in Hz: 0-50, then octave-wide bands doubling from 50-100
# up to 12800-25600
bands = [(0, 50)] + [(50 * 2 ** k, 100 * 2 ** k) for k in range(9)]

def band_pass_filter(S, sr, low_freq, high_freq):
    """Zero every spectrogram row whose bin frequency is outside a band.

    Parameters
    ----------
    S : np.ndarray
        Spectrogram with frequency bins along axis 0 and frames along axis 1.
        Assumed to come from an STFT with an even ``n_fft``, i.e.
        ``S.shape[0] == 1 + n_fft // 2``.
    sr : number
        Sample rate of the analysed audio in Hz.
    low_freq, high_freq : number
        Band limits in Hz; the band is ``low_freq <= f < high_freq``.

    Returns
    -------
    np.ndarray
        Copy of ``S`` with the out-of-band rows set to zero.
    """
    # 1 + n_fft/2 rows  ->  n_fft = 2 * (rows - 1). The previous
    # ``rows * 2 - 1`` gave n_fft + 1 and shifted every bin frequency.
    n_fft = max(2 * (S.shape[0] - 1), 1)
    # Bin centre frequencies of a real FFT (same values as
    # librosa.fft_frequencies(sr=sr, n_fft=n_fft)).
    fft_frequencies = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    band = np.logical_and(fft_frequencies >= low_freq, fft_frequencies < high_freq)
    S_band = np.copy(S)  # leave the caller's spectrogram untouched
    S_band[~band, :] = 0
    return S_band

def spectral_flux(S_band):
    """Return the per-frame spectral flux of a spectrogram.

    For each pair of consecutive frames (columns) the flux is the Euclidean
    norm of their difference, so the result has one value fewer than the
    number of frames.
    """
    frame_deltas = np.diff(S_band, axis=1)
    energy_per_step = np.sum(frame_deltas**2, axis=0)
    return np.sqrt(energy_per_step)

# Spectral flux of every frequency band, in the order the bands are listed
sub_band_fluxes = [
    spectral_flux(band_pass_filter(S, sr, low, high))
    for low, high in bands
]

# Report the average flux of each band
for i, flux in enumerate(sub_band_fluxes):
    print(f"Sub-band {i+1} (mean flux): {flux.mean():.4f}")

In [None]:
# Only the class `Chordino` is imported; the lowercase name `chordino` was
# never defined and raised NameError.
help(Chordino.preprocess)

In [None]:
# List the audio file paths discovered in in_path
print(songs)

### Extract Global Features

In [None]:
# NOTE(review): `es` is not imported in the visible cells - presumably
# `import essentia.standard as es`; confirm before running.
# Configure the extractor to aggregate only 'mean' and 'stdev' statistics for
# the low-level, rhythm and tonal frame features.
music_extractor = es.MusicExtractor(
    lowlevelStats=['mean', 'stdev'],
    rhythmStats=['mean', 'stdev'],
    tonalStats=['mean', 'stdev'],
)

# Analyse the first discovered song
features, features_frames = music_extractor(songs[0])

# Show every descriptor name in the pool, alphabetically
print(sorted(features.descriptorNames()))

In [None]:
# Horizontal rule separating the groups of descriptors
separator = "-"*80

print("Filename:", features['metadata.tags.file_name'])
print(separator)

# Loudness descriptors
print("Replay gain:", features['metadata.audio_properties.replay_gain'])
print("EBU128 integrated loudness:", features['lowlevel.loudness_ebu128.integrated'])
print("EBU128 loudness range:", features['lowlevel.loudness_ebu128.loudness_range'])
print(separator)

# Timbre descriptor
print("MFCC mean:", features['lowlevel.mfcc.mean'])
print(separator)

# Rhythm descriptors
print("BPM:", features['rhythm.bpm'])
print("Beat positions (sec.)", features['rhythm.beats_position'])
print(separator)

# Tonal descriptors
print(
    "Key/scale estimation (using a profile specifically suited for electronic music):",
    features['tonal.key_edma.key'],
    features['tonal.key_edma.scale'],
)

In [None]:
def analyze_audio(file):
    """Extract a few global descriptors from an audio file with Essentia.

    NOTE(review): `es` is not imported in the visible cells - presumably
    `import essentia.standard as es`; confirm.

    Parameters
    ----------
    file : str
        Path to the audio file.

    Returns
    -------
    dict
        ``tempo`` (BPM), ``loudness``, ``key`` ("<key> <scale>"),
        ``key_strength``, ``spectral_centroid`` (Hz) and ``spectral_spread``
        (variance of the spectrum around its centroid, Hz^2), the last two
        computed from one spectrum of the whole signal.
    """
    # Made explicit because the spectral descriptors below are scaled by it.
    sample_rate = 44100
    audio = es.MonoLoader(filename=file, sampleRate=sample_rate)()

    # Basic audio features
    bpm, _beats, _beats_confidence, _, _ = es.RhythmExtractor2013()(audio)
    loudness = es.Loudness()(audio)
    key, scale, strength = es.KeyExtractor()(audio)

    # Advanced spectral features.
    # Essentia's FFT only accepts even-sized input, so drop a trailing sample
    # when the length is odd.
    even_audio = audio[:len(audio) - (len(audio) % 2)]
    spec = es.Spectrum()(even_audio)

    # `range` maps the bin axis onto 0..Nyquist so the results are in Hz.
    nyquist = sample_rate / 2.0
    centroid = es.Centroid(range=nyquist)(spec)
    # CentralMoments returns the moments, not the centroid; DistributionShape
    # turns them into (spread, skewness, kurtosis) - spread is the FIRST value.
    central_moments = es.CentralMoments(range=nyquist)(spec)
    spread, _skewness, _kurtosis = es.DistributionShape()(central_moments)

    # Pack all features into a dictionary
    features = {
        'tempo': bpm,
        'loudness': loudness,
        'key': key + ' ' + scale,
        'key_strength': strength,
        'spectral_centroid': centroid,
        'spectral_spread': spread
    }
    return features


In [None]:
# Run the Essentia-based analysis on the first discovered song
analyze_audio(songs[0])

### Downbeats and onset detection

In [None]:
def estimate_beats(audio_path, onset_type, hop_length=512):
    """Estimate beat positions from an onset novelty / activation function.

    When onset_type = 'spectral_flux', librosa.onset.onset_strength provides
    the novelty function and the beats are the local maxima of its predominant
    local pulse (PLP) curve.
    When onset_type = 'machine_learning', madmom's RNNBeatProcessor provides
    the activation function and DBNBeatTrackingProcessor (100 fps) picks the
    beats.

    Parameters
    ----------
    audio_path : str
        Path to input audio file
    onset_type : str
        One of 'spectral_flux' or 'machine_learning'
    hop_length : int
        Hop size in samples of the spectral-flux novelty function. Not used by
        the 'machine_learning' path.

    Returns
    -------
    beat_times : 1-d np.array
        Array of time stamps of the estimated beats in seconds.
    activation : 1-d np.array
        Array with the activation (or novelty function) values.

    Raises
    ------
    ValueError
        If ``onset_type`` is not one of the two supported values.
    """
    if onset_type == 'machine_learning':
        proc = madmom.features.beats.RNNBeatProcessor()
        db_proc = madmom.features.beats.DBNBeatTrackingProcessor(fps=100)
        activation = proc(audio_path)
        beat_times = db_proc(activation)
    elif onset_type == 'spectral_flux':
        y, sr = librosa.load(audio_path)
        activation = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
        # The PLP curve peaks where the locally dominant tempo places a beat.
        pulse = librosa.beat.plp(onset_envelope=activation, sr=sr, hop_length=hop_length)
        beat_frames = np.flatnonzero(librosa.util.localmax(pulse))
        beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=hop_length)
    else:
        raise ValueError(
            f"onset_type must be 'spectral_flux' or 'machine_learning', got {onset_type!r}"
        )

    return beat_times, activation

In [None]:
# Run the machine-learned beat tracker on every song and preview its first beats
for song_path in songs:
    print("Estimating beats with a machine learned activation function...")
    example_beats_ml, novelty_ml = estimate_beats(song_path, 'machine_learning')
    print(f"The first 5 detected ML beats are at {example_beats_ml[:5]}")

In [None]:
def plot_and_sonify_track(track_id):
    """Plot the machine-learned beat activation of a track and sonify its beats.

    The activation function is plotted with the estimated beats as vertical
    lines. A click track at the beat positions is then mixed with the original
    audio and shown as an audio player.

    Parameters
    ----------
    track_id : str
        Path to the audio file (it is passed to both estimate_beats and
        librosa.load).

    Returns
    -------
    None
    """
    # Frame rate of the activation, matching the fps=100 used in estimate_beats
    activation_fps = 100

    # Estimate beats using machine learning method
    beats_ml, novelty_ml = estimate_beats(track_id, 'machine_learning')
    novelty_ml = np.asarray(novelty_ml)
    # Integer arange divided by the frame rate always has exactly one time
    # stamp per frame; a float-step arange can come out one element off.
    frame_time_ml = np.arange(len(novelty_ml)) / activation_fps

    # Load the audio for the x-axis limit and the sonification
    y, sr = librosa.load(track_id)

    # Plotting
    fig, ax = plt.subplots(nrows=1, figsize=(18, 5))
    fig.suptitle('Machine Learned Beat Estimation')

    # Machine Learned activation and beat plot
    ax.plot(frame_time_ml, novelty_ml, label='Machine Learning Activation')
    ax.vlines(
        beats_ml,
        ymin=novelty_ml.min(),
        ymax=novelty_ml.max(),
        label='Estimated Beats',
        color='red',
        linestyle=':',
        linewidth=2,
    )
    ax.legend(loc='upper right')
    ax.set_title('Machine Learning Beats')
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Activation')
    ax.set_xlim([0, len(y)/sr])  # Adjust the x-axis limits to the length of the audio

    plt.show()

    # Sonification: clicks at the beat times, padded/trimmed to the audio length
    y_beats_ml = librosa.clicks(times=beats_ml, sr=sr, click_freq=1000.0, click_duration=0.1, length=len(y))
    mixed_audio = 0.6 * y + 0.25 * y_beats_ml  # Mixing original audio with click track

    # Display mixed audio with beats
    audio_widget = IPython.display.Audio(mixed_audio, rate=sr)
    IPython.display.display(audio_widget)

In [None]:
# For every song: preview the first detected beats, then plot the activation
# and play the click-track mix.
# NOTE(review): plot_and_sonify_track() calls estimate_beats() again, so each
# song is analysed twice here.
for song_path in songs:
    print("Estimating beats with a machine learned activation function...")
    example_beats_ml, novelty_ml = estimate_beats(song_path, 'machine_learning')
    print(f"The first 5 detected ML beats are at {example_beats_ml[:5]}")
    plot_and_sonify_track(song_path)

### Chroma Features

In [None]:
def compare_chroma(audio_path):
    """Plot the machine-learned chroma (madmom DeepChroma) of an audio file.

    The audio is loaded at 44.1 kHz, passed through
    madmom.audio.chroma.DeepChromaProcessor with default parameters, and shown
    as a chromagram (x axis: time, y axis: pitch class) with a colorbar.
    Only the machine-learned chroma is plotted.

    Parameters
    ----------
    audio_path : str
        Path to input audio file

    Returns
    -------
    None

    """
    # Explicit submodule import: `import librosa` alone does not expose
    # librosa.display on older librosa releases.
    import librosa.display

    madmom_sr = 44100  # madmom seems to expect 44.1kHz input
    # presumably DeepChromaProcessor's default output rate is 10 frames per
    # second - confirm against the madmom documentation
    chroma_fps = 10

    print(f"Song: {audio_path}")
    # Load straight at the target rate instead of loading at librosa's default
    # rate and upsampling afterwards.
    y_441, _ = librosa.load(audio_path, sr=madmom_sr)

    # use madmom.audio.chroma.DeepChromaProcessor for machine learned chroma
    dcp = madmom.audio.chroma.DeepChromaProcessor()
    ml_chroma = dcp(y_441)

    # Plotting
    fig, ax = plt.subplots(nrows=1, figsize=(18, 5))
    fig.suptitle(f"Song: {audio_path}")
    # sr / hop_length must equal the chroma frame rate, otherwise the time
    # axis is mis-scaled.
    img = librosa.display.specshow(
        ml_chroma.T,
        sr=madmom_sr,
        hop_length=madmom_sr // chroma_fps,
        y_axis='chroma',
        x_axis='time',
        ax=ax,
    )
    fig.colorbar(img, ax=[ax])

    plt.show()


In [None]:
# Plot the chromagram of every discovered song
for path in songs:
    compare_chroma(path)

In [None]:
# --- Demucs separation settings (edit to taste) ---

# Name of the pretrained model passed to demucs via `-n`.
model = "htdemucs"

# None separates every stem. Set to "vocals", "drums", "bass" or "other" to
# split only that stem from the rest (passed as `--two-stems`).
two_stems = None

# --- Output format ---
# MP3 output and its bitrate.
mp3 = True
mp3_rate = 320
# WAV sample formats; unused if `mp3` is True.
# Never set both `float32 = True` and `int24 = True`.
float32 = False
int24 = False

In [None]:
def find_files(in_path, exts=None):
    """Return the audio files located directly inside ``in_path``.

    Parameters
    ----------
    in_path : str or Path
        Folder to scan (not recursive).
    exts : iterable of str, optional
        Accepted file suffixes, with or without the leading dot,
        case-insensitive. Defaults to the module-level ``extensions`` list.

    Returns
    -------
    list of Path
        Matching regular files in sorted order.
    """
    wanted = {
        ext.lower().lstrip(".")
        for ext in (extensions if exts is None else exts)
    }
    # iterdir() yields entries in arbitrary order; sorting keeps the file list
    # reproducible between runs. is_file() skips folders whose name merely
    # ends with an audio suffix.
    return sorted(
        entry
        for entry in Path(in_path).iterdir()
        if entry.is_file() and entry.suffix.lower().lstrip(".") in wanted
    )

def copy_process_streams(process: sp.Popen):
    """Forward a child process's stdout/stderr to this process's until EOF.

    Both pipes are read as they become ready and the decoded text is written
    to ``sys.stdout`` / ``sys.stderr`` respectively. Returns once both pipes
    have reached end-of-file; it does not wait for the process itself.

    Parameters
    ----------
    process : subprocess.Popen
        Process started with ``stdout=PIPE`` and ``stderr=PIPE``.

    Raises
    ------
    ValueError
        If either pipe is missing.
    """
    import codecs

    def raw(stream: Optional[IO[bytes]]) -> IO[bytes]:
        if stream is None:
            raise ValueError("process must be created with stdout=PIPE and stderr=PIPE")
        # Read from the unbuffered layer so select() readiness matches what
        # read() can actually return.
        if isinstance(stream, io.BufferedIOBase):
            stream = stream.raw
        return stream

    p_stdout, p_stderr = raw(process.stdout), raw(process.stderr)
    # One incremental decoder per pipe: a multi-byte UTF-8 character split
    # across two reads is buffered instead of raising UnicodeDecodeError.
    make_decoder = codecs.getincrementaldecoder("utf-8")
    stream_by_fd: Dict[int, Tuple[IO[bytes], IO[str], codecs.IncrementalDecoder]] = {
        p_stdout.fileno(): (p_stdout, sys.stdout, make_decoder(errors="replace")),
        p_stderr.fileno(): (p_stderr, sys.stderr, make_decoder(errors="replace")),
    }
    fds = list(stream_by_fd)

    while fds:
        # `select` syscall will wait until one of the file descriptors has content.
        ready, _, _ = select.select(fds, [], [])
        for fd in ready:
            p_stream, std, decoder = stream_by_fd[fd]
            raw_buf = p_stream.read(2 ** 16)
            at_eof = not raw_buf
            # final=True at EOF flushes any incomplete trailing bytes.
            text = decoder.decode(raw_buf or b"", final=at_eof)
            if text:
                std.write(text)
                std.flush()
            if at_eof:
                fds.remove(fd)

def separate(inp=None, outp=None):
    """Run ``demucs.separate`` on every audio file of a folder.

    The command line is built from the module-level settings ``model``,
    ``mp3``, ``mp3_rate``, ``float32``, ``int24`` and ``two_stems``; the
    child's output is forwarded live through copy_process_streams.

    Parameters
    ----------
    inp : str or Path, optional
        Folder holding the input audio. Defaults to the global ``in_path``.
    outp : str or Path, optional
        Folder receiving the separated stems. Defaults to the global
        ``out_path``.

    Returns
    -------
    None
        A message is printed when no file is found or the command fails.
    """
    inp = inp or in_path
    outp = outp or out_path
    # Use the interpreter running this notebook so demucs is resolved from the
    # same environment; fall back to "python3" if it is unknown.
    cmd = [sys.executable or "python3", "-m", "demucs.separate", "-o", str(outp), "-n", model]
    if mp3:
        cmd += ["--mp3", f"--mp3-bitrate={mp3_rate}"]
    if float32:
        cmd += ["--float32"]
    if int24:
        cmd += ["--int24"]
    if two_stems is not None:
        cmd += [f"--two-stems={two_stems}"]
    audio_files = [str(f) for f in find_files(inp)]
    if not audio_files:
        # Report the folder that was actually scanned, not the global default.
        print(f"No valid audio files in {inp}")
        return
    print("Going to separate the files:")
    print('\n'.join(audio_files))
    print("With command: ", " ".join(cmd))
    # The context manager closes both pipes and waits for the process.
    with sp.Popen(cmd + audio_files, stdout=sp.PIPE, stderr=sp.PIPE) as p:
        copy_process_streams(p)
    if p.returncode != 0:
        print("Command failed, something went wrong.")


def from_upload():
    """Separate files obtained through an upload dialog.

    The local folders ``tmp_in`` and ``separated`` are emptied and recreated,
    every uploaded file is written into ``tmp_in``, and separate() is run from
    ``tmp_in`` into ``separated``.

    NOTE(review): `files` is not defined in the visible cells - presumably
    google.colab's `files` helper; confirm before use.
    """
    out_path = Path('separated')
    in_path = Path('tmp_in')

    # Start from empty input and output folders
    for folder in (in_path, out_path):
        if folder.exists():
            rmtree(folder)
        folder.mkdir()

    # Persist each upload under its original name
    for name, content in files.upload().items():
        (in_path / name).write_bytes(content)
    separate(in_path, out_path)


In [None]:
# This can be quite slow, in particular the loading. Please be patient!
# Called without arguments, separate() reads every audio file found in the
# global `in_path` folder and writes the results to `out_path`, so once you
# are happy with the results, remove the songs from the input folder.
# To adjust the settings, change the option variables (model, two_stems, mp3, ...) defined in the options cell.
separate()

In [None]:
import tensorflow as tf

from basic_pitch.inference import predict, Model
from basic_pitch import ICASSP_2022_MODEL_PATH

# Transcribe one file with Basic Pitch; the result is unpacked into the raw
# model output, the MIDI data and the note events.
model_output, midi_data, note_events = predict('/Users/bcarone/PycharmProjects/WTOMDYL/songs/Continue.wav')

# Template for transcribing several files while loading the model only once:
#basic_pitch_model = Model(ICASSP_2022_MODEL_PATH)

#for x in range():
#    ...
#    model_output, midi_data, note_events = predict(
#        <loop-x-input-audio-path>,
#        basic_pitch_model,
#    )
#    ...


In [None]:
from basic_pitch import ICASSP_2022_MODEL_PATH
from basic_pitch.inference import predict_and_save

# The cell previously still contained the README's `<...>` placeholders, which
# is a SyntaxError. The values below are starting points - adjust as needed.
# NOTE(review): presumably predict_and_save refuses to overwrite existing
# output files, so clear this folder before re-running - confirm.
basic_pitch_output_dir = Path('basic_pitch_output')
basic_pitch_output_dir.mkdir(parents=True, exist_ok=True)

predict_and_save(
    songs,                        # input audio path list
    str(basic_pitch_output_dir),  # output directory
    True,                         # save MIDI
    False,                        # sonify MIDI
    False,                        # save model outputs
    True,                         # save notes
    ICASSP_2022_MODEL_PATH,       # model to run
)

In [None]:
# Show the path of the third discovered song (the one read by the pedalboard cell)
songs[2]

In [None]:
# Don't do import *! (It just makes this example smaller)
from pedalboard import Pedalboard, Chorus, Reverb, Compressor, Gain, Phaser, LadderFilter, Limiter, Convolution
from pedalboard.io import AudioFile

# Sample rate used for reading, processing and writing the audio
samplerate = 44100.0

# If asked, add 'pedals' to whichever song of their choice.
# The whole file is read, resampled to `samplerate`.
with AudioFile(songs[2]).resampled_to(samplerate) as f:
    audio = f.read(f.frames)

# Effect chain, in processing order. The first compressor uses the final
# threshold of -40 dB directly.
# (optional extras, currently disabled:
#    LadderFilter(mode=LadderFilter.Mode.HPF12, cutoff_hz=900) after Chorus,
#    Convolution("./guitar_amp.wav", 1.0) after Phaser)
board = Pedalboard([
    Compressor(threshold_db=-40, ratio=25),
    Gain(gain_db=30),
    Chorus(),
    Phaser(),
    Reverb(room_size=0.25),
    Compressor(threshold_db=-25, ratio=10),
    Gain(gain_db=10),
    Limiter(),
])

# Run the audio through this pedalboard!
effected = board(audio, samplerate)

# Write the result as a wav file with as many channels as the processed audio
with AudioFile('processed-output.wav', 'w', samplerate, effected.shape[0]) as f:
    f.write(effected)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import IPython
import IPython.display as ipd
import librosa
import madmom
import io
import sys

def compare_chroma(audio_path):
    """Plot the machine-learned chroma (madmom DeepChroma) of an audio file.

    The audio is loaded at 44.1 kHz, passed through
    madmom.audio.chroma.DeepChromaProcessor with default parameters, and shown
    as a chromagram (x axis: time, y axis: pitch class) with a colorbar.
    Only the machine-learned chroma is plotted.

    Parameters
    ----------
    audio_path : str
        Path to input audio file

    Returns
    -------
    None

    """
    # Explicit submodule import: `import librosa` alone does not expose
    # librosa.display on older librosa releases.
    import librosa.display

    madmom_sr = 44100  # madmom seems to expect 44.1kHz input
    # presumably DeepChromaProcessor's default output rate is 10 frames per
    # second - confirm against the madmom documentation
    chroma_fps = 10

    print(f"Song: {audio_path}")
    # Load straight at the target rate instead of loading at librosa's default
    # rate and upsampling afterwards.
    y_441, _ = librosa.load(audio_path, sr=madmom_sr)

    # use madmom.audio.chroma.DeepChromaProcessor for machine learned chroma
    dcp = madmom.audio.chroma.DeepChromaProcessor()
    ml_chroma = dcp(y_441)

    # Plotting
    fig, ax = plt.subplots(nrows=1, figsize=(18, 5))
    fig.suptitle(f"Song: {audio_path}")
    # sr / hop_length must equal the chroma frame rate, otherwise the time
    # axis is mis-scaled.
    img = librosa.display.specshow(
        ml_chroma.T,
        sr=madmom_sr,
        hop_length=madmom_sr // chroma_fps,
        y_axis='chroma',
        x_axis='time',
        ax=ax,
    )
    fig.colorbar(img, ax=[ax])

    plt.show()


In [None]:
from crema.analyze import analyze

# Run CREMA's analyze() on one file and keep the result in `jam`.
# NOTE(review): '/path/to/file.mp3' looks like a template placeholder - point
# it at a real file (e.g. an entry of `songs`) before running.
jam = analyze(filename='/path/to/file.mp3')

In [None]:
def format_chord_annotations(chord_annotations):
    """Print chord annotations as a text table.

    Parameters
    ----------
    chord_annotations : dict
        Mapping with an "annotations" list; every annotation has a "data"
        list whose entries provide "time", "duration", "value" and
        "confidence".

    Returns
    -------
    None
        The table (header, rule, one row per data entry) is written to
        standard output.
    """
    header_template = "| {:<8} | {:<12} | {:<8} | {:<10} |"
    rule_template = "|{:->10}|{:->14}|{:->10}|{:->12}|"
    row_template = "| {:>8.3f} | {:>12.3f} | {:<8} | {:>10.3f} |"

    # Table head
    print(header_template.format("Time (s)", "Duration (s)", "Chord", "Confidence"))
    print(rule_template.format("", "", "", ""))

    # Walk every data entry of every annotation, in order
    entries = (
        entry
        for annotation in chord_annotations["annotations"]
        for entry in annotation["data"]
    )
    for entry in entries:
        print(row_template.format(entry["time"], entry["duration"], entry["value"], entry["confidence"]))

# Sample chord annotations in JSON format
chord_annotations_json = {
  "annotations": [
    {
      "annotation_metadata": {
        "curator": {
          "name": "",
          "email": ""
        },
        "annotator": {},
        "version": "a4c7d57.0",
        "corpus": "",
        "annotation_tools": "CREMA 0.2.0",
        "annotation_rules": "",
        "validation": "",
        "data_source": "program"
      },
      "namespace": "chord",
      "data": [
        {
          "time": 0.0,
          "duration": 3.5294331065759637,
          "value": "C:maj7",
          "confidence": 0.6888477802276611
        },
        {
          "time": 3.5294331065759637,
          "duration": 1.0216780045351475,
          "value": "D:min7",
          "confidence": 0.44941213726997375
        },
        {
          "time": 4.551111111111111,
          "duration": 2.693514739229025,
          "value": "C:maj7",
          "confidence": 0.5885221362113953
        },
        {
          "time": 7.244625850340136,
          "duration": 2.321995464852608,
          "value": "E:min7",
          "confidence": 0.5442448854446411
        },
        {
          "time": 9.566621315192744,
          "duration": 2.4148752834467118,
          "value": "C:maj7",
          "confidence": 0.6589974761009216
        },
        {
          "time": 11.981496598639456,
          "duration": 0.6501587301587293,
          "value": "E:min7",
          "confidence": 0.5313878059387207
        },
        {
          "time": 12.631655328798185,
          "duration": 0.5572789115646266,
          "value": "D#:min7",
          "confidence": 0.6097334027290344
        },
        {
          "time": 13.188934240362812,
          "duration": 0.8359183673469381,
          "value": "D:min7",
          "confidence": 0.7025358080863953
        },
        {
          "time": 14.02485260770975,
          "duration": 3.1579138321995472,
          "value": "C:maj7",
          "confidence": 0.7440720200538635
        },
        {
          "time": 17.182766439909297,
          "duration": 1.8575963718820852,
          "value": "E:min7",
          "confidence": 0.6710625886917114
        },
        {
          "time": 19.040362811791383,
          "duration": 2.507755102040818,
          "value": "C:maj7",
          "confidence": 0.7014257311820984
        },
        {
          "time": 21.5481179138322,
          "duration": 1.2074376417233559,
          "value": "E:min7",
          "confidence": 0.7216202020645142
        },
        {
          "time": 22.755555555555556,
          "duration": 1.1145578231292497,
          "value": "D:min7",
          "confidence": 0.7745717763900757
        },
        {
          "time": 23.870113378684806,
          "duration": 1.486077097505671,
          "value": "C:maj7",
          "confidence": 0.8393642902374268
        },
        {
          "time": 25.356190476190477,
          "duration": 1.021678004535147,
          "value": "B:7",
          "confidence": 0.676853597164154
        },
        {
          "time": 26.377868480725624,
          "duration": 2.4148752834467118,
          "value": "E:min7",
          "confidence": 0.9179165959358215
        },
        {
          "time": 28.792743764172336,
          "duration": 1.3931972789115648,
          "value": "C:maj7",
          "confidence": 0.8583433032035828
        },
        {
          "time": 30.1859410430839,
          "duration": 1.021678004535147,
          "value": "B:7",
          "confidence": 0.5581579208374023
        },
        {
          "time": 31.207619047619048,
          "duration": 1.3003174603174585,
          "value": "E:min7",
          "confidence": 0.5648024678230286
        },
        {
          "time": 32.507936507936506,
          "duration": 1.021678004535147,
          "value": "D:min7",
          "confidence": 0.629398763179779
        },
        {
          "time": 33.52961451247165,
          "duration": 1.5789569160997772,
          "value": "C:maj7",
          "confidence": 0.8511319160461426
        },
        {
          "time": 35.10857142857143,
          "duration": 0.9287981859410408,
          "value": "B:7",
          "confidence": 0.5815609097480774
        },
        {
          "time": 36.03736961451247,
          "duration": 2.4148752834467118,
          "value": "E:min7",
          "confidence": 0.9095653295516968
        },
        {
          "time": 38.45224489795918,
          "duration": 1.3931972789115648,
          "value": "C:maj7",
          "confidence": 0.8777265548706055
        },
        {
          "time": 39.84544217687075,
          "duration": 0.9287981859410408,
          "value": "B:7",
          "confidence": 0.7703116536140442
        },
        {
          "time": 40.77424036281179,
          "duration": 1.3003174603174656,
          "value": "E:min7/5",
          "confidence": 0.5140035152435303
        },
        {
          "time": 42.074557823129254,
          "duration": 1.1145578231292461,
          "value": "D:min7",
          "confidence": 0.5519083142280579
        },
        {
          "time": 43.1891156462585,
          "duration": 1.486077097505671,
          "value": "C:maj7",
          "confidence": 0.8533657789230347
        },
        {
          "time": 44.67519274376417,
          "duration": 0.9287981859410408,
          "value": "B:7",
          "confidence": 0.8011198043823242
        },
        {
          "time": 45.60399092970521,
          "duration": 2.600634920634924,
          "value": "E:min7",
          "confidence": 0.826475977897644
        },
        {
          "time": 48.204625850340136,
          "duration": 1.3003174603174585,
          "value": "C:maj7",
          "confidence": 0.8840567469596863
        },
        {
          "time": 49.504943310657595,
          "duration": 0.9287981859410479,
          "value": "B:7",
          "confidence": 0.752159059047699
        },
        {
          "time": 50.43374149659864,
          "duration": 1.2074376417233523,
          "value": "E:min7/5",
          "confidence": 0.5502618551254272
        },
        {
          "time": 51.641179138321995,
          "duration": 1.2074376417233594,
          "value": "D:min7",
          "confidence": 0.7790983319282532
        },
        {
          "time": 52.848616780045354,
          "duration": 1.4860770975056639,
          "value": "C:maj7",
          "confidence": 0.8724805116653442
        },
        {
          "time": 54.33469387755102,
          "duration": 1.021678004535147,
          "value": "B:7",
          "confidence": 0.5946125984191895
        },
        {
          "time": 55.356371882086165,
          "duration": 2.3219954648526127,
          "value": "E:min7",
          "confidence": 0.8120864033699036
        },
        {
          "time": 57.67836734693878,
          "duration": 1.57895691609977,
          "value": "C:maj7",
          "confidence": 0.8383042216300964
        },
        {
          "time": 59.25732426303855,
          "duration": 0.8359183673469417,
          "value": "B:7",
          "confidence": 0.6934025883674622
        },
        {
          "time": 60.09324263038549,
          "duration": 1.3003174603174585,
          "value": "E:min7",
          "confidence": 0.5664228200912476
        },
        {
          "time": 61.39356009070295,
          "duration": 0.6501587301587293,
          "value": "D:min7",
          "confidence": 0.5513476133346558
        },
        {
          "time": 62.04371882086168,
          "duration": 1.9504761904761878,
          "value": "C:maj7",
          "confidence": 0.8161636590957642
        },
        {
          "time": 63.994195011337865,
          "duration": 0.9287981859410479,
          "value": "B:7",
          "confidence": 0.7602234482765198
        },
        {
          "time": 64.92299319727891,
          "duration": 2.4148752834467047,
          "value": "E:min7",
          "confidence": 0.9154744744300842
        },
        {
          "time": 67.33786848072562,
          "duration": 1.486077097505671,
          "value": "C:maj7",
          "confidence": 0.8049619197845459
        },
        {
          "time": 68.82394557823129,
          "duration": 0.9287981859410479,
          "value": "B:7",
          "confidence": 0.6187557578086853
        },
        {
          "time": 69.75274376417234,
          "duration": 1.2074376417233594,
          "value": "E:min7",
          "confidence": 0.6586447358131409
        },
        {
          "time": 70.9601814058957,
          "duration": 0.8359183673469346,
          "value": "D:min7",
          "confidence": 0.5752996802330017
        },
        {
          "time": 71.79609977324263,
          "duration": 1.57895691609977,
          "value": "C:maj7",
          "confidence": 0.8874987363815308
        },
        {
          "time": 73.3750566893424,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.7803281545639038
        },
        {
          "time": 74.58249433106576,
          "duration": 2.4148752834467047,
          "value": "E:min7",
          "confidence": 0.9753153920173645
        },
        {
          "time": 76.99736961451246,
          "duration": 1.3003174603174728,
          "value": "C:maj7",
          "confidence": 0.7772683501243591
        },
        {
          "time": 78.29768707482994,
          "duration": 1.021678004535147,
          "value": "B:7",
          "confidence": 0.636987030506134
        },
        {
          "time": 79.31936507936508,
          "duration": 1.3003174603174585,
          "value": "E:min7",
          "confidence": 0.5974509119987488
        },
        {
          "time": 80.61968253968254,
          "duration": 0.9287981859410337,
          "value": "D:min7",
          "confidence": 0.4932454526424408
        },
        {
          "time": 81.54848072562358,
          "duration": 1.7647165532879825,
          "value": "C:maj7",
          "confidence": 0.7873513102531433
        },
        {
          "time": 83.31319727891156,
          "duration": 0.8359183673469488,
          "value": "B:7",
          "confidence": 0.767035961151123
        },
        {
          "time": 84.14911564625851,
          "duration": 2.4148752834467047,
          "value": "E:min7",
          "confidence": 0.9336474537849426
        },
        {
          "time": 86.56399092970521,
          "duration": 1.3003174603174585,
          "value": "C:maj7",
          "confidence": 0.8365268707275391
        },
        {
          "time": 87.86430839002267,
          "duration": 1.1145578231292603,
          "value": "B:7",
          "confidence": 0.6841025948524475
        },
        {
          "time": 88.97886621315193,
          "duration": 1.2074376417233452,
          "value": "E:min7",
          "confidence": 0.5621882081031799
        },
        {
          "time": 90.18630385487528,
          "duration": 1.021678004535147,
          "value": "D:min7",
          "confidence": 0.49868056178092957
        },
        {
          "time": 91.20798185941042,
          "duration": 1.5789569160997843,
          "value": "C:maj7",
          "confidence": 0.8057901859283447
        },
        {
          "time": 92.78693877551021,
          "duration": 0.9287981859410337,
          "value": "B:7",
          "confidence": 0.6728636026382446
        },
        {
          "time": 93.71573696145124,
          "duration": 2.507755102040818,
          "value": "E:min7",
          "confidence": 0.8593246340751648
        },
        {
          "time": 96.22349206349206,
          "duration": 1.2074376417233594,
          "value": "C:maj7",
          "confidence": 0.7941040992736816
        },
        {
          "time": 97.43092970521542,
          "duration": 1.1145578231292461,
          "value": "B:7",
          "confidence": 0.7425666451454163
        },
        {
          "time": 98.54548752834467,
          "duration": 1.3003174603174728,
          "value": "E:min7",
          "confidence": 0.6117319464683533
        },
        {
          "time": 99.84580498866214,
          "duration": 1.021678004535147,
          "value": "D:min7",
          "confidence": 0.6492061018943787
        },
        {
          "time": 100.86748299319729,
          "duration": 1.3931972789115576,
          "value": "C:maj7",
          "confidence": 0.8931423425674438
        },
        {
          "time": 102.26068027210884,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.8272048830986023
        },
        {
          "time": 103.4681179138322,
          "duration": 2.507755102040818,
          "value": "E:min7",
          "confidence": 0.8576298952102661
        },
        {
          "time": 105.97587301587302,
          "duration": 1.1145578231292461,
          "value": "C:maj7",
          "confidence": 0.817864716053009
        },
        {
          "time": 107.09043083900227,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.8381361365318298
        },
        {
          "time": 108.29786848072563,
          "duration": 1.1145578231292461,
          "value": "E:min7",
          "confidence": 0.4865381419658661
        },
        {
          "time": 109.41242630385487,
          "duration": 1.1145578231292603,
          "value": "D:min7",
          "confidence": 0.6319562196731567
        },
        {
          "time": 110.52698412698413,
          "duration": 1.3931972789115576,
          "value": "C:maj7",
          "confidence": 0.8528010845184326
        },
        {
          "time": 111.92018140589569,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.877736508846283
        },
        {
          "time": 113.12761904761905,
          "duration": 2.4148752834467047,
          "value": "E:min7",
          "confidence": 0.7154068350791931
        },
        {
          "time": 115.54249433106575,
          "duration": 1.2074376417233594,
          "value": "C:maj7",
          "confidence": 0.8440504670143127
        },
        {
          "time": 116.74993197278911,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.7883591651916504
        },
        {
          "time": 117.95736961451247,
          "duration": 1.1145578231292461,
          "value": "E:min7",
          "confidence": 0.6024501323699951
        },
        {
          "time": 119.07192743764172,
          "duration": 1.1145578231292603,
          "value": "D:min7",
          "confidence": 0.6945860981941223
        },
        {
          "time": 120.18648526077098,
          "duration": 1.3003174603174585,
          "value": "C:maj7",
          "confidence": 0.8463126420974731
        },
        {
          "time": 121.48680272108844,
          "duration": 1.2074376417233594,
          "value": "B:7",
          "confidence": 0.749811053276062
        },
        {
          "time": 122.6942403628118,
          "duration": 3.4365532879818517,
          "value": "E:min7",
          "confidence": 0.6062605977058411
        },
        {
          "time": 126.13079365079365,
          "duration": 4.643990929705225,
          "value": "E:7",
          "confidence": 0.6442009806632996
        }
      ],
      "sandbox": {},
      "time": 0,
      "duration": 130.73541950113378
    }
  ],
  "file_metadata": {
    "title": "",
    "artist": "",
    "release": "",
    "duration": 130.73541950113378,
    "identifiers": {},
    "jams_version": "0.3.4"
  },
  "sandbox": {}
}

# Format and print chord annotations.
# Passes chord_annotations_json to format_chord_annotations.
# NOTE(review): chord_annotations_json is presumably the JAMS-style structure
# written out just above (chord events carrying "time", "duration", "value"
# and "confidence", plus "file_metadata") — confirm where it is assigned.
# NOTE(review): the return value is not captured here, so the helper is
# presumably expected to print its formatted output itself — confirm against
# its definition.
format_chord_annotations(chord_annotations_json)
