In [None]:
def quantize_beats(beats, onset_env, tempo, sr, hop_length, duration, beats_per_measure=4):
    """
    Build a uniform beat grid and measure grid for a track at a fixed tempo.

    The grid starts at time 0 and places one beat every ``60 / tempo`` seconds
    until the end of the track. Every ``beats_per_measure``-th beat (starting
    with the first) is a measure start. The detected beats are only validated;
    no snapping to onsets is performed.

    Parameters:
    beats : np.ndarray
        Beat positions in frame units (e.g. from librosa.beat.beat_track).
        Only checked for being one-dimensional and non-empty.
    onset_env : np.ndarray
        Onset envelope of the audio signal. Currently unused; kept for
        interface compatibility.
    tempo : float
        Tempo in beats per minute used to space the grid. A one-element array
        is accepted as well.
    sr : int
        Sampling rate of the audio signal. Currently unused.
    hop_length : int
        Hop length of the analysis frames. Currently unused.
    duration : float
        Duration of the audio signal in seconds.
    beats_per_measure : int, optional
        Number of beats per measure. Defaults to 4 (4/4 time).

    Returns:
    beat_grid : np.ndarray
        Beat times in seconds, starting at 0 and never exceeding `duration`.
    measure_grid : np.ndarray
        Measure start times in seconds (every `beats_per_measure`-th beat).

    Raises:
    ValueError: If `beats` is empty or not one-dimensional, if `tempo` is
        missing, non-finite or not positive, or if `beats_per_measure` < 1.
    """
    # Validate input
    beats = np.asarray(beats)
    if beats.ndim != 1:
        raise ValueError("The 'beats' array must be one-dimensional.")
    if beats.size == 0:
        raise ValueError("The 'beats' array must not be empty.")
    if tempo is None:
        raise ValueError("The 'tempo' must be a positive, finite number of beats per minute.")
    tempo = float(np.ravel(tempo)[0])
    if not (np.isfinite(tempo) and tempo > 0):
        raise ValueError("The 'tempo' must be a positive, finite number of beats per minute.")
    if beats_per_measure < 1:
        raise ValueError("'beats_per_measure' must be at least 1.")

    # Seconds per beat
    beat_interval = 60.0 / tempo

    # The grid is anchored at the very start of the track
    first_beat_time = 0

    # np.arange with a float step can overshoot the stop value by a rounding
    # error, so clip explicitly to the track duration.
    beat_grid = np.arange(first_beat_time, duration, beat_interval)
    beat_grid = beat_grid[beat_grid <= duration]

    # Every beats_per_measure-th beat opens a measure; being a subset of
    # beat_grid, it is already within the duration.
    measure_grid = beat_grid[::beats_per_measure]

    return beat_grid, measure_grid


def apply_measure_grid(ax, measure_grid):
    """
    Decorate an axis with a measure grid.

    Measures are numbered from 0 in the order given. Every fourth measure
    (0, 4, 8, ...) becomes a labelled major tick with a dashed green line;
    all other measures become unlabelled minor ticks with a dotted grey line.
    The x-axis label is set to 'Measure Number'.

    Parameters:
    ax (matplotlib.axes.Axes): The axis object to modify.
    measure_grid (list or array): Measure start times in seconds.
    """
    numbers = np.arange(len(measure_grid))

    # Split the measures into major (every fourth) and minor in a single pass
    major_times, major_labels, minor_times = [], [], []
    for number, start in zip(numbers, measure_grid):
        if number % 4:
            minor_times.append(start)
        else:
            major_times.append(start)
            major_labels.append(number)

    # Major ticks carry the measure number; minor ticks stay unlabelled
    ax.set_xticks(major_times, minor=False)
    ax.set_xticklabels(major_labels, minor=False)
    ax.set_xticks(minor_times, minor=True)

    # Heavier dashed lines for major measures, lighter dotted ones for the rest
    major_style = dict(color='green', linestyle='--', linewidth=2)
    minor_style = dict(color='grey', linestyle=':', linewidth=1, alpha=0.8)
    for start in major_times:
        ax.axvline(x=start, **major_style)
    for start in minor_times:
        ax.axvline(x=start, **minor_style)

    ax.set_xlabel('Measure Number')


def detect_key_from_chromagram(chromagram, sr):
    """
    Estimate the musical key of a chromagram with Krumhansl-Schmuckler profiles.

    The chromagram is summed over time into one 12-bin pitch-class vector,
    which is correlated against the major and minor profiles for each of the
    12 possible tonics. The key with the highest correlation wins.

    Parameters:
    chromagram : np.ndarray
        Array of shape (12, n_frames); row 0 is taken to be pitch class C,
        row 1 C#, and so on.
    sr : int
        Sampling rate. Unused; kept for interface compatibility.

    Returns:
    best_key : str
        Key name such as 'D major' or 'A minor' (sharps only).
    best_corr : float
        Pearson correlation of the winning key. A completely flat chroma
        vector produces NaN correlations, in which case the result is not
        meaningful.

    Raises:
    ValueError: If the chromagram does not have exactly 12 pitch-class rows.
    """
    pitches = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # Total energy of each pitch class across all time frames
    chroma_vals = np.sum(chromagram, axis=1)
    if chroma_vals.shape != (12,):
        raise ValueError("The chromagram must have exactly 12 pitch-class rows.")

    # Krumhansl-Schmuckler key profiles, index 0 = tonic
    maj_profile = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    min_profile = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # For tonic i the chroma must be rotated so that bin i lands on index 0,
    # i.e. shifted left by i. Rolling by +i would test tonic (12 - i) instead
    # and mislabel every key except C and F#.
    maj_key_corrs = [np.corrcoef(maj_profile, np.roll(chroma_vals, -i))[1, 0] for i in range(12)]
    min_key_corrs = [np.corrcoef(min_profile, np.roll(chroma_vals, -i))[1, 0] for i in range(12)]

    # Candidates in matching order: 12 major keys followed by 12 minor keys
    key_corrs = maj_key_corrs + min_key_corrs
    keys = [p + ' major' for p in pitches] + [p + ' minor' for p in pitches]

    # Pick the best-correlated key
    best_idx = np.argmax(key_corrs)
    best_key = keys[best_idx]
    best_corr = key_corrs[best_idx]

    return best_key, best_corr


def get_camelot(key):
    """
    Translate a key name into its Camelot wheel code.

    Parameters:
    key : str
        Key written as '<pitch> <mode>' with a single space, e.g. 'F# minor'.
        Pitches use sharps only; mode is 'major' or 'minor'.

    Returns:
    str
        Camelot code: '1B'..'12B' for major keys, '1A'..'12A' for minor keys.

    Raises:
    ValueError: If `key` does not split into exactly two parts, or the mode
        is neither 'major' nor 'minor'.
    KeyError: If the pitch is not one of the twelve sharp-spelled names.
    """
    # Camelot codes per mode: 'B' ring for major keys, 'A' ring for minor keys
    camelot_by_mode = {
        'major': {
            'B': '1B', 'F#': '2B', 'C#': '3B', 'G#': '4B', 'D#': '5B',
            'A#': '6B', 'F': '7B', 'C': '8B', 'G': '9B', 'D': '10B',
            'A': '11B', 'E': '12B',
        },
        'minor': {
            'G#': '1A', 'D#': '2A', 'A#': '3A', 'F': '4A', 'C': '5A',
            'G': '6A', 'D': '7A', 'A': '8A', 'E': '9A', 'B': '10A',
            'F#': '11A', 'C#': '12A',
        },
    }

    # '<pitch> <mode>' -> two parts; anything else fails the unpacking
    tonic, scale = key.split(' ')

    if scale not in camelot_by_mode:
        raise ValueError("Invalid mode in key: should be 'major' or 'minor'.")
    return camelot_by_mode[scale][tonic]


def get_studio_bpm(beat_frames: np.ndarray, sr: int = 22050, hop_length: int = 512,
                   variance_threshold: float = 0.01, window_length: int = 4) -> Tuple[Optional[float], Optional[float], Optional[float], np.ndarray, np.ndarray]:
    """
    Estimate a track's steady ("studio") tempo from its detected beat frames.

    A window of `window_length` consecutive beat intervals slides over the
    track one interval at a time. Windows whose interval variance is below
    `variance_threshold` are considered stable, and the BPM statistics are
    computed from the intervals of those windows only.

    Parameters:
    - beat_frames (np.ndarray): Array of beat frame indices.
    - sr (int): The sample rate of the audio. Default is 22050 Hz.
    - hop_length (int): The number of samples per frame. Default is 512.
    - variance_threshold (float): Upper bound (exclusive) on the variance, in
      seconds squared, for a window of beat intervals to count as stable.
    - window_length (int): The number of beat intervals in each sliding window.

    Returns:
    Tuple[Optional[float], Optional[float], Optional[float], np.ndarray, np.ndarray]:
        - The mean studio BPM (None if no stable window was found).
        - The median studio BPM (None if no stable window was found).
        - The most frequent BPM after rounding to whole numbers; ties go to
          the lowest value (None if no stable window was found).
        - The frame index of the first beat of each stable window.
        - The interval durations (seconds) of all stable windows, concatenated.
          Windows overlap, so an interval inside several stable windows appears
          once per window and weighs correspondingly more in the statistics.
    """
    beat_frames = np.asarray(beat_frames)

    # Frame index -> seconds. Same arithmetic as librosa.frames_to_time
    # without an n_fft offset: frame * hop_length / sr.
    beat_times = beat_frames * hop_length / float(sr)

    # Durations between consecutive beats
    beat_intervals = np.diff(beat_times)
    total_intervals = len(beat_intervals)

    stable_intervals = []
    # Positions into beat_frames (not frame values) of each stable window's
    # first beat; they are turned into frame values after the loop.
    stable_start_positions = []

    for i in range(total_intervals - window_length + 1):
        window = beat_intervals[i:i + window_length]
        if np.var(window) < variance_threshold:
            stable_intervals.extend(window)
            stable_start_positions.append(i)

    stable_intervals = np.asarray(stable_intervals, dtype=float)

    mean_studio_bpm = None
    median_studio_bpm = None
    mode_studio_bpm = None

    if stable_intervals.size:
        stable_bpms = 60.0 / stable_intervals
        mean_studio_bpm = float(np.mean(stable_bpms))
        median_studio_bpm = float(np.median(stable_bpms))

        # Mode of the whole-number BPMs. np.unique returns values sorted
        # ascending, so argmax resolves ties towards the lowest BPM.
        bpm_values, bpm_counts = np.unique(np.round(stable_bpms), return_counts=True)
        mode_studio_bpm = float(bpm_values[np.argmax(bpm_counts)])

    # Look up the frame value of the first beat of every stable window
    stable_frames = beat_frames[np.asarray(stable_start_positions, dtype=int)]

    return mean_studio_bpm, median_studio_bpm, mode_studio_bpm, stable_frames, stable_intervals

In [None]:
import pandas as pd
import os
import librosa
from tqdm import tqdm
import numpy as np
from typing import Tuple, List, Optional

# Define your directory and constants
merged_df = pd.read_csv(r'..\data\dataframes\sp_merged2.csv')
mp3_directory = r"..\data\audio_files\processed"
export_directory = r"..\data\pkl"
hop_length = 512
sr = 22050
data_list = []

# Process each song and collect one summary row (duration, tempo, key) per file
for index, row in tqdm(merged_df.iterrows(), desc="Processing audio profiles", total=merged_df.shape[0]):
    audio_file_path = row['FilePath']
    if not os.path.exists(audio_file_path):
        print(f"File not found: {audio_file_path}")
        continue

    # Load the audio file
    y, sr = librosa.load(audio_file_path, sr=sr)
    duration = librosa.get_duration(y=y, sr=sr)
    y_harm, y_perc = librosa.effects.hpss(y)

    # Chroma profile (from the harmonic component) and key estimate
    chroma_cq = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
    key, key_corr = detect_key_from_chromagram(chroma_cq, sr)
    camelot = get_camelot(key)

    # Tempo/rhythm profile
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop_length)
    # beat_track may hand back the tempo as a scalar or a one-element array
    tempo = float(np.ravel(tempo)[0])
    if len(beats) == 0:
        # quantize_beats rejects an empty beat array; skip instead of aborting the run
        print(f"No beats detected: {audio_file_path}")
        continue

    # get_studio_bpm returns (mean, median, mode, stable_frames, stable_intervals).
    # NOTE(review): the median is used as the single 'studio_bpm' value - confirm
    # this is the intended statistic.
    _mean_bpm, studio_bpm, _mode_bpm, _stable_frames, stable_intervals = get_studio_bpm(
        beats, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr, hop_length=hop_length)
    tempogram_ratio = librosa.feature.tempogram_ratio(tg=tempogram, sr=sr)

    # Quantize beats and create measure grid; fall back to the tracker's tempo
    # when no stable section was found
    grid_bpm = studio_bpm if studio_bpm is not None else tempo
    beat_grid, measure_grid = quantize_beats(beats, onset_env, grid_bpm, sr, hop_length, duration)
    measure_numbers = np.arange(len(measure_grid))
    measure_dict = {measure_number: measure_time for measure_number, measure_time in zip(measure_numbers, measure_grid)}

    # Data dictionary to hold features
    data_dict = {
        'SongID': row['SongID'],
        'duration': duration,
        'tempo': tempo,
        'studio_bpm': studio_bpm,
        'key': key,
        'key_corr': key_corr,
        'camelot_key': camelot
    }

    # Append the data dictionary to the data list
    data_list.append(data_dict)

# Convert the list of dictionaries to a dataframe
new_data_df = pd.DataFrame(data_list)

# Attach the new columns to the original dataframe. With no processed songs the
# new frame has no 'SongID' column and the merge would raise, so skip it.
if data_list:
    merged_df = merged_df.merge(new_data_df, on='SongID', how='left')

## Saving the audio features as pkl files

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import librosa
import librosa.display
import numpy as np
import math
from tqdm import tqdm
import pandas as pd
import gc

# Define your directory and constants
merged_df = pd.read_csv(r'..\data\dataframes\sp_merged2.csv')

mp3_directory = r"..\data\audio_files\processed"
export_directory = r"..\data\pkl"
hop_length = 512
sr = 22050

# pd.to_pickle does not create missing folders
os.makedirs(export_directory, exist_ok=True)

# Process each song and write one pickle of audio features per SongID
for index, row in tqdm(merged_df.iterrows(), desc="Processing audio profiles", total=merged_df.shape[0]):
    audio_file_path = row['FilePath']
    if not os.path.exists(audio_file_path):
        print(f"File not found: {audio_file_path}")
        continue

    # Load the audio file
    y, sr = librosa.load(audio_file_path, sr=sr)
    duration = librosa.get_duration(y=y, sr=sr)
    y_harm, y_perc = librosa.effects.hpss(y)

    # Chroma profile
    chroma_cq = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
    key, key_corr = detect_key_from_chromagram(chroma_cq, sr)
    camelot = get_camelot(key)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr, chroma=chroma_cq)

    # Spectrogram (power STFT -> mel -> dB)
    D = np.abs(librosa.stft(y))**2
    S_mel = librosa.feature.melspectrogram(S=D, sr=sr)
    S_mel_db = librosa.power_to_db(S_mel, ref=np.max)
    # Centroid
    # NOTE(review): S_mel has mel-spaced bins but no `freq` is passed, so the
    # centroid is presumably computed against linear FFT bin frequencies - confirm.
    centroid_mel = librosa.feature.spectral_centroid(S=S_mel, sr=sr, hop_length=hop_length)
    # MFCC
    mfccs = librosa.feature.mfcc(S=S_mel_db)

    # Tempo/rhythm profile
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop_length)
    # beat_track may hand back the tempo as a scalar or a one-element array
    tempo = float(np.ravel(tempo)[0])
    if len(beats) == 0:
        # quantize_beats rejects an empty beat array; skip instead of aborting the run
        print(f"No beats detected: {audio_file_path}")
        continue

    # get_studio_bpm returns (mean, median, mode, stable_frames, stable_intervals).
    # NOTE(review): the median is used as the single 'studio_bpm' value - confirm
    # this is the intended statistic.
    _mean_bpm, studio_bpm, _mode_bpm, _stable_frames, stable_intervals = get_studio_bpm(
        beats, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr, hop_length=hop_length)
    tempogram_ratio = librosa.feature.tempogram_ratio(tg=tempogram, sr=sr)

    # Quantize beats and create measure grid; fall back to the tracker's tempo
    # when no stable section was found
    grid_bpm = studio_bpm if studio_bpm is not None else tempo
    beat_grid, measure_grid = quantize_beats(beats, onset_env, grid_bpm, sr, hop_length, duration)
    measure_numbers = np.arange(len(measure_grid))
    measure_dict = {measure_number: measure_time for measure_number, measure_time in zip(measure_numbers, measure_grid)}

    # Data dictionary to hold features. Arrays are stored as plain lists.
    data = {
        'SongID': row['SongID'],
        'duration': duration,
        'tempo': tempo,
        'studio_bpm': studio_bpm,
        'key': key,
        'key_corr': key_corr,
        'camelot_key': camelot,
        # asarray first: works whether get_studio_bpm returned a list or an array
        'stable_intervals': np.asarray(stable_intervals).tolist(),
        'y': y.tolist(),
        # The plotting cell reads the separated components under these keys
        'y_harm': y_harm.tolist(),
        'y_perc': y_perc.tolist(),
        'chroma_cq': chroma_cq.tolist(),
        'tonnetz': tonnetz.tolist(),
        'S_mel_db': S_mel_db.tolist(),
        'centroid_mel': centroid_mel.tolist(),
        'mfccs': mfccs.tolist(),
        'tempogram': tempogram.tolist(),
        'tempogram_ratio': tempogram_ratio.tolist(),
        'MeasureDict': measure_dict
    }

    # Save to pickle file
    pickle_file_path = os.path.join(export_directory, f"{row['SongID']}.pkl")  # Ensure proper path joining
    pd.to_pickle(data, pickle_file_path)

## Making visual plots for all songs using pkl audio features

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa.display
from tqdm import tqdm
import pickle
import gc

# Only the SongID column is needed here: it names both the pickle to read and
# the figure to write for each song.
merged_df = pd.read_csv(r'../data/dataframes/sp_merged2.csv')['SongID']

# Output folder for the rendered figures and input folder with the feature pickles
export_directory, pkl_directory = r"../figures/audio_plots", r"../data/pkl"

# Sample rate and hop length handed to librosa.display when drawing time axes
# (presumably the same values the features were extracted with - verify)
sr, hop_length = 22050, 512

def apply_measure_grid(ax, measure_grid, measure_numbers):
    """
    Decorate an axis with a measure grid.

    Measures whose number is a multiple of 4 become labelled major ticks with
    a dashed green line; all other measures become unlabelled minor ticks with
    a dotted grey line. The x-axis label is set to 'Measure Number'.

    Parameters:
    ax (matplotlib.axes.Axes): The axis object to modify.
    measure_grid (list or array): Measure start times in seconds.
    measure_numbers (list or array): Integer measure number of each entry in
        `measure_grid` (same length).
    """
    # Accept plain lists as well as arrays; boolean masking needs arrays
    measure_grid = np.asarray(measure_grid)
    measure_numbers = np.asarray(measure_numbers)

    # Split into major (every fourth) and minor measures
    major_mask = measure_numbers % 4 == 0
    major_times = measure_grid[major_mask]
    minor_times = measure_grid[~major_mask]

    # Set major x-axis ticks and labels (for measure 0 and every fourth measure after)
    ax.set_xticks(major_times)
    ax.set_xticklabels(measure_numbers[major_mask])

    # Set minor x-axis ticks (for intermediate measures)
    ax.set_xticks(minor_times, minor=True)

    # axvline spans the full axes height whatever the y-limits are. Drawing
    # vlines between the current get_ylim() values instead fixes the lines in
    # data coordinates, so they fall short once the limits change, and each
    # call can itself widen the autoscaled limits.
    for measure_time in major_times:
        ax.axvline(x=measure_time, color='green', linestyle='--', linewidth=2)
    for measure_time in minor_times:
        ax.axvline(x=measure_time, color='grey', linestyle=':', linewidth=1, alpha=0.7)

    ax.set_xlabel('Measure Number')

    
def load_pickle_data(pkl_path):
    """
    Read one pickled object from disk and return it.

    Parameters:
    pkl_path (str): Path of the pickle file to read.

    Returns:
    The object stored in the file.

    Note: unpickling can execute arbitrary code, so only load files produced
    by this project.
    """
    with open(pkl_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

        
# savefig does not create missing folders
os.makedirs(export_directory, exist_ok=True)

# Row labels for the tempogram-ratio panel (identical for every song)
note_labels = [
    'Sixteenth note',
    'Dotted sixteenth',
    'Eighth triplet',
    'Eighth note',
    'Dotted eighth',
    'Quarter triplet',
    'Quarter note',
    'Dotted quarter',
    'Half triplet',
    'Half note',
    'Dotted half note',
    'Whole triplet',
    'Whole note'
]

# Render one six-panel figure per song from its pickled audio features
for song_id in tqdm(merged_df, desc="Processing audio profiles"):
    export_fig_path = os.path.join(export_directory, f"{song_id}.png")
    pkl_path = os.path.join(pkl_directory, f"{song_id}.pkl")

    # Skip songs that are already rendered or have no feature file
    if os.path.exists(export_fig_path) or not os.path.exists(pkl_path):
        continue

    try:
        data = load_pickle_data(pkl_path)
    except (EOFError, pickle.UnpicklingError) as e:
        print(f"Failed to load data for {song_id} due to {e} Skipping.")
        continue

    # Extracting the individual components from the data dictionary.
    # Pickles that hold only the raw waveform 'y' get the harmonic/percussive
    # split recomputed here instead of failing with a KeyError.
    if 'y_harm' in data and 'y_perc' in data:
        y_harm = np.asarray(data['y_harm'])
        y_perc = np.asarray(data['y_perc'])
    else:
        y_harm, y_perc = librosa.effects.hpss(np.asarray(data['y']))
    S_mel_db = np.asarray(data['S_mel_db'])
    tempogram = np.asarray(data['tempogram'])
    tempogram_ratio = np.asarray(data['tempogram_ratio'])
    chroma_cq = np.asarray(data['chroma_cq'])
    tonnetz = np.asarray(data['tonnetz'])
    duration = data['duration']
    measure_grid = np.array(list(data['MeasureDict'].values()))
    measure_numbers = np.array(list(data['MeasureDict'].keys()))

    # Create subplots
    fig, axs = plt.subplots(6, 1, figsize=(20, 30), dpi=125)
    try:
        # Harmonic/Percussive Waveform plot
        axs[0].plot(np.linspace(0, duration, len(y_harm)), y_harm, alpha=0.5, label='Harmonic', color='b')
        axs[0].plot(np.linspace(0, duration, len(y_perc)), y_perc, alpha=0.5, label='Percussive', color='r')
        apply_measure_grid(axs[0], measure_grid, measure_numbers)
        axs[0].set_title('Harmonic and Percussive Waveform')
        axs[0].set_xlim([0, duration])

        # Mel Spectrogram plot
        librosa.display.specshow(S_mel_db, sr=sr, x_axis='time', y_axis='mel', ax=axs[1], fmax=8000)
        apply_measure_grid(axs[1], measure_grid, measure_numbers)
        axs[1].set_title('Mel Spectrogram')
        # Set the y-axis limits
        axs[1].set_ylim(0, 8000)  # Assuming the fmax is 8000 Hz as specified in the specshow call

        # Tempogram plot
        librosa.display.specshow(tempogram, sr=sr, hop_length=512, x_axis='time', y_axis='tempo', cmap='magma', ax=axs[2])
        apply_measure_grid(axs[2], measure_grid, measure_numbers)
        axs[2].set_title('Tempogram')

        # Tempogram ratio, one row per note value
        librosa.display.specshow(tempogram_ratio, x_axis='time', ax=axs[3], sr=sr)
        axs[3].set_xlim([0, duration])
        apply_measure_grid(axs[3], measure_grid, measure_numbers)
        axs[3].set_yticks(range(len(note_labels)))
        axs[3].set_yticklabels(note_labels)
        axs[3].set_title('Tempogram Ratio')

        # Chroma CQT plot
        librosa.display.specshow(chroma_cq, y_axis='chroma', x_axis='time', ax=axs[4])
        apply_measure_grid(axs[4], measure_grid, measure_numbers)
        axs[4].set_title('Chroma CQT')

        # Tonnetz plot
        librosa.display.specshow(tonnetz, sr=sr, hop_length=hop_length, y_axis='tonnetz', x_axis='time', ax=axs[5])
        apply_measure_grid(axs[5], measure_grid, measure_numbers)
        axs[5].set_title('Tonnetz')

        plt.tight_layout()
        plt.savefig(export_fig_path)
    finally:
        # Always release the figure, even if a panel failed to draw
        plt.close(fig)

    # Drop the large per-song arrays before loading the next pickle
    del data, y_harm, y_perc, S_mel_db, tempogram, tempogram_ratio, chroma_cq, tonnetz, duration, measure_grid, measure_numbers
    gc.collect()