In [43]:
import librosa
import numpy as np
import pandas as pd
import pickle
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel

In [55]:
def round_tempo(tempo):
    """Rounds the tempo to the nearest whole number with a small tolerance for deviation."""
    rounded_tempo = round(tempo)
    if abs(tempo - rounded_tempo) <= 0.2:
        return rounded_tempo
    else:
        return int(tempo)


def create_beat_grid(onset_frames, sr, beat_interval_in_frames, time_signature, duration):
    """Creates a grid of measures based on the tempo, time signature, and beats of the song."""

    tolerance_in_seconds = 0.1
    tolerance_in_frames = int(librosa.time_to_frames(tolerance_in_seconds, sr=sr))

    # Convert duration from seconds to frames
    duration_in_frames = librosa.time_to_frames(duration, sr=sr)

    # Find valid beat sequences
    valid_sequences = []
    current_sequence = []
    min_sequence_length = 4
    for i in range(len(onset_frames) - 1):
        frame_diff = onset_frames[i + 1] - onset_frames[i]
        if abs(frame_diff - beat_interval_in_frames) <= tolerance_in_frames:
            if not current_sequence:
                current_sequence = [onset_frames[i]]
            current_sequence.append(onset_frames[i + 1])
        else:
            if current_sequence and len(current_sequence) >= min_sequence_length:
                valid_sequences.append(current_sequence)
            current_sequence = []
    if current_sequence and len(current_sequence) >= min_sequence_length:
        valid_sequences.append(current_sequence)

    # If no valid sequences are found, start a grid search
    while not valid_sequences and tolerance_in_seconds < 0.5 and min_sequence_length > 1:
        tolerance_in_seconds += 0.1
        tolerance_in_frames = int(librosa.time_to_frames(tolerance_in_seconds, sr=sr))
        min_sequence_length -= 1
        valid_sequences = []
        current_sequence = []
        for i in range(len(onset_frames) - 1):
            frame_diff = onset_frames[i + 1] - onset_frames[i]
            if abs(frame_diff - beat_interval_in_frames) <= tolerance_in_frames:
                if not current_sequence:
                    current_sequence = [onset_frames[i]]
                current_sequence.append(onset_frames[i + 1])
            else:
                if current_sequence and len(current_sequence) >= min_sequence_length:
                    valid_sequences.append(current_sequence)
                current_sequence = []
        if current_sequence and len(current_sequence) >= min_sequence_length:
            valid_sequences.append(current_sequence)

    # Set the anchor frame to the first onset if no valid sequences are found
    if not valid_sequences:
        anchor_frame = onset_frames[0]
    else:
        first_sequence = valid_sequences[0]
        anchor_frame = first_sequence[0]

    # Add beats before the first onset (working backwards)
    beat_grid = [anchor_frame]
    current_frame = anchor_frame
    while current_frame >= 0:
        current_frame -= beat_interval_in_frames
        beat_grid.insert(0, current_frame)

    # Remove the first beat if it's negative
    if beat_grid[0] < 0:
        beat_grid.pop(0)

    # Add beats after the anchor frame
    current_frame = anchor_frame + beat_interval_in_frames
    while current_frame <= duration_in_frames:
        beat_grid.append(current_frame)
        current_frame += beat_interval_in_frames 
    # Group beats into measures
    measure_grid = []
    current_frame = beat_grid[0]
    while current_frame <= duration_in_frames:
        measure_grid.append(current_frame)
        current_frame += beat_interval_in_frames * time_signature
    
    return np.array(beat_grid), np.array(measure_grid)


def calculate_distances(start_frame, end_frame, beat_grid, measure_grid):
    """Calculate distances from the closest beat and measure for chorus start and end times."""
    
    # Initialize a dictionary to store distances
    distances = {
        'start_to_nearest_beat': None,
        'end_to_nearest_beat': None,
        'start_to_nearest_measure': None,
        'end_to_nearest_measure': None
    }
    
    # Find the closest beat to the chorus start and end times
    closest_start_beat_index = np.argmin(np.abs(beat_grid - start_frame))
    closest_end_beat_index = np.argmin(np.abs(beat_grid - end_frame))
    
    # Find the closest measure to the chorus start and end times
    closest_start_measure_index = np.argmin(np.abs(measure_grid - start_frame))
    closest_end_measure_index = np.argmin(np.abs(measure_grid - end_frame))
    
    # Calculate the distances from the chorus start to the nearest beat and measure
    distances['start_to_nearest_beat'] = (beat_grid[closest_start_beat_index] - start_frame)/48000
    distances['start_to_nearest_measure'] = (measure_grid[closest_start_measure_index] - start_frame)/48000
    
    # Calculate the distances from the chorus end to the nearest beat and measure
    distances['end_to_nearest_beat'] = (beat_grid[closest_end_beat_index] - end_frame)/48000
    distances['end_to_nearest_measure'] = (measure_grid[closest_end_measure_index] - end_frame)/48000
    
    return distances

In [21]:
# Load the DataFrame with labeled data
df = pd.read_csv('../data/dataframes/clean_labeled.csv')

# Initialize an empty DataFrame for the results
results_df = pd.DataFrame()

# Iterate through the DataFrame and prepare data for each song
for _, group in tqdm(df.groupby('SongID'), desc='Processing'):
    song_id = group['SongID'].values[0]
    audio_path = group['FilePath'].values[0]
    y, sr = librosa.load(audio_path, sr=None)

    # Extract tempo, beat frames, and time signature
    C = np.abs(librosa.cqt(y=y, sr=sr))
    onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    sp_tempo = group['sp_tempo'].values[0] if not pd.isna(group['sp_tempo'].values[0]) else tempo
    if sp_tempo == 0:
        sp_tempo = tempo
    if sp_tempo > 142:
        sp_tempo /= 2
    if sp_tempo < 71:
        sp_tempo *= 2
    tempo = round_tempo(sp_tempo)

    time_signature = group['sp_time_signature'].values[0] if not pd.isna(group['sp_time_signature'].values[0]) else 4
    time_signature = int(time_signature) if time_signature != 0 else 4

    # Create a measure grid
    duration = librosa.get_duration(y=y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, backtrack=True, units='frames')
    beat_interval_in_seconds = 60 / tempo
    beat_interval_in_frames = int(librosa.time_to_frames(beat_interval_in_seconds, sr=sr))
    beat_grid, measure_grid = create_beat_grid(onset_frames, sr, beat_interval_in_frames, time_signature, duration)
    # Iterate through each chorus in the group
    for _, chorus in group.iterrows():
        if chorus['label'] == 'chorus':
            start_frame = chorus['start_frame']
            end_frame = chorus['end_frame']
    
            # Call the function to calculate distances
            distances = calculate_distances(start_frame, end_frame, beat_grid, measure_grid)
            
            # Create a DataFrame from the distances dictionary
            distances_df = pd.DataFrame([distances])
    
            # Add the SongID and other relevant information to the distances DataFrame
            distances_df['SongID'] = song_id  # <-- SongID is added here
            distances_df['start_frame'] = start_frame
            distances_df['end_frame'] = end_frame
    
            # Append the result to the results DataFrame
            results_df = pd.concat([results_df, distances_df], ignore_index=True)

Processing:   0%|          | 0/332 [00:00<?, ?it/s]

In [35]:
# Apply the absolute value function to the columns
results_df_abs = results_df[['start_to_nearest_beat', 'end_to_nearest_beat',
                             'start_to_nearest_measure', 'end_to_nearest_measure']].abs()

# Calculate the maximum, average, and standard deviation of the absolute values
max_distances = results_df_abs.max()
average_distances = results_df_abs.mean()
std_distances = results_df_abs.std()

# Output the calculated metrics
print("Maximum Distances (absolute values):")
print(max_distances)
print("\nAverage Distances (absolute values):")
print(average_distances)
print("\nStandard Deviations (absolute values):")
print(std_distances)

Maximum Distances (absolute values):
start_to_nearest_beat       0.000771
end_to_nearest_beat         0.027187
start_to_nearest_measure    0.002104
end_to_nearest_measure      0.027187
dtype: float64

Average Distances (absolute values):
start_to_nearest_beat       0.000213
end_to_nearest_beat         0.000278
start_to_nearest_measure    0.000823
end_to_nearest_measure      0.000906
dtype: float64

Standard Deviations (absolute values):
start_to_nearest_beat       0.000140
end_to_nearest_beat         0.001259
start_to_nearest_measure    0.000568
end_to_nearest_measure      0.001338
dtype: float64


In [40]:
# Test using CQT generated tempo with rounding
# Initialize an empty DataFrame for the results
CQT_results_df = pd.DataFrame()

# Iterate through the DataFrame and prepare data for each song
for _, group in tqdm(df.groupby('SongID'), desc='Processing'):
    song_id = group['SongID'].values[0]
    audio_path = group['FilePath'].values[0]
    y, sr = librosa.load(audio_path, sr=None)

    # Extract tempo, beat frames, and time signature
    C = np.abs(librosa.cqt(y=y, sr=sr))
    onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    if tempo >= 140:
        tempo /= 2
    if tempo < 70:
        tempo *= 2
    tempo = round_tempo(tempo)

    time_signature = group['sp_time_signature'].values[0] if not pd.isna(group['sp_time_signature'].values[0]) else 4
    time_signature = int(time_signature) if time_signature != 0 else 4

    # Create a measure grid
    duration = librosa.get_duration(y=y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, backtrack=True, units='frames')
    beat_interval_in_seconds = 60 / tempo
    beat_interval_in_frames = int(librosa.time_to_frames(beat_interval_in_seconds, sr=sr))
    beat_grid, measure_grid = create_beat_grid(onset_frames, sr, beat_interval_in_frames, time_signature, duration)
    # Iterate through each chorus in the group
    for _, chorus in group.iterrows():
        if chorus['label'] == 'chorus':
            start_frame = chorus['start_frame']
            end_frame = chorus['end_frame']
    
            # Call the function to calculate distances
            distances = calculate_distances(start_frame, end_frame, beat_grid, measure_grid)
            
            # Create a DataFrame from the distances dictionary
            distances_df = pd.DataFrame([distances])
    
            # Add the SongID and other relevant information to the distances DataFrame
            distances_df['SongID'] = song_id  # <-- SongID is added here
            distances_df['start_frame'] = start_frame
            distances_df['end_frame'] = end_frame
    
            # Append the result to the results DataFrame
            CQT_results_df = pd.concat([CQT_results_df, distances_df], ignore_index=True)

Processing:   0%|          | 0/332 [00:00<?, ?it/s]

In [41]:
CQT_results_df_abs = CQT_results_df[['start_to_nearest_beat', 'end_to_nearest_beat',
                             'start_to_nearest_measure', 'end_to_nearest_measure']].abs()

In [46]:
# Column names to test
columns_to_test = [
    'start_to_nearest_beat', 'end_to_nearest_beat',
    'start_to_nearest_measure', 'end_to_nearest_measure'
]

# Loop over each column and perform the paired t-test
for column in columns_to_test:
    # Calculate the differences
    differences = results_df_abs[column] - CQT_results_df_abs[column]
    
    # Calculate the mean difference
    mean_difference = differences.mean()
    
    # Calculate the t-statistic and the p-value
    t_stat, p_value = ttest_rel(results_df_abs[column], CQT_results_df_abs[column])
    
    print(f'Column: {column}')
    print(f'Paired t-test Statistics={t_stat}, p-value={p_value}')
    print(f'Mean difference: {mean_difference}')
    
    # Interpretation based on a standard alpha value of 0.05
    alpha = 0.05
    if p_value < alpha:
        direction = "smaller" if mean_difference < 0 else "larger"
        print(f'For column {column}, the differences are statistically significant, and the mean difference ({mean_difference}) suggests that the first test has {direction} distances on average compared to the second test (p < {alpha}).\n')
    else:
        print(f'For column {column}, there is no significant difference between the first test and the second test (p >= {alpha}).\n')

Column: start_to_nearest_beat
Paired t-test Statistics=-1.2613135564488143, p-value=0.20759176544825514
Mean difference: -6.244399641577062e-06
For column start_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: end_to_nearest_beat
Paired t-test Statistics=-1.331718153686073, p-value=0.18336120874160272
Mean difference: -7.308467741935485e-06
For column end_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: start_to_nearest_measure
Paired t-test Statistics=-4.3583770200923615, p-value=1.4948498165870041e-05
Mean difference: -8.459341397849463e-05
For column start_to_nearest_measure, the differences are statistically significant, and the mean difference (-8.459341397849463e-05) suggests that the first test has smaller distances on average compared to the second test (p < 0.05).

Column: end_to_nearest_measure
Paired t-test Statistics=-2.1531478694669763, p-

In [52]:
# Test using CQT generated tempo with no rounding
# Initialize an empty DataFrame for the results
CQT_no_round_results_df = pd.DataFrame()

# Iterate through the DataFrame and prepare data for each song
for _, group in tqdm(df.groupby('SongID'), desc='Processing'):
    song_id = group['SongID'].values[0]
    audio_path = group['FilePath'].values[0]
    y, sr = librosa.load(audio_path, sr=None)

    # Extract tempo, beat frames, and time signature
    C = np.abs(librosa.cqt(y=y, sr=sr))
    onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    if tempo >= 140:
        tempo /= 2
    if tempo < 70:
        tempo *= 2

    time_signature = group['sp_time_signature'].values[0] if not pd.isna(group['sp_time_signature'].values[0]) else 4
    time_signature = int(time_signature) if time_signature != 0 else 4

    # Create a measure grid
    duration = librosa.get_duration(y=y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, backtrack=True, units='frames')
    beat_interval_in_seconds = 60 / tempo
    beat_interval_in_frames = int(librosa.time_to_frames(beat_interval_in_seconds, sr=sr))
    beat_grid, measure_grid = create_beat_grid(onset_frames, sr, beat_interval_in_frames, time_signature, duration)
    # Iterate through each chorus in the group
    for _, chorus in group.iterrows():
        if chorus['label'] == 'chorus':
            start_frame = chorus['start_frame']
            end_frame = chorus['end_frame']
    
            # Call the function to calculate distances
            distances = calculate_distances(start_frame, end_frame, beat_grid, measure_grid)
            
            # Create a DataFrame from the distances dictionary
            distances_df = pd.DataFrame([distances])
    
            # Add the SongID and other relevant information to the distances DataFrame
            distances_df['SongID'] = song_id  # <-- SongID is added here
            distances_df['start_frame'] = start_frame
            distances_df['end_frame'] = end_frame
    
            # Append the result to the results DataFrame
            CQT_no_round_results_df = pd.concat([CQT_no_round_results_df, distances_df], ignore_index=True)
            
CQT_no_round_results_df_abs = CQT_no_round_results_df[['start_to_nearest_beat', 'end_to_nearest_beat',
                             'start_to_nearest_measure', 'end_to_nearest_measure']].abs()

Processing:   0%|          | 0/332 [00:00<?, ?it/s]

In [53]:
# Loop over each column and perform the paired t-test
for column in columns_to_test:
    # Calculate the differences
    differences = CQT_no_round_results_df_abs[column] - CQT_results_df_abs[column]
    
    # Calculate the mean difference
    mean_difference = differences.mean()
    
    # Calculate the t-statistic and the p-value
    t_stat, p_value = ttest_rel(CQT_no_round_results_df_abs[column], CQT_results_df_abs[column])
    
    print(f'Column: {column}')
    print(f'Paired t-test Statistics={t_stat}, p-value={p_value}')
    print(f'Mean difference: {mean_difference}')
    
    # Interpretation based on a standard alpha value of 0.05
    alpha = 0.05
    if p_value < alpha:
        direction = "smaller" if mean_difference < 0 else "larger"
        print(f'For column {column}, the differences are statistically significant, and the mean difference ({mean_difference}) suggests that the first test has {direction} distances on average compared to the second test (p < {alpha}).\n')
    else:
        print(f'For column {column}, there is no significant difference between the first test and the second test (p >= {alpha}).\n')

Column: start_to_nearest_beat
Paired t-test Statistics=-0.5201521700463505, p-value=0.6031124624355765
Mean difference: -2.0441308243727593e-06
For column start_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: end_to_nearest_beat
Paired t-test Statistics=-0.7160344256504535, p-value=0.47419506720599003
Mean difference: -2.548163082437276e-06
For column end_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: start_to_nearest_measure
Paired t-test Statistics=-2.72367622892034, p-value=0.00660757340987553
Mean difference: -4.004256272401434e-05
For column start_to_nearest_measure, the differences are statistically significant, and the mean difference (-4.004256272401434e-05) suggests that the first test has smaller distances on average compared to the second test (p < 0.05).

Column: end_to_nearest_measure
Paired t-test Statistics=-0.18171084835540993, p-val

In [56]:
# Test using CQT generated tempo with no rounding no backtracking
# Initialize an empty DataFrame for the results
CQT_no_backtrack_results_df = pd.DataFrame()

# Iterate through the DataFrame and prepare data for each song
for _, group in tqdm(df.groupby('SongID'), desc='Processing'):
    song_id = group['SongID'].values[0]
    audio_path = group['FilePath'].values[0]
    y, sr = librosa.load(audio_path, sr=None)

    # Extract tempo, beat frames, and time signature
    C = np.abs(librosa.cqt(y=y, sr=sr))
    onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    if tempo >= 140:
        tempo /= 2
    if tempo < 70:
        tempo *= 2

    time_signature = group['sp_time_signature'].values[0] if not pd.isna(group['sp_time_signature'].values[0]) else 4
    time_signature = int(time_signature) if time_signature != 0 else 4

    # Create a measure grid
    duration = librosa.get_duration(y=y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, units='frames')
    beat_interval_in_seconds = 60 / tempo
    beat_interval_in_frames = int(librosa.time_to_frames(beat_interval_in_seconds, sr=sr))
    beat_grid, measure_grid = create_beat_grid(onset_frames, sr, beat_interval_in_frames, time_signature, duration)
    # Iterate through each chorus in the group
    for _, chorus in group.iterrows():
        if chorus['label'] == 'chorus':
            start_frame = chorus['start_frame']
            end_frame = chorus['end_frame']
    
            # Call the function to calculate distances
            distances = calculate_distances(start_frame, end_frame, beat_grid, measure_grid)
            
            # Create a DataFrame from the distances dictionary
            distances_df = pd.DataFrame([distances])
    
            # Add the SongID and other relevant information to the distances DataFrame
            distances_df['SongID'] = song_id  # <-- SongID is added here
            distances_df['start_frame'] = start_frame
            distances_df['end_frame'] = end_frame
    
            # Append the result to the results DataFrame
            CQT_no_backtrack_results_df = pd.concat([CQT_no_backtrack_results_df, distances_df], ignore_index=True)
            
CQT_no_backtrack_results_df_abs = CQT_no_backtrack_results_df[['start_to_nearest_beat', 'end_to_nearest_beat',
                             'start_to_nearest_measure', 'end_to_nearest_measure']].abs()

Processing:   0%|          | 0/332 [00:00<?, ?it/s]

In [57]:
# Loop over each column and perform the paired t-test
for column in columns_to_test:
    # Calculate the differences
    differences = CQT_no_backtrack_results_df_abs[column] - CQT_no_round_results_df_abs[column]
    
    # Calculate the mean difference
    mean_difference = differences.mean()
    
    # Calculate the t-statistic and the p-value
    t_stat, p_value = ttest_rel(CQT_no_backtrack_results_df_abs[column], CQT_no_round_results_df_abs[column])
    
    print(f'Column: {column}')
    print(f'Paired t-test Statistics={t_stat}, p-value={p_value}')
    print(f'Mean difference: {mean_difference}')
    
    # Interpretation based on a standard alpha value of 0.05
    alpha = 0.05
    if p_value < alpha:
        direction = "smaller" if mean_difference < 0 else "larger"
        print(f'For column {column}, the differences are statistically significant, and the mean difference ({mean_difference}) suggests that the first test has {direction} distances on average compared to the second test (p < {alpha}).\n')
    else:
        print(f'For column {column}, there is no significant difference between the first test and the second test (p >= {alpha}).\n')

Column: start_to_nearest_beat
Paired t-test Statistics=-0.5090298219252374, p-value=0.6108824478586004
Mean difference: -1.6521057347670257e-06
For column start_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: end_to_nearest_beat
Paired t-test Statistics=1.115701727913295, p-value=0.2649105399980171
Mean difference: 4.1722670250896025e-06
For column end_to_nearest_beat, there is no significant difference between the first test and the second test (p >= 0.05).

Column: start_to_nearest_measure
Paired t-test Statistics=-0.6963597039455548, p-value=0.48642137205206526
Mean difference: -5.852374551971323e-06
For column start_to_nearest_measure, there is no significant difference between the first test and the second test (p >= 0.05).

Column: end_to_nearest_measure
Paired t-test Statistics=3.0602630818016516, p-value=0.0022910071889945113
Mean difference: 3.830645161290323e-05
For column end_to_nearest_measure, the differe

NameError: name 'group' is not defined