In [38]:
# Required libraries
import os
import pandas as pd
import librosa
import numpy as np
import ruptures as rpt
from tqdm import tqdm
from scipy.signal import argrelextrema
import soundfile as sf

In [2]:
# Function to create a tempogram from an audio signal
def compute_tempogram(onset_env, sr, hop_length):
    # Compute the tempogram
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr, hop_length=hop_length)
    return tempogram

# Function to create a chromagram from a harmonic audio signal
def compute_chromagram(y_harmonic, sr, hop_length):
    # Compute the chromagram
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, hop_length=hop_length, bins_per_octave=24)
    return chromagram

# Function to detect key from a chromagram using Krumhansl-Schmuckler key-finding algorithm profiles
def detect_key_from_chromagram(chromagram, sr):
    pitches = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # Calculate the sum of each pitch class across all time frames
    chroma_vals = np.sum(chromagram, axis=1)

    # Krumhansl-Schmuckler key-finding algorithm profiles
    maj_profile = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    min_profile = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Correlation for major and minor keys
    maj_key_corrs = [np.corrcoef(maj_profile, np.roll(chroma_vals, i))[1, 0] for i in range(12)]
    min_key_corrs = [np.corrcoef(min_profile, np.roll(chroma_vals, i))[1, 0] for i in range(12)]

    # Combine correlations and keys
    key_corrs = maj_key_corrs + min_key_corrs
    keys = [p + ' major' for p in pitches] + [p + ' minor' for p in pitches]

    # Determine the best key
    best_idx = np.argmax(key_corrs)
    best_key = keys[best_idx]
    best_corr = key_corrs[best_idx]

    return best_key, best_corr

# Function to convert standard key into Camelot key notation
def get_camelot(key):
    # Mapping from musical key to Camelot code
    camelot_major = {
        'B': '1B', 'F#': '2B', 'C#': '3B', 'G#': '4B', 'D#': '5B',
        'A#': '6B', 'F': '7B', 'C': '8B', 'G': '9B', 'D': '10B', 'A': '11B', 'E': '12B'
    }

    camelot_minor = {
        'G#': '1A', 'D#': '2A', 'A#': '3A', 'F': '4A', 'C': '5A',
        'G': '6A', 'D': '7A', 'A': '8A', 'E': '9A', 'B': '10A', 'F#': '11A', 'C#': '12A'
    }

    # Split the detected key into pitch and mode
    pitch, mode = key.split(' ')

    # Return the corresponding Camelot code
    if mode == 'major':
        return camelot_major[pitch]
    elif mode == 'minor':
        return camelot_minor[pitch]
    else:
        raise ValueError("Invalid mode in key: should be 'major' or 'minor'.")

# Function to calculate the optimal number of segments using the elbow method
def segment_waveform_optimal_k(algo, sr, n_bkps_max=6):
    # Function to calculate the sum of costs for a given number of breakpoints
    def get_sum_of_cost(n_bkps):
        bkps = algo.predict(n_bkps=n_bkps)
        cost = algo.cost.sum_of_costs(bkps)
        # Explicitly delete the breakpoints to free up memory
        del bkps
        return cost

    # Use a generator expression for memory efficiency if only iterating once
    costs = (get_sum_of_cost(n_bkps) for n_bkps in range(1, n_bkps_max + 1))
    costs_list = list(costs)  # Convert to list if needed more than once

    # Calculate curvatures with minimal intermediate variables
    curvatures = np.abs(np.diff(costs_list, 2))
    curvatures = np.insert(curvatures, [0, len(curvatures)], 0)

    # Identify the optimal number of breakpoints based on curvature
    optimal_idx = argrelextrema(curvatures, np.greater)[0][0]
    n_bkps_optimal = optimal_idx + 1  # Account for the initial 0 insertion

    # Predict and return the optimal change points
    bkps_optimal_times = algo.predict(n_bkps=n_bkps_optimal)
    times = librosa.frames_to_time(bkps_optimal_times[:-1], sr=sr, hop_length=512) # Exclude the last breakpoint (end of signal)
    return times

# Function to segment tempogram or chromagram with a fixed number of segments
def segment_waveform_fixed_k(algo, sr, n_bkps=3):
    bkps = algo.predict(n_bkps=n_bkps)
    times = librosa.frames_to_time(bkps[:-1], sr=sr, hop_length=512) # Exclude the last breakpoint (end of signal)
    return times

# Function to combine segments and filter based on criteria
def combine_and_filter_segments(tempo_times, chroma_times):
    combined_segments = set()

    # Add all tempogram segments
    for seg in tempo_times:
        combined_segments.add(seg)

    # Add chromagram segments if they are more than 3 seconds away from any tempogram segment
    for c_seg in chroma_times:
        if all(abs(c_seg - t_seg) > 3 for t_seg in tempo_times):
            combined_segments.add(c_seg)

    # Filter segments that are at least 10 seconds apart
    combined_segments = sorted(list(combined_segments))
    filtered_segments = [combined_segments[0]]
    for seg in combined_segments[1:]:
        if seg - filtered_segments[-1] >= 10:
            filtered_segments.append(seg)

    return filtered_segments

In [3]:
# Main processing loop

# Load the DataFrame from CSV
reference_df = pd.read_csv('../data/dataframes/spotify_metadata.csv')
audio_files_dir = '../data/audio_files/processed_download'
hop_length = 512
new_columns = ['track_name', 'artist_names',
    'duration', 'ks_key', 'key_corr','overall_tempo', 'camelot_key', 'times_tempogram_kfixed',
    'times_chroma_kfixed', 'numseg_combined_kfixed', 'times_combined_kfixed'
] 

# Iterate over the audio files
for index, row in tqdm(reference_df.iterrows(), total=reference_df.shape[0]):
    file_path = os.path.join(audio_files_dir, row['filename'])
    if os.path.isfile(file_path):
        # Load the audio file
        signal, sr = librosa.load(file_path, sr=None)
        duration = librosa.get_duration(y=signal, sr=sr)

        # Separate the harmonic and percussive components
        y_harmonic, y_percussive = librosa.effects.hpss(signal)

        # Compute onset envelope
        onset_env = librosa.onset.onset_strength(y=y_percussive, sr=sr, hop_length=hop_length)

        # Compute the tempogram and chromagram
        tempogram = compute_tempogram(onset_env, sr, hop_length)
        chromagram = compute_chromagram(y_harmonic, sr, hop_length)

        # Compute the overall key
        key, key_corr = detect_key_from_chromagram(chromagram, sr)
        camelot = get_camelot(key)

        # Compute the overall tempo
        tempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr)[0]

        # Segment using tempogram with optimal k and k=3
        algo_tempogram = rpt.KernelCPD(kernel="linear").fit(tempogram.T)
        k3_segments_tempo = segment_waveform_fixed_k(algo_tempogram, sr=sr, n_bkps=3)


        # Segment using chromagram with k=3
        algo_chroma = rpt.KernelCPD(kernel="linear").fit(chromagram.T)
        k3_segments_chroma = segment_waveform_fixed_k(algo_chroma, sr=sr, n_bkps=3)

        # Combine tempogram and chromagram segments for optimal k
        #combined_segments_kopt = combine_and_filter_segments(optimal_k_segments_tempogram, optimal_k_segments_chroma)
        # Combine tempogram and chromagram segments for fixed k
        combined_segments_kfixed = combine_and_filter_segments(k3_segments_tempo, k3_segments_chroma)

        # Store the results in the DataFrame
        reference_df.at[index, 'duration'] = duration
        reference_df.at[index, 'ks_key'] = str(key)
        reference_df.at[index, 'key_corr'] = float(key_corr)
        reference_df.at[index, 'camelot_key'] = str(camelot)
        reference_df.at[index, 'overall_tempo'] = float(tempo)
        #reference_df.at[index, 'numseg_tempogram_kopt'] = len(optimal_k_segments_tempogram)
        #reference_df.at[index, 'times_tempogram_kopt'] = ','.join(map(str, optimal_k_segments_tempogram))
        reference_df.at[index, 'times_tempogram_kfixed'] = ','.join(map(str, k3_segments_tempo))
        #reference_df.at[index, 'numseg_chroma_kopt'] = len(optimal_k_segments_chroma)
        #reference_df.at[index, 'times_chroma_kopt'] = ','.join(map(str, optimal_k_segments_chroma))
        reference_df.at[index, 'times_chroma_kfixed'] = ','.join(map(str, k3_segments_chroma))
        #reference_df.at[index, 'numseg_combined_kopt'] = len(combined_segments_kopt)
        #reference_df.at[index, 'times_combined_kopt'] = ','.join(map(str, combined_segments_kopt))
        reference_df.at[index, 'numseg_combined_kfixed'] = len(combined_segments_kfixed)
        reference_df.at[index, 'times_combined_kfixed'] = ','.join(map(str, combined_segments_kfixed))
        #print(reference_df.loc[index, new_columns])

segment_df = reference_df.copy()
prefix_cols = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
               'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
               'time_signature']

segment_df.rename(columns=lambda x: 'sp_' + x if x in prefix_cols else x, inplace=True)

# Splitting and converting 'times_tempogram_kfixed' column to numeric
tempo_split = segment_df['times_tempogram_kfixed'].str.split(',', expand=True)
segment_df['tempo_time_1'] = pd.to_numeric(tempo_split[0], errors='coerce')
segment_df['tempo_time_2'] = pd.to_numeric(tempo_split[1], errors='coerce')
segment_df['tempo_time_3'] = pd.to_numeric(tempo_split[2], errors='coerce')

# Splitting and converting 'times_chroma_kfixed' column to numeric
chroma_split = segment_df['times_chroma_kfixed'].str.split(',', expand=True)
segment_df['chroma_time_1'] = pd.to_numeric(chroma_split[0], errors='coerce')
segment_df['chroma_time_2'] = pd.to_numeric(chroma_split[1], errors='coerce')
segment_df['chroma_time_3'] = pd.to_numeric(chroma_split[2], errors='coerce')

# Dropping the specified columns
segment_df.drop(['times_tempogram_kfixed', 'times_chroma_kfixed',
                 'numseg_combined_kfixed', 'times_combined_kfixed'], axis=1, inplace=True)

# Save the updated DataFrame
segment_df.to_csv(r'../data/dataframes/segment_df.csv', index=False)

print("Segmentation complete. Combined segments have been added to the DataFrame and saved to 'segment_df.csv'.")

100%|██████████| 353/353 [6:44:40<00:00, 68.78s/it]   

Segmentation complete. Combined segments have been added to the DataFrame and saved to 'segment_df.csv'.





In [17]:
# Load the DataFrame
df = pd.read_csv(r'../data/dataframes/segment_df_cleaned.csv')
# Replace empty strings with NaN
df.replace('', np.nan, inplace=True)

# Function to process each row and create the segments with the required format
def process_segments(row):
    segment_list = ['0s']  # Start point added as '0s'
    
    # Add tempo times with 't'
    for i in range(1, 4):
        tempo_key = f'tempo_time_{i}'
        if pd.notnull(row[tempo_key]):
            segment_list.append(f"{round(row[tempo_key], 3)}t")

    # Add chroma times with 'c'
    for i in range(1, 4):
        chroma_key = f'chroma_time_{i}'
        if pd.notnull(row[chroma_key]):
            segment_list.append(f"{round(row[chroma_key], 3)}c")

    # Add end point with 'e'
    segment_list.append(f"{round(row['duration'], 3)}e")

    # Sort the list based on the numerical values (ignoring the 't', 'c', and 'e' suffix)
    segment_list.sort(key=lambda x: float(x[:-1]))

    return segment_list


# Apply the initial segment processing function to each row
df['combined_segments'] = df.apply(process_segments, axis=1)


def resolve_internal_overlaps(segments, suffix):
    """
    This function resolves overlaps within 't' or 'c' segments.
    It assumes segments are sorted by time.
    """
    if not segments:
        return []

    # Start with the first segment
    resolved_segments = [segments[0]]

    for segment in segments[1:]:
        last_segment_time = float(resolved_segments[-1][:-1])
        current_segment_time = float(segment[:-1])

        # If the current segment overlaps with the last, keep the highest (which is the current one, since they are sorted)
        if current_segment_time - last_segment_time <= 5:
            resolved_segments[-1] = segment
        else:
            resolved_segments.append(segment)

    return [seg for seg in resolved_segments if seg.endswith(suffix)]

def resolve_overlaps(segments):
    """
    This function resolves overlaps between 't' and 'c' segments,
    giving priority to 't' segments.
    """
    # Separate 't' and 'c' segments and sort them
    t_segments = sorted([seg for seg in segments if seg.endswith('t')], key=lambda x: float(x[:-1]))
    c_segments = sorted([seg for seg in segments if seg.endswith('c')], key=lambda x: float(x[:-1]))

    # Resolve internal overlaps within 't' and 'c' segments
    t_segments = resolve_internal_overlaps(t_segments, 't')
    c_segments = resolve_internal_overlaps(c_segments, 'c')

    # Combine and sort the resolved 't' and 'c' segments
    combined_segments = sorted(t_segments + c_segments, key=lambda x: float(x[:-1]))

    # Now resolve overlaps between 't' and 'c'
    resolved_segments = []
    i = 0
    while i < len(combined_segments):
        current_time = float(combined_segments[i][:-1])
        current_suffix = combined_segments[i][-1]

        # Check for overlaps within the next elements
        if i + 1 < len(combined_segments):
            next_time = float(combined_segments[i + 1][:-1])
            next_suffix = combined_segments[i + 1][-1]

            # If there's an overlap and the current is a 't', add 'tc' and skip the 'c'
            if next_time - current_time <= 5 and current_suffix == 't' and next_suffix == 'c':
                resolved_segments.append(f"{current_time}tc")
                i += 2  # Skip the next 'c' segment since it's an overlap
                continue
            # If the current is 'c' and the next is 't', skip the 'c'
            elif next_time - current_time <= 5 and current_suffix == 'c' and next_suffix == 't':
                i += 1  # Skip the current 'c' segment
                continue

        # If no overlap or if the segment is a 't' without an overlapping 'c', add the current segment
        resolved_segments.append(combined_segments[i])
        i += 1

    return resolved_segments

# Now apply the overlap resolution function to the processed segments
df['combined_segments'] = df['combined_segments'].apply(resolve_overlaps)

def update_combined_segments(row):
    # Start with '0'
    updated_segments = ['0'] + row['combined_segments']
    
    # Convert duration to string and append
    duration_str = str(row['duration'])
    updated_segments.append(duration_str)
    
    return updated_segments

df['combined_segments'] = df.apply(update_combined_segments, axis=1)

# The 'segments' column of the dataframe now contains the processed segments without overlaps
print(df['combined_segments'])

0      [0, 3.659t, 78.229t, 104.576c, 116.064c, 127.4...
1      [0, 31.083c, 60.757tc, 120.885t, 165.92t, 227....
2      [0, 43.861tc, 138.016tc, 154.101t, 183.072c, 2...
3      [0, 19.029tc, 53.067t, 123.307c, 141.931t, 216...
4      [0, 29.067tc, 230.293c, 236.256t, 260.235c, 29...
                             ...                        
348      [0, 25.6t, 31.584c, 207.627c, 242.08t, 294.722]
349    [0, 18.88c, 33.44t, 95.573t, 169.909tc, 205.67...
350    [0, 15.573t, 33.035c, 41.813t, 164.853tc, 177....
351    [0, 37.035t, 64.821c, 112.96c, 181.728t, 199.6...
352    [0, 15.349c, 29.504t, 58.4t, 74.197c, 114.379c...
Name: combined_segments, Length: 353, dtype: object


In [18]:
def create_segment_columns(row):
    # Initialize the segment columns with None
    for i in range(4):
        row[f'tempo_seg_{i}'] = None
        row[f'chroma_seg_{i}'] = None

    # Create the tuples for tempo segments
    for i in range(4):
        start_time = 0 if i == 0 else row[f'tempo_time_{i}']
        end_time = row[f'tempo_time_{i+1}'] if i < 3 else row['duration']
        
        # Only create a tuple if start_time and end_time are not null
        if pd.notnull(start_time) and pd.notnull(end_time):
            row[f'tempo_seg_{i}'] = (start_time, end_time)

    # Create the tuples for chroma segments
    for i in range(4):
        start_time = 0 if i == 0 else row[f'chroma_time_{i}']
        end_time = row[f'chroma_time_{i+1}'] if i < 3 else row['duration']
        
        # Only create a tuple if start_time and end_time are not null
        if pd.notnull(start_time) and pd.notnull(end_time):
            row[f'chroma_seg_{i}'] = (start_time, end_time)

    return row

# Apply the function to each row in the DataFrame
df = df.apply(create_segment_columns, axis=1)
df

Unnamed: 0,sp_danceability,sp_energy,sp_key,sp_loudness,sp_mode,sp_speechiness,sp_acousticness,sp_instrumentalness,sp_liveness,sp_valence,...,chroma_time_3,combined_segments,tempo_seg_0,chroma_seg_0,tempo_seg_1,chroma_seg_1,tempo_seg_2,chroma_seg_2,tempo_seg_3,chroma_seg_3
0,0.784,0.521,1,-5.701,1,0.0322,0.06200,0.000005,0.0995,0.817,...,163.125333,"[0, 3.659t, 78.229t, 104.576c, 116.064c, 127.4...","(0, 3.6586666666666665)","(0, 104.576)","(3.6586666666666665, 78.22933333333333)","(104.576, 116.064)","(78.22933333333333, 127.456)","(116.064, 163.12533333333334)","(127.456, 164.771)","(163.12533333333334, 164.771)"
1,0.905,0.838,6,-6.838,1,0.0499,0.00112,0.839000,0.6080,0.464,...,227.210667,"[0, 31.083c, 60.757tc, 120.885t, 165.92t, 227....","(0, 60.757333333333335)","(0, 31.082666666666668)","(60.757333333333335, 120.88533333333334)","(31.082666666666668, 60.821333333333335)","(120.88533333333334, 165.92)","(60.821333333333335, 227.21066666666667)","(165.92, 259.995)","(227.21066666666667, 259.995)"
2,0.897,0.692,11,-4.985,0,0.0492,0.01870,0.725000,0.0603,0.607,...,183.072000,"[0, 43.861tc, 138.016tc, 154.101t, 183.072c, 2...","(0, 43.86133333333333)","(0, 44.053333333333335)","(43.86133333333333, 138.016)","(44.053333333333335, 139.92533333333333)","(138.016, 154.10133333333334)","(139.92533333333333, 183.072)","(154.10133333333334, 276.425)","(183.072, 276.425)"
3,0.724,0.792,1,-3.332,0,0.1020,0.01140,0.000008,0.6250,0.559,...,141.717333,"[0, 19.029tc, 53.067t, 123.307c, 141.931t, 216...","(0, 19.029333333333334)","(0, 19.57333333333333)","(19.029333333333334, 53.06666666666667)","(19.57333333333333, 123.30666666666669)","(53.06666666666667, 141.93066666666667)","(123.30666666666669, 141.71733333333333)","(141.93066666666667, 216.506)","(141.71733333333333, 216.506)"
4,0.710,0.729,2,-4.978,1,0.0539,0.07430,0.000004,0.0419,0.532,...,260.234667,"[0, 29.067tc, 230.293c, 236.256t, 260.235c, 29...","(0, 29.066666666666663)","(0, 29.610666666666667)","(29.066666666666663, 236.256)","(29.610666666666667, 230.2933333333333)","(236.256, 292.36266666666666)","(230.2933333333333, 260.23466666666667)","(292.36266666666666, 323.466)","(260.23466666666667, 323.466)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348,0.579,0.759,2,-5.584,0,0.0386,0.06000,0.000138,0.1170,0.127,...,241.802667,"[0, 25.6t, 31.584c, 207.627c, 242.08t, 294.722]","(0, 23.584)","(0, 31.584)","(23.584, 25.6)","(31.584, 207.6266666666667)","(25.6, 242.08)","(207.6266666666667, 241.80266666666665)","(242.08, 294.722)","(241.80266666666665, 294.722)"
349,0.583,0.860,8,-4.347,1,0.0842,0.02310,0.000001,0.0977,0.279,...,205.674667,"[0, 18.88c, 33.44t, 95.573t, 169.909tc, 205.67...","(0, 33.44)","(0, 18.88)","(33.44, 95.57333333333334)","(18.88, 170.02666666666667)","(95.57333333333334, 169.90933333333334)","(170.02666666666667, 205.67466666666667)","(169.90933333333334, 218.284)","(205.67466666666667, 218.284)"
350,0.572,0.864,2,-4.098,0,0.0477,0.02150,0.000000,0.3750,0.260,...,166.133333,"[0, 15.573t, 33.035c, 41.813t, 164.853tc, 177....","(0, 15.573333333333334)","(0, 33.034666666666666)","(15.573333333333334, 41.81333333333333)","(33.034666666666666, 161.83466666666666)","(41.81333333333333, 164.85333333333332)","(161.83466666666666, 166.13333333333333)","(164.85333333333332, 177.839)","(166.13333333333333, 177.839)"
351,0.509,0.781,8,-3.480,1,0.0720,0.03980,0.000000,0.0749,0.176,...,199.605333,"[0, 37.035t, 64.821c, 112.96c, 181.728t, 199.6...","(0, 37.034666666666666)","(0, 64.82133333333333)","(37.034666666666666, 181.728)","(64.82133333333333, 112.96)","(181.728, 211.66933333333333)","(112.96, 199.60533333333333)","(211.66933333333333, 266.043)","(199.60533333333333, 266.043)"


In [19]:
df.to_csv(r'../data/dataframes/segments.csv', index=False)

In [35]:
# Melt the DataFrame to long format
df_long = df.melt(id_vars=['track_id', 'artist_ids', 'genre_list', 'track_name', 'artist_names',
       'filename', 'duration', 'ks_key', 'key_corr', 'camelot_key'],
       value_vars=['tempo_seg_0', 'tempo_seg_1', 'tempo_seg_2', 'tempo_seg_3',
       'chroma_seg_0', 'chroma_seg_1', 'chroma_seg_2', 'chroma_seg_3'],
       var_name='segment', value_name='value')

# Extract segment type from the 'segment' column
df_long['segment_type'] = df_long['segment'].str.split('_', expand=True)[0]

# Extract segment order from the 'segment' column
df_long['segment_order'] = df_long['segment'].str.split('_', expand=True)[2].astype(int)

# Extract start and end times from the 'value' column
df_long['value'] = df_long['value'].astype(str)
df_long[['start_time', 'end_time']] = df_long['value'].str[1:-1].str.split(',', expand=True).astype(float)

# Drop the original 'segment' and 'value' columns
df_long.drop(columns=['segment', 'value'], inplace=True)

df_long = df_long.sort_values(by=['track_name', 'segment_type', 'segment_order'], ascending=[True, False, True])
df_long['label'] = np.nan
df_long

Unnamed: 0,track_id,artist_ids,genre_list,track_name,artist_names,filename,duration,ks_key,key_corr,camelot_key,segment_type,segment_order,start_time,end_time,label
338,28X0E21uKdA7S1HvgMJjP8,['3534yWWzmxx8NbKVoNolsK'],"['progressive electro house', 'house', 'edm', ...",28 Grams,Wolfgang Gartner,Wolfgang Gartner - 28 Grams.mp3,184.329,F# major,0.409054,2B,tempo,0,0.000000,20.256000,
691,28X0E21uKdA7S1HvgMJjP8,['3534yWWzmxx8NbKVoNolsK'],"['progressive electro house', 'house', 'edm', ...",28 Grams,Wolfgang Gartner,Wolfgang Gartner - 28 Grams.mp3,184.329,F# major,0.409054,2B,tempo,1,20.256000,105.845333,
1044,28X0E21uKdA7S1HvgMJjP8,['3534yWWzmxx8NbKVoNolsK'],"['progressive electro house', 'house', 'edm', ...",28 Grams,Wolfgang Gartner,Wolfgang Gartner - 28 Grams.mp3,184.329,F# major,0.409054,2B,tempo,2,105.845333,178.154667,
1397,28X0E21uKdA7S1HvgMJjP8,['3534yWWzmxx8NbKVoNolsK'],"['progressive electro house', 'house', 'edm', ...",28 Grams,Wolfgang Gartner,Wolfgang Gartner - 28 Grams.mp3,184.329,F# major,0.409054,2B,tempo,3,178.154667,184.329000,
1750,28X0E21uKdA7S1HvgMJjP8,['3534yWWzmxx8NbKVoNolsK'],"['progressive electro house', 'house', 'edm', ...",28 Grams,Wolfgang Gartner,Wolfgang Gartner - 28 Grams.mp3,184.329,F# major,0.409054,2B,chroma,0,0.000000,27.978667,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1210,4JUD1cL0uwd4lJKI9jCgso,['6fs2or0cKLEM2xohWq8SoX'],['chinese electropop'],香芋的错觉 CAROUSEL,Lexie Liu,Lexie Liu - 香芋的错觉 CAROUSEL.mp3,195.530,A# major,0.597794,6B,tempo,3,145.952000,195.530000,
1563,4JUD1cL0uwd4lJKI9jCgso,['6fs2or0cKLEM2xohWq8SoX'],['chinese electropop'],香芋的错觉 CAROUSEL,Lexie Liu,Lexie Liu - 香芋的错觉 CAROUSEL.mp3,195.530,A# major,0.597794,6B,chroma,0,0.000000,16.416000,
1916,4JUD1cL0uwd4lJKI9jCgso,['6fs2or0cKLEM2xohWq8SoX'],['chinese electropop'],香芋的错觉 CAROUSEL,Lexie Liu,Lexie Liu - 香芋的错觉 CAROUSEL.mp3,195.530,A# major,0.597794,6B,chroma,1,16.416000,130.293333,
2269,4JUD1cL0uwd4lJKI9jCgso,['6fs2or0cKLEM2xohWq8SoX'],['chinese electropop'],香芋的错觉 CAROUSEL,Lexie Liu,Lexie Liu - 香芋的错觉 CAROUSEL.mp3,195.530,A# major,0.597794,6B,chroma,2,130.293333,145.738667,


In [36]:
df_long.to_csv(r'../data/dataframes/segments_long.csv', index=False)

Generate tempogram-segmented audio sections

In [None]:
# Create the output directory if it doesn't exist
output_dir = r'../data/audio_files/segmented/tempogram_segments'
os.makedirs(output_dir, exist_ok=True)

# Loop through the DataFrame with progress monitoring
for index, row in tqdm(df_long.iterrows(), total=df_long.shape[0], desc="Processing audio segments"):
    # Check if the segment type is 'tempo'
    if row['segment_type'] == 'tempo':
        # Load the audio file
        y, sr = librosa.load(os.path.join(r'../data/audio_files/processed_download', row['filename']))

        # Get the start and end times of the segment
        start_time = row['start_time']
        end_time = row['end_time']

        # Extract the segment from the audio file
        segment = y[int(start_time * sr):int(end_time * sr)]

        # Create the output file name
        output_file = (os.path.join(output_dir,
                                    row['filename'].split('.')[0] + '_' + row['segment_type'] + '_' + str(row['segment_order']) + '.mp3'))

        # Save the segment to an MP3 file
        sf.write(output_file, segment, sr, format='MP3')

Processing audio segments:  31%|███       | 865/2824 [03:06<05:36,  5.82it/s]