In [6]:
import librosa
import numpy as np
import pandas as pd  # Make sure pandas is imported
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
from bokeh.models import Span, LinearColorMapper, ColorBar
from bokeh.models import FixedTicker
import warnings
import requests

# Render Bokeh figures inline in the notebook.
output_notebook()

# ---------------------------------------------------------------------------
# Audio input
# ---------------------------------------------------------------------------
# The recording is read from a local path. If it is not present yet, fetch
#   https://github.com/egorpol/beat_it/raw/refs/heads/main/examples/Tekonivel_Voimamies.aif
# (e.g. `requests.get(audio_url)`) and write `response.content` to
# `local_audio_path` in binary mode before running this cell.
local_audio_path = 'examples/Tekonivel_Voimamies.aif'

# Decode the audio into a sample array `y` and its sampling rate `sr`.
y, sr = librosa.load(local_audio_path)

# Total length in seconds = number of samples / samples per second.
duration = len(y) / sr
print(f"The audio file is {duration:.2f} seconds long.")

# ---------------------------------------------------------------------------
# Segments to analyse (seconds); entry i of each list describes segment i
# ---------------------------------------------------------------------------
start_times = [10, 30, 60, 120, 180]
end_times = [12, 32, 62, 122, 182]

# The two lists are zipped later, so a length mismatch would silently drop segments.
if not len(start_times) == len(end_times):
    raise ValueError("start_times and end_times must be lists of the same length.")

# ---------------------------------------------------------------------------
# What to draw
# ---------------------------------------------------------------------------
# Available features: 'amplitude_envelope', 'spectral_flux', 'zero_crossing_rate',
# 'spectral_centroid', 'melspectrogram', 'stft_spectrogram'
features_to_plot = ['amplitude_envelope', 'spectral_flux', 'stft_spectrogram']

# Available vertical-line overlays: 'custom_interval_lines', 'cuepoints'
lines_to_plot = ['custom_interval_lines']

# ---------------------------------------------------------------------------
# Cuepoint lists (each entry: display name, onset times in seconds, line colour)
# ---------------------------------------------------------------------------
url1 = 'https://raw.githubusercontent.com/egorpol/beat_it/refs/heads/main/csv/dufour_onsets_librosa_unfiltered.csv'
df1 = pd.read_csv(url1)
peaks_in_sec1 = df1['onset_times'].values

# To overlay further lists, read another CSV the same way (for example into
# `df2` / `peaks_in_sec2`) and add one more dict with its own 'name', 'times'
# and 'color' to this list.
cuepoint_lists = [
    {'name': 'Onsets Unfiltered', 'times': peaks_in_sec1, 'color': 'orange'},
]

# ---------------------------------------------------------------------------
# Analysis parameters
# ---------------------------------------------------------------------------
# Hop size (samples) shared by the frame-based features.
hop_length = 512

# Regular grid of vertical lines: anchor time and spacing, both in seconds.
start_time_custom = 10.031
interval = 0.4451

# STFT framing: window length in samples and fractional overlap between windows.
window_size = 1024
overlap = 0.5
n_fft = window_size
hop_length_stft = int(window_size * (1 - overlap))

# Frequency range (Hz) shown in the STFT plot.
y_min = 0
y_max = 10000

# Collects one Bokeh layout per processed segment.
segment_layouts = []

# Process each segment
# Build one column of Bokeh figures per (start_time, end_time) segment, overlay
# the requested vertical marker lines, and finally display all segments stacked.
#
# Reads the configuration defined earlier in this cell: y, sr, duration,
# start_times, end_times, features_to_plot, lines_to_plot, cuepoint_lists,
# hop_length, start_time_custom, interval, n_fft, hop_length_stft, y_min, y_max.
# Appends to segment_layouts and defines layout.

# A non-positive spacing cannot describe a grid (and would previously loop forever).
if 'custom_interval_lines' in lines_to_plot and interval <= 0:
    raise ValueError("interval must be positive.")

for idx, (start_time, end_time) in enumerate(zip(start_times, end_times)):
    # Skip segments that reach outside the audio file
    if start_time < 0 or end_time > duration:
        warnings.warn(f"Segment {idx+1} start_time or end_time is out of bounds. Skipping this segment.")
        continue  # Skip this segment

    # Skip empty or reversed segments: they would yield an empty sample slice
    if start_time >= end_time:
        warnings.warn(f"Segment {idx+1} has start_time >= end_time. Skipping this segment.")
        continue

    # Calculate the start and end sample indices
    start_sample = int(start_time * sr)
    end_sample = int(end_time * sr)

    # Extract the segment of interest
    y_segment = y[start_sample:end_sample]

    # Compute features for the segment.
    # Note: 'amplitude_envelope' holds librosa's onset-strength curve, and
    # 'spectral_flux' is the frame-to-frame difference of the mel spectrogram,
    # kept per mel band (2-D) rather than summed into a single curve.
    amplitude_envelope = librosa.onset.onset_strength(y=y_segment, sr=sr, hop_length=hop_length)
    S = librosa.feature.melspectrogram(y=y_segment, sr=sr, hop_length=hop_length)
    spectral_flux = np.diff(S, axis=1)
    zcr = librosa.feature.zero_crossing_rate(y_segment, hop_length=hop_length)
    spectral_centroid = librosa.feature.spectral_centroid(y=y_segment, sr=sr, hop_length=hop_length)
    melspectrogram = S  # Already computed

    # Frame times for the envelope, shifted so the x-axis shows absolute time in the file
    times = librosa.frames_to_time(np.arange(len(amplitude_envelope)), sr=sr, hop_length=hop_length) + start_time

    # Figures for this segment, keyed by feature name (insertion order = display order)
    plots = {}

    # Create plots based on selected features
    if 'amplitude_envelope' in features_to_plot:
        p1 = figure(title=f"Amplitude Envelope (Segment {idx+1})", x_axis_label='Time (s)', y_axis_label='Amplitude', width=900, height=300)
        p1.line(times, amplitude_envelope, legend_label="Amplitude Envelope")
        plots['amplitude_envelope'] = p1

    if 'spectral_flux' in features_to_plot:
        p2 = figure(title=f"Spectral Flux (Segment {idx+1})", x_axis_label='Time (s)', y_axis_label='Spectral Flux', width=900, height=300)
        times_flux = librosa.frames_to_time(np.arange(spectral_flux.shape[1]), sr=sr, hop_length=hop_length) + start_time
        # One line per mel band
        for i in range(spectral_flux.shape[0]):
            p2.line(times_flux, spectral_flux[i])
        plots['spectral_flux'] = p2

    if 'zero_crossing_rate' in features_to_plot:
        p3 = figure(title=f"Zero-Crossing Rate (Segment {idx+1})", x_axis_label='Time (s)', y_axis_label='Zero-Crossing Rate', width=900, height=300)
        times_zcr = librosa.frames_to_time(np.arange(len(zcr[0])), sr=sr, hop_length=hop_length) + start_time
        p3.line(times_zcr, zcr[0])
        plots['zero_crossing_rate'] = p3

    if 'spectral_centroid' in features_to_plot:
        p4 = figure(title=f"Spectral Centroid (Segment {idx+1})", x_axis_label='Time (s)', y_axis_label='Spectral Centroid', width=900, height=300)
        times_centroid = librosa.frames_to_time(np.arange(len(spectral_centroid[0])), sr=sr, hop_length=hop_length) + start_time
        p4.line(times_centroid, spectral_centroid[0])
        plots['spectral_centroid'] = p4

    # Mel-spectrogram image with a dB colour bar
    if 'melspectrogram' in features_to_plot:
        p5 = figure(title=f"Mel-Spectrogram (Segment {idx+1})", x_axis_label='Time (s)', y_axis_label='Frequency (Mel)', width=900, height=600)

        # Convert power spectrogram to decibel units (relative to the segment maximum)
        mel_db = librosa.power_to_db(melspectrogram, ref=np.max)

        # Compute the time values for the x-axis
        times_mel = librosa.frames_to_time(np.arange(mel_db.shape[1]), sr=sr, hop_length=hop_length) + start_time

        # Create a color mapper for the spectrogram
        color_mapper = LinearColorMapper(palette="Viridis256", low=np.min(mel_db), high=np.max(mel_db))

        # Plot the spectrogram image; the y-axis is in mel-bin index units
        p5.image(image=[mel_db],
                 x=times_mel[0],
                 y=0,
                 dw=times_mel[-1] - times_mel[0],
                 dh=mel_db.shape[0],
                 color_mapper=color_mapper)

        # Add a color bar to the right of the plot
        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0, 0), title='dB')
        p5.add_layout(color_bar, 'right')

        # Centre frequency of each mel bin in Hz, converted to mel for labelling
        mel_frequencies = librosa.mel_frequencies(n_mels=mel_db.shape[0], fmin=0, fmax=sr/2)
        mel_values = librosa.hz_to_mel(mel_frequencies)

        # Ten evenly spaced bin indices become the tick positions
        num_ticks = 10
        tick_positions = np.linspace(0, mel_db.shape[0]-1, num=num_ticks, dtype=int)
        tick_labels = np.round(mel_values[tick_positions], decimals=1)

        # Set y-axis ticks and labels to represent Mel frequencies
        p5.yaxis.ticker = FixedTicker(ticks=tick_positions.tolist())
        p5.yaxis.major_label_overrides = {int(pos): f"{label}" for pos, label in zip(tick_positions, tick_labels)}

        plots['melspectrogram'] = p5

    # STFT spectrogram restricted to [y_min, y_max] Hz
    if 'stft_spectrogram' in features_to_plot:
        # Compute STFT for the segment and convert magnitudes to dB
        stft = librosa.stft(y=y_segment, n_fft=n_fft, hop_length=hop_length_stft)
        stft_db_full = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

        # Frequency of each STFT row, evenly spaced from 0 to Nyquist
        freqs_full = np.linspace(0, sr / 2, num=stft_db_full.shape[0])

        # Find indices corresponding to y_min and y_max
        freq_indices = np.where((freqs_full >= y_min) & (freqs_full <= y_max))[0]

        # Subset the STFT and frequency arrays
        stft_db = stft_db_full[freq_indices, :]
        freqs = freqs_full[freq_indices]

        # Blend factor between a linear (0) and logarithmic (1) tick layout.
        # NOTE(review): the image below is always drawn on a linear axis, so
        # this only moves the tick positions, not the image rows.
        interp_factor = 0

        # Compute linear and logarithmic frequency axes within y_min and y_max
        # (the log axis starts at 1 Hz at the lowest to avoid log10(0))
        linear_freqs = np.linspace(y_min, y_max, num=len(freq_indices))
        log_freqs = np.logspace(np.log10(max(y_min, 1)), np.log10(y_max), num=len(freq_indices))

        # Interpolate between linear and logarithmic frequency axes
        interp_freqs = (1 - interp_factor) * linear_freqs + interp_factor * log_freqs

        # Limit the number of ticks (maximum 10)
        num_ticks = min(10, len(interp_freqs))
        tick_indices = np.linspace(0, len(interp_freqs) - 1, num=num_ticks).astype(int)
        # Plain Python floats, matching how the mel plot passes its ticks to Bokeh
        tick_locations = [float(freq) for freq in interp_freqs[tick_indices]]
        tick_labels = [f"{int(freq)} Hz" for freq in tick_locations]

        p7 = figure(
            title=f"STFT Spectrogram (Segment {idx+1})",
            x_axis_label='Time (s)',
            y_axis_label='Frequency (Hz)',
            width=900,
            height=600,
            y_range=(y_min, y_max)
        )

        # Time axis for STFT
        times_stft = librosa.frames_to_time(np.arange(stft_db.shape[1]), sr=sr, hop_length=hop_length_stft) + start_time

        # Create color mapper for the spectrogram
        color_mapper_stft = LinearColorMapper(palette="Viridis256", low=np.min(stft_db), high=np.max(stft_db))

        # Plot the STFT spectrogram
        p7.image(
            image=[stft_db],
            x=times_stft[0],
            y=y_min,
            dw=times_stft[-1] - times_stft[0],
            dh=y_max - y_min,
            color_mapper=color_mapper_stft
        )

        # Manually set y-axis ticks and labels based on interpolated frequencies
        p7.yaxis.ticker = FixedTicker(ticks=tick_locations)
        p7.yaxis.major_label_overrides = {tick: label for tick, label in zip(tick_locations, tick_labels)}

        # Add a color bar
        color_bar_stft = ColorBar(
            color_mapper=color_mapper_stft,
            label_standoff=12,
            location=(0, 0),
            title='dB'
        )
        p7.add_layout(color_bar_stft, 'right')

        plots['stft_spectrogram'] = p7

    # Marker positions are computed once per segment and limited to the segment
    # window, so each figure only receives the lines it can actually display.
    custom_line_times = []
    if 'custom_interval_lines' in lines_to_plot:
        # Grid point k sits at start_time_custom + k * interval (k >= 0).
        # Computing from the index avoids the drift of repeated "+= interval".
        k_first = max(0, int(np.floor((start_time - start_time_custom) / interval)))
        k_last = int(np.ceil((end_time - start_time_custom) / interval))
        grid_times = start_time_custom + np.arange(k_first, k_last + 1) * interval
        # floor/ceil above may overshoot by one step; trim exactly to the window
        in_segment = (grid_times >= start_time) & (grid_times <= end_time) & (grid_times < duration)
        custom_line_times = grid_times[in_segment].tolist()

    cue_lines = []  # (time, colour) pairs falling inside the current segment
    if 'cuepoints' in lines_to_plot:
        for cuepoint in cuepoint_lists:
            cue_color = cuepoint['color']
            for cue_time in cuepoint['times']:
                if start_time <= cue_time <= end_time:
                    cue_lines.append((float(cue_time), cue_color))

    # Every figure gets its own Span instances
    for plot in plots.values():
        for custom_time in custom_line_times:
            plot.add_layout(Span(location=custom_time, dimension='height', line_color='red', line_dash='dotted', line_width=2))
        for cue_time, cue_color in cue_lines:
            plot.add_layout(Span(location=cue_time, dimension='height', line_color=cue_color, line_dash='dashed', line_width=2))

    # Combine the plots for this segment into a single column
    segment_layout = column(*plots.values())

    # Add the segment layout to the list of segment layouts
    segment_layouts.append(segment_layout)

# Combine all segment layouts into a single layout
layout = column(*segment_layouts)

# Output the result (nothing to render if every segment was skipped)
if segment_layouts:
    show(layout)
else:
    warnings.warn("No segments were plotted; nothing to show.")


The audio file is 201.02 seconds long.


In [7]:
import math

# Grid definition: one known grid point (the anchor), the spacing between
# grid points, and the latest time to include. All values are in seconds.
start_time_custom = 10.031
interval = 0.4451
max_time = 201


def _anchor_minus(steps):
    """Return the anchor time moved back by `steps` whole intervals."""
    return start_time_custom - (steps * interval)


# Step 1: walk the anchor back by as many whole intervals as fit, which gives
# the grid point closest to (but not below) zero.
n_intervals = math.floor(start_time_custom / interval)
base_time = _anchor_minus(n_intervals)

# Floating-point rounding can push the result just below zero; in that case
# step back one interval fewer.
if base_time < 0:
    n_intervals -= 1
    base_time = _anchor_minus(n_intervals)

# Final clamp so the first grid point is never negative.
base_time = max(base_time, 0)

# Step 2: enumerate grid points from base_time upward. One extra step is
# generated so that a point landing exactly on max_time is not missed.
num_steps = math.ceil((max_time - base_time) / interval) + 1

# Round each point to millisecond precision and keep only those not past max_time.
_rounded_points = (round(base_time + i * interval, 3) for i in range(num_steps))
times_list = [t for t in _rounded_points if t <= max_time]

# Display the result
print(times_list)


[0.239, 0.684, 1.129, 1.574, 2.019, 2.464, 2.909, 3.355, 3.8, 4.245, 4.69, 5.135, 5.58, 6.025, 6.47, 6.915, 7.36, 7.806, 8.251, 8.696, 9.141, 9.586, 10.031, 10.476, 10.921, 11.366, 11.811, 12.257, 12.702, 13.147, 13.592, 14.037, 14.482, 14.927, 15.372, 15.817, 16.262, 16.707, 17.153, 17.598, 18.043, 18.488, 18.933, 19.378, 19.823, 20.268, 20.713, 21.159, 21.604, 22.049, 22.494, 22.939, 23.384, 23.829, 24.274, 24.719, 25.164, 25.61, 26.055, 26.5, 26.945, 27.39, 27.835, 28.28, 28.725, 29.17, 29.615, 30.061, 30.506, 30.951, 31.396, 31.841, 32.286, 32.731, 33.176, 33.621, 34.066, 34.511, 34.957, 35.402, 35.847, 36.292, 36.737, 37.182, 37.627, 38.072, 38.517, 38.963, 39.408, 39.853, 40.298, 40.743, 41.188, 41.633, 42.078, 42.523, 42.968, 43.413, 43.859, 44.304, 44.749, 45.194, 45.639, 46.084, 46.529, 46.974, 47.419, 47.865, 48.31, 48.755, 49.2, 49.645, 50.09, 50.535, 50.98, 51.425, 51.87, 52.316, 52.761, 53.206, 53.651, 54.096, 54.541, 54.986, 55.431, 55.876, 56.321, 56.767, 57.212, 57.657,

In [8]:
import pandas as pd

# Destination CSV for the projected onset grid (relative to the working directory).
file_path = 'tekonivel_onsets_bassdrum_projected.csv'

# Wrap the onset times in a single-column table; the column is named
# 'onset_times', the same column name the cuepoint CSV is read with.
df_seq = pd.DataFrame({"onset_times": times_list})

# Write the table without the row index so the file holds only the onset column.
df_seq.to_csv(file_path, index=False)

# Confirm where the sequence was written.
print(f"Sequence has been saved to '{file_path}'")

Sequence has been saved to 'tekonivel_onsets_bassdrum_projected.csv'
