In [6]:
INPUT_AUDIO_FOLDER = 'inputAudio'
INPUT_AUDIO_FILENAME = 'marine'
OUTPUT_AUDIO_FOLDER = 'outputAudio'

import librosa
import ruptures as rpt
import matplotlib.pyplot as plt
from Utility import df_ops, music_ops, vmd_ops, io_ops, interpolate, cloud_ops
from spleeter.separator import Separator

In [7]:
def analyze_audio(audio_file_path, rms_window_length=0.02, rms_hop_length=0.01, 
                  change_threshold_factor=2.5, cooldown_time=0.2, low_level_percentile=25):
    # Load the audio file
    y, sr = librosa.load(audio_file_path, sr=None)

    # Calculate the RMS energy for each frame
    frame_length = int(rms_window_length * sr)
    hop_length = int(rms_hop_length * sr)
    rms_energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]

    # Smooth the RMS energy using a moving average
    smooth_rms_energy = np.convolve(rms_energy, np.ones(3)/3, mode='valid')

    # Calculate the change in RMS energy between consecutive frames
    rms_change = np.diff(smooth_rms_energy)

    # Define 'low' RMS energy level as a percentile of the RMS energy
    low_rms_level = np.percentile(smooth_rms_energy, low_level_percentile)

    # Define a threshold for significant RMS change
    rms_change_threshold = np.median(np.abs(rms_change)) * change_threshold_factor

    # Find points where RMS change is above the threshold and the previous RMS value is below the 'low' level
    significant_changes = np.where((rms_change > rms_change_threshold) & 
                                   (smooth_rms_energy[:-1] < low_rms_level))[0]

    # Convert frame numbers to time and apply cooldown (debouncing)
    significant_change_times = []
    last_time = 0
    for frame in significant_changes:
        time = librosa.frames_to_time(frame + 1, sr=sr, hop_length=hop_length)
        if time - last_time > cooldown_time:  # Apply cooldown period
            significant_change_times.append(time)
            last_time = time

    return significant_change_times

def generateMovesTimestampsFromAudio(inputAudioFolder, inputAudioFilename, outputAudioFolder):
  # Initialize separator in '2stems' mode.
  separator = Separator('spleeter:2stems')

  # Perform the separation.
  separator.separate_to_file(inputAudioFolder + "/" + inputAudioFilename + ".mp3", outputAudioFolder)
  VocalTrackFilePath = outputAudioFolder + "/" + inputAudioFilename + "/vocals.wav"

  significantAudioHits = analyze_audio(VocalTrackFilePath)

  movesArray = [music_ops.get_frameNumberFromSecond30Fps(x) for x in significantAudioHits]

  

generateMovesTimestampsFromAudio(INPUT_AUDIO_FOLDER, INPUT_AUDIO_FILENAME, OUTPUT_AUDIO_FOLDER)


INFO:tensorflow:Using config: {'_model_dir': 'pretrained_models\\2stems', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.7
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead
INFO:tensorflow:Calling model_fn.
INFO:tensorfl

In [7]:
# Load the audio file and extract a feature
outputVocals = OUTPUT_AUDIO_FOLDER + "/" + INPUT_AUDIO_FILENAME + "/" + "vocals" + ".wav"
inputVocals = INPUT_AUDIO_FOLDER + "/" + INPUT_AUDIO_FILENAME + ".mp3"

y, sr = librosa.load(inputVocals, sr=None)
feature = librosa.feature.rms(y=y)

# Use ruptures to detect change points
algo = rpt.Pelt(model="rbf").fit(feature[0])
result = algo.predict(pen=10)

# Visualize the detected change points on the feature
plt.figure(figsize=(10, 6))
rpt.display(feature[0], [], result)
plt.title("Change Point Detection on Audio Feature")
plt.show()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


BadSegmentationParameters: 

In [None]:
# Generate a silent track for the length of loaded audio
hop_length = 512

silent_track = np.zeros_like(y)

# Load a beep sound (assumed to be short and sampled at the same rate as the main audio)
beep, _ = librosa.load('beep_test.wav', sr=sr)

# Add a beep sound at detected feature locations
for change_point in result:
  start_sample = change_point * hop_length  # hop_length is the hop size used in feature extraction (default is 512 for RMS)
  end_sample = start_sample + len(beep)
  if end_sample < len(silent_track):
    silent_track[start_sample:end_sample] += beep

# Save the resultant audio
sf.write(OUTPUT_AUDIO_FOLDER + "/" + 'output_with_beeps.wav', silent_track, sr)

In [None]:
print(result[:10])
