<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Edit-Podcast" data-toc-modified-id="Edit-Podcast-1">Edit Podcast</a></span><ul class="toc-item"><li><span><a href="#Goal" data-toc-modified-id="Goal-1.1">Goal</a></span></li><li><span><a href="#References" data-toc-modified-id="References-1.2">References</a></span></li><li><span><a href="#Imports" data-toc-modified-id="Imports-1.3">Imports</a></span></li><li><span><a href="#Code" data-toc-modified-id="Code-1.4">Code</a></span><ul class="toc-item"><li><span><a href="#Volume-tracks" data-toc-modified-id="Volume-tracks-1.4.1">Volume tracks</a></span></li></ul></li><li><span><a href="#Adjust-tracks-volume" data-toc-modified-id="Adjust-tracks-volume-1.5">Adjust tracks volume</a></span></li><li><span><a href="#Compose-the-program" data-toc-modified-id="Compose-the-program-1.6">Compose the program</a></span></li><li><span><a href="#TODO" data-toc-modified-id="TODO-1.7">TODO</a></span></li></ul></li></ul></div>

# Edit Podcast

## Goal

Automate podcast editing as much as possible.

## References

- https://librosa.org/doc/main/generated/librosa.feature.rms.html
- https://librosa.org/doc/main/generated/librosa.resample.html

## Imports

In [None]:
import os
import librosa
import soundfile as sf
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl

# Notebook-wide matplotlib defaults.
# NOTE(review): the empty plot/close pair looks like a backend warm-up so the
# rcParams set below are not reset on the first real plot — confirm.
plt.plot()
plt.close('all')
# Wide, short figures with thin lines and a larger font.
plt.rcParams["figure.figsize"] = (30, 5)
mpl.rcParams['lines.linewidth'] = 1
mpl.rcParams['font.size'] = 16

## Code

### Volume tracks

In [None]:
# Analysis settings shared by the RMS computation and the frame-to-time conversion.
frame_length = 2048  # passed to librosa.feature.rms as the analysis frame length
hop_length = 512     # passed to librosa.feature.rms as the hop between frames
target_sr = 8000     # sampling rate the tracks are resampled to for the gain analysis

def compute_gains_to_merge_audios(filepaths, target_sr=target_sr, db_goal=-20):
    """Compute one gain per track so that the merged signal sits around ``db_goal``.

    Args:
        filepaths: Paths of the audio tracks to analyse.
        target_sr: Sampling rate the tracks are resampled to before analysis.
        db_goal: Target level, in dB, forwarded to ``optimize_track_gains``.

    Returns:
        The list of gains returned by ``optimize_track_gains``, in the same
        order as ``filepaths``.
    """
    tracks = load_resampled_audios(filepaths, target_sr)
    rms_values = []
    for track in tqdm(tracks, desc='computing rms values'):
        framed_rms = librosa.feature.rms(y=track, frame_length=frame_length, hop_length=hop_length)
        # librosa returns a 2-D array; keep only its first row.
        rms_values.append(framed_rms[0])
    return optimize_track_gains(rms_values, db_goal)

def load_resampled_audios(filepaths, target_sr):
    """Load every track resampled to ``target_sr`` and crop them to a common length.

    Args:
        filepaths: Paths of the audio files to load.
        target_sr: Sampling rate passed to ``librosa.load``.

    Returns:
        A list with one array per file, all of the same length (an empty list
        when ``filepaths`` is empty).
    """
    audios = [
        librosa.load(filepath, sr=target_sr, res_type='linear')[0]
        for filepath in tqdm(filepaths, desc='loading audios')
    ]
    # Crop to the shortest track rather than to the first one: cropping to the
    # first track leaves any shorter track with a different length, and the
    # per-frame RMS arrays can then no longer be combined element-wise.
    common_length = min((len(audio) for audio in audios), default=0)
    return [audio[:common_length] for audio in audios]

In [None]:
def optimize_track_gains(rms_values, db_goal=-20, n_runs=10, factors=(0.8, 1.25, 1.5, 2)):
    """Greedily search per-track gains that maximise the fitness of the merged RMS.

    Starting from a gain of 1 for every track, each pass tries multiplying one
    track's gain by each factor and keeps a change only when it strictly
    improves the value returned by ``measure_fitness`` for the merged signal.
    Histograms are shown before and after the search, and the merged energy
    over time is plotted at the end.

    Args:
        rms_values: One RMS array per track.
        db_goal: Target level, in dB, forwarded to ``measure_fitness``.
        n_runs: Maximum number of passes over all (factor, track) pairs.
        factors: Multiplicative steps tried on each track's gain.

    Returns:
        The list of gains, one per track.
    """
    gains = [1 for _ in rms_values]
    visualize_tracks_with_gains(rms_values, gains)
    best_fitness = measure_fitness(merge_rms(rms_values, gains), db_goal)
    print(f'Initial fitness: {best_fitness:.2f}')

    for _ in tqdm(range(n_runs), desc='Optimizing track gain'):
        improved = False
        for factor in factors:
            for track_idx in range(len(rms_values)):
                new_gains = gains.copy()
                new_gains[track_idx] *= factor
                fitness = measure_fitness(merge_rms(rms_values, new_gains), db_goal)
                if fitness > best_fitness:
                    gains = new_gains
                    best_fitness = fitness
                    improved = True
        # The search is deterministic: a pass that accepted no change would be
        # repeated verbatim by every remaining pass, so stop here.
        if not improved:
            break
    print(f'Final fitness: {best_fitness:.2f}')
    visualize_tracks_with_gains(rms_values, gains)
    visualize_merged_energy(rms_values, gains)
    return gains

def merge_rms(rms_values, gains):
    """Combine per-track RMS values into the RMS of their gain-weighted sum.

    Each track's RMS is scaled by its gain and squared, the squares are added
    element-wise, and the square root of the total is returned.

    Args:
        rms_values: One RMS array per track.
        gains: One gain per track, in the same order as ``rms_values``.

    Returns:
        The element-wise merged RMS.

    Raises:
        IndexError: If there are no tracks or no gains.
    """
    energies = [(rms * gain) ** 2 for rms, gain in zip(rms_values, gains)]
    total = energies[0]
    for energy in energies[1:]:
        total += energy
    return np.sqrt(total)

def measure_fitness(rms, db_goal, goal_width=2):
    """Return the fraction of values whose dB level is close to ``db_goal``.

    Args:
        rms: RMS amplitudes; converted to dB with ``librosa.amplitude_to_db``.
        db_goal: Centre of the accepted band, in dB.
        goal_width: Half-width of the accepted band, in dB.

    Returns:
        The mean of the boolean mask selecting values strictly between
        ``db_goal - goal_width`` and ``db_goal + goal_width``.
    """
    levels_db = librosa.amplitude_to_db(rms)
    above_floor = levels_db > db_goal - goal_width
    below_ceiling = levels_db < db_goal + goal_width
    return np.mean(above_floor & below_ceiling)


def visualize_tracks_with_gains(rms_values, gains, labels=None):
    """Plot dB-level histograms of every gain-adjusted track and of their merge.

    Args:
        rms_values: One RMS array per track.
        gains: One gain per track, in the same order as ``rms_values``.
        labels: Optional legend labels, one per track. When omitted, labels
            are derived from the base names of the notebook-level
            ``filepaths`` variable if it exists; tracks without a label are
            shown as ``track <index>``.
    """
    if labels is None:
        # Previously the labels were always read from the global ``filepaths``,
        # which fails with NameError when that cell has not been run yet.
        try:
            track_paths = filepaths
        except NameError:
            track_paths = []
        labels = [os.path.splitext(os.path.basename(path))[0] for path in track_paths]

    bins = np.linspace(-50, 0, 100)
    for idx, rms in enumerate(rms_values):
        label = labels[idx] if idx < len(labels) else f'track {idx}'
        plt.hist(librosa.amplitude_to_db(rms*gains[idx]), bins=bins, alpha=0.5, label=label, density=True)
    plt.hist(librosa.amplitude_to_db(merge_rms(rms_values, gains)), bins=bins, alpha=0.5, label='merge', density=True)
    plt.grid()
    plt.legend(loc=0)
    plt.show()

def visualize_merged_energy(rms_values, gains):
    """Plot the merged RMS level in dB over time, with a smoothed overlay.

    The time axis is in minutes and is derived from the module-level
    ``target_sr``, ``hop_length`` and ``frame_length`` settings. The overlay
    is a 100-frame moving average of the dB curve.

    Args:
        rms_values: One RMS array per track.
        gains: One gain per track, in the same order as ``rms_values``.
    """
    merged_db = librosa.amplitude_to_db(merge_rms(rms_values, gains))
    frame_indices = range(len(merged_db))
    seconds = librosa.frames_to_time(frame_indices, sr=target_sr, hop_length=hop_length, n_fft=frame_length)
    minutes = seconds/60

    plt.plot(minutes, merged_db)
    plt.plot(minutes, moving_average(merged_db, 100))
    plt.title('RMS Energy')
    plt.xlabel('Time (minutes)')
    plt.ylabel('Energy (dB)')
    plt.grid(axis='y')
    plt.show()

def moving_average(data, window_size):
    """Smooth ``data`` with a uniform window of ``window_size`` samples.

    The result has the same length as ``data`` (``np.convolve`` in ``'same'``
    mode), so values near both ends are averaged against implicit zeros.

    Args:
        data: 1-D sequence of values.
        window_size: Number of samples in the averaging window.

    Returns:
        The smoothed values as a numpy array.
    """
    kernel = np.ones(window_size)
    kernel /= window_size
    return np.convolve(data, kernel, mode='same')

In [None]:
def merge_audios_with_gains(filepaths, gains):
    """Mix several audio files into one signal, scaling each by its gain.

    The first file is loaded at its native sampling rate and defines both the
    output rate and the output length. Every later file is loaded at that
    same rate, so tracks recorded at a different rate are resampled instead
    of being summed sample-by-sample at mismatched rates. Later files that are
    longer than the first are truncated; shorter ones only cover the start.

    Args:
        filepaths: Paths of the audio files to mix.
        gains: One gain per file, in the same order as ``filepaths``.

    Returns:
        A tuple ``(audio, sr)`` with the mixed signal and its sampling rate,
        or ``(None, None)`` when there is nothing to mix.
    """
    audio, sr = None, None
    for filepath, gain in tqdm(zip(filepaths, gains), total=len(filepaths), desc='merging audios'):
        # ``sr`` is still None for the first file (native rate); afterwards it
        # holds the first file's rate, which librosa uses as the target rate.
        new_audio, new_sr = librosa.load(filepath, sr=sr)
        if audio is None:
            audio = new_audio*gain
            sr = new_sr
        else:
            overlap = min(len(audio), len(new_audio))
            audio[:overlap] += new_audio[:overlap]*gain
    return audio, sr

In [None]:
# A bare `raise` with no active exception fails with RuntimeError.
# NOTE(review): presumably placed here on purpose so that "Run All" stops
# before the experiment cells below — confirm.
raise

## Adjust tracks volume

There are many speakers in the podcast and they should have the same volume. When merging the tracks into a single one we would like to see a uniform volume level.

In [None]:
# Aligned tracks of the tertulia_15_hardware episode, one path per line
# (presumably one track per speaker).
filepaths = """
/mnt/data/other/data/TERTULia/episodios/tertulia_15_hardware/aligned_audios/gbarbadillo.wav
/mnt/data/other/data/TERTULia/episodios/tertulia_15_hardware/aligned_audios/jgoros.wav
/mnt/data/other/data/TERTULia/episodios/tertulia_15_hardware/aligned_audios/vgoni.wav
"""
filepaths = filepaths.strip().splitlines()
print('\n'.join(filepaths))
# Search the per-track gains for a -30 dB goal, then mix the original files
# with those gains and save the result.
gains = compute_gains_to_merge_audios(filepaths, db_goal=-30)
print(f'Gains: {gains}')
audio, sr = merge_audios_with_gains(filepaths, gains)
sf.write('temp.wav', audio, sr)

In [None]:
# Aligned tracks of the tertulia_14 episode, one path per line
# (presumably one track per speaker).
filepaths = """
/mnt/data/other/data/TERTULia/episodios/tertulia_14/aligned_audios/clarriu.wav
/mnt/data/other/data/TERTULia/episodios/tertulia_14/aligned_audios/gbarbadillo.wav
/mnt/data/other/data/TERTULia/episodios/tertulia_14/aligned_audios/iolcoz.wav
/mnt/data/other/data/TERTULia/episodios/tertulia_14/aligned_audios/vgoni.wav
"""
filepaths = filepaths.strip().splitlines()
print('\n'.join(filepaths))
# Search the per-track gains for a -30 dB goal, then mix the original files
# with those gains. The result is written to the same 'temp.wav' as the
# previous cell, so it overwrites that file.
gains = compute_gains_to_merge_audios(filepaths, db_goal=-30)
print(f'Gains: {gains}')
audio, sr = merge_audios_with_gains(filepaths, gains)
sf.write('temp.wav', audio, sr)

## Compose the program

In [None]:
def compose_program(intro_filepath,
                    episode_filepath,
                    output_filepath,
                    intro_music_filepath='/mnt/data/other/data/TERTULia/sound_library/intro_music_v5_auto.mp3',
                    outro_music_filepath='/mnt/data/other/data/TERTULia/sound_library/outro_v5_auto.mp3',
                    intro_music_start_duration=55,
                    intro_music_high_duration=15,
                    outro_music_high_duration=15,
                    sr=48000):
    """Assemble the final program: spoken intro over music, episode, and outro music.

    The beginning of the intro music is cropped so that the spoken intro,
    mixed over the start of what remains, ends ``intro_music_start_duration``
    seconds into the original music. The episode is padded with silence (the
    spoken intro length plus ``intro_music_high_duration`` seconds before it,
    and ``outro_music_high_duration`` seconds after it); the intro music is
    mixed in from the start and the outro music is mixed in so that it
    finishes at the very end.

    Args:
        intro_filepath: Path of the spoken intro recording.
        episode_filepath: Path of the episode audio.
        output_filepath: Path where the composed program is written.
        intro_music_filepath: Path of the intro music.
        outro_music_filepath: Path of the outro music.
        intro_music_start_duration: Position, in seconds of the original intro
            music, at which the spoken intro must end.
        intro_music_high_duration: Seconds of music-only padding between the
            end of the spoken intro and the start of the episode.
        outro_music_high_duration: Seconds of padding appended after the episode.
        sr: Sampling rate used to load every file and to write the output.

    Raises:
        ValueError: If the spoken intro is not shorter than
            ``intro_music_start_duration``.
    """
    audio = librosa.load(intro_filepath, sr=sr)[0]
    intro_music = librosa.load(intro_music_filepath, sr=sr)[0]
    crop_intro = intro_music_start_duration - len(audio)/sr
    # Explicit check instead of ``assert``: asserts are stripped under ``-O``,
    # and a negative crop would silently slice the music from its end.
    if crop_intro <= 0:
        raise ValueError(
            f'The spoken intro lasts {len(audio)/sr:.1f} s; it must be shorter than '
            f'intro_music_start_duration ({intro_music_start_duration} s)')
    intro_music = intro_music[int(crop_intro*sr):]
    intro_music[:len(audio)] += audio

    outro_music = librosa.load(outro_music_filepath, sr=sr)[0]

    episode = librosa.load(episode_filepath, sr=sr)[0]
    leading_silence = int(len(audio) + intro_music_high_duration*sr)
    trailing_silence = int(outro_music_high_duration*sr)
    episode = np.pad(episode, (leading_silence, trailing_silence), 'constant')
    episode[:len(intro_music)] += intro_music
    # Align the outro music with the end of the padded episode.
    episode[-len(outro_music):] += outro_music
    print(f'Saving program to {output_filepath}...')
    sf.write(output_filepath, episode, sr)

In [None]:
# tertulia_14: combine the recorded intro with the merged tracks saved in
# 'temp.wav', using the default intro/outro music.
compose_program(intro_filepath='/mnt/data/other/data/TERTULia/episodios/tertulia_14/raw_audios/20240409-155437_alsa2.wav',
                episode_filepath='temp.wav',
                output_filepath='program.mp3')

In [None]:
# tertulia_15_hardware: combine the recorded intro with the curated episode
# audio, using the default intro/outro music. Writes to the same 'program.mp3'
# as the previous cell.
compose_program(intro_filepath='/mnt/data/other/data/TERTULia/episodios/tertulia_15_hardware/raw_audios/intro1.m4a',
                episode_filepath='/mnt/data/other/data/TERTULia/episodios/tertulia_15_hardware/curated_audios/part1_v1.wav',
                output_filepath='program.mp3')

## Create video

We have to create a video using the program's audio and the episode thumbnail.

## TODO

- [x] Automatically search for the gains that give a well-balanced merged audio
- [x] Try with audio from other episodes
- [ ] Find the correspondence between Audacity's dB values and this notebook's dB values.
- [ ] Add a script to compose the program
- [ ] Add a script to create the video for YouTube