# Use `ffmpeg` to cue and listen to a section of an audio file
This notebook allows you to listen in the browser. <br>

- it supports a wide variety of audio filetypes {WAV, FLAC, MP3, etc.} <br>
- it presumes the user has both [`ffmpeg` and `ffprobe`](https://ffmpeg.org/download.html) accessible for computation purposes <br>

---

First, you may wish to know where the `ffmpeg` and `ffprobe` executables are located (`where` is the Windows command; on macOS/Linux use `which` instead):

In [None]:
!where ffmpeg

In [None]:
!where ffprobe

#### We must import libraries and define functions

In [None]:
import datetime as dt
import ipywidgets as widgets
from IPython.display import display, Audio
import json
import os
import re
import subprocess

def extract_start_time_from_filename(filename):
    """
    Extracts the start time from the given filename if it contains a valid datetime stamp.

    The name is scanned left to right for ``YYYYMMDD_HHMMSS`` candidates
    (8 digits, an underscore, 6 digits). The first candidate that is a real
    calendar date and clock time is returned. Candidates that merely look
    like a stamp (e.g. ``99999999_999999``) are skipped instead of raising,
    so a numeric folder or serial number earlier in a path does not hide a
    valid stamp later on.

    Args:
        filename (str): The name (or full path) of the audio file.

    Returns:
        datetime.datetime or None: The extracted start time as a naive datetime
        object, or None if no valid stamp is found.
    """
    # Matches YYYYMMDD_HHMMSS; the digits are validated by strptime below
    datetime_pattern = r'(\d{8})_(\d{6})'

    for match in re.finditer(datetime_pattern, filename):
        try:
            # group(0) is exactly "<8 digits>_<6 digits>"
            return dt.datetime.strptime(match.group(0), "%Y%m%d_%H%M%S")
        except ValueError:
            # Right shape, impossible date/time: keep looking
            continue
    return None

# Function to get codec and sample rate using ffprobe
def get_audio_info(ffprobe_path, input_file):
    """
    Queries the first audio stream of a file with ffprobe.

    Args:
        ffprobe_path (str): Path to the ffprobe executable.
        input_file (str): Path to the audio file to inspect.

    Returns:
        tuple: ``(codec, sample_rate)`` where ``codec`` is the stream's
        ``codec_name`` string and ``sample_rate`` is an int.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status
            (its captured message is available on the exception's ``stderr``).
        ValueError: If ffprobe reports no audio stream for the file.
    """
    command = [
        ffprobe_path,
        '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_name,codec_type,sample_rate',
        '-of', 'json',
        input_file
    ]

    # check=True (as in extract_audio_segment) surfaces ffprobe failures directly
    # instead of letting empty stdout fail later inside json.loads
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    audio_info = json.loads(result.stdout)

    streams = audio_info.get('streams') or []
    if not streams:
        raise ValueError(f"ffprobe found no audio stream in {input_file!r}")

    # Only stream a:0 was requested, so the first entry is the one of interest
    first_audio = streams[0]
    codec = first_audio['codec_name']
    sample_rate = int(first_audio['sample_rate'])  # ffprobe emits the rate as a string

    return codec, sample_rate

def filepath_selector(directory):
    """
    Displays a dropdown of the files in `directory` and tracks the chosen path.

    The choice is published through the module-level global
    `selected_file_path`. It is set immediately to the dropdown's initial
    value (or None if the directory holds no files) and updated every time
    the user picks a different entry.

    Args:
        directory (str): Directory whose regular files are offered for selection.

    Returns:
        None
    """
    # Must be declared global here too: without it, an assignment in this
    # function would only bind a local, leaving the global undefined until
    # the user changes the dropdown.
    global selected_file_path

    def get_selected_file(change):
        global selected_file_path  # Use the global variable
        selected_file = change['new']
        if selected_file:  # Check if a file is selected
            selected_file_path = os.path.join(directory, selected_file)  # Construct the full file path

    # List regular files only (sub-directories are skipped), sorted for a stable order
    files = sorted(f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)))

    # Create a dropdown widget for file selection
    file_dropdown = widgets.Dropdown(
        options=files,
        description='Select a file:',
        layout=widgets.Layout(width='800px')
    )

    # Seed the global with whatever the dropdown currently shows, so accepting
    # the pre-selected entry without touching the widget still works
    initial_choice = file_dropdown.value
    selected_file_path = os.path.join(directory, initial_choice) if initial_choice else None

    # Attach the callback before displaying so no selection change is missed
    file_dropdown.observe(get_selected_file, names='value')

    # Display the dropdown widget
    display(file_dropdown)

# Function to extract audio segment
def extract_audio_segment(ffmpeg_path, input_file, start_elapsed, duration, rate, volume=1.0, output_ext="wav", verbose=False):
    """
    Cuts a section out of an audio file with ffmpeg and writes it to the working directory.

    Args:
        ffmpeg_path (str): Path to the ffmpeg executable.
        input_file (str): Path to the source audio file.
        start_elapsed: Offset passed to ffmpeg's `-ss` (converted with `str`).
        duration: Length passed to ffmpeg's `-t` (converted with `str`).
        rate: Output sample rate; truncated to an int for `-ar`.
        volume (float): Gain factor given to ffmpeg's `volume` audio filter.
        output_ext (str): Extension of the output file, without the dot.
        verbose (bool): If True, print ffmpeg's captured stdout and stderr.

    Returns:
        str: The output file name, `extracted_audio.<output_ext>`.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    destination = "".join(("extracted_audio.", output_ext))

    ffmpeg_args = [ffmpeg_path, '-y']                 # -y: overwrite an existing output file
    ffmpeg_args += ['-ss', str(start_elapsed)]        # placed before -i
    ffmpeg_args += ['-i', input_file]
    ffmpeg_args += ['-t', str(duration)]
    ffmpeg_args += ['-ar', str(int(rate))]            # output sample rate
    ffmpeg_args += ['-filter:a', f'volume={volume}']  # volume amplification
    ffmpeg_args.append(destination)

    completed = subprocess.run(ffmpeg_args, check=True, capture_output=True)
    if verbose:
        for captured in (completed.stdout, completed.stderr):
            print(captured.decode())
    return destination


#### (1) Setup your paths + (2) Define the temporal bounds of the desired selection + (3) Select the audio file containing the bounds

In [None]:
# where are `ffmpeg` and `ffprobe`? (absolute paths to the executables; edit these for your machine)
ffmpeg_path = r"C:\Users\DBetchkal\AppData\Local\miniforge3\envs\geosound\Library\bin\ffmpeg.exe"
ffprobe_path = r"C:\Users\DBetchkal\AppData\Local\miniforge3\envs\geosound\Library\bin\ffprobe.exe"

# where are your audio files of interest?
directory = r'F:\Sound Data\2024 GLBABARTC Bartlett Cove\01 DATA\AUDIO'  # Change this to your desired directory of audio files

# between which two times would you like to extract?
# (naive datetimes; the next code cell subtracts the YYYYMMDD_HHMMSS stamp parsed
#  from the selected filename from these)
# NOTE(review): presumably these use the same clock/timezone as the filename stamps - confirm
#                              YYYY  M  D  H  MM  SS
desired_start_dt = dt.datetime(2024, 6, 7, 18, 10, 13)
desired_end_dt  =  dt.datetime(2024, 6, 7, 18, 16, 35)

# shows a dropdown of the files in `directory`; the dropdown's change callback
# stores the chosen file's full path in the global variable `selected_file_path`
filepath_selector(directory)

#### This block is mostly automatic - it relies on the `IPython.display.Audio` widget for audio playback

In [None]:
# recording start time, parsed from the YYYYMMDD_HHMMSS stamp in the selected file's name
file_begins = extract_start_time_from_filename(selected_file_path)
if file_begins is None:
    raise ValueError(
        f"No YYYYMMDD_HHMMSS timestamp found in {selected_file_path!r}; "
        "cannot locate the requested time span within the file."
    )

# user-defined start time and selection duration, as whole seconds
offset_seconds = int((desired_start_dt - file_begins).total_seconds())
span_seconds = int((desired_end_dt - desired_start_dt).total_seconds())
if offset_seconds < 0:
    raise ValueError(
        f"desired_start_dt ({desired_start_dt}) is earlier than the start of the "
        f"selected file ({file_begins}); pick a different file or a later start time."
    )
if span_seconds <= 0:
    raise ValueError(
        f"desired_end_dt ({desired_end_dt}) must be later than desired_start_dt ({desired_start_dt})."
    )

start_elapsed = str(offset_seconds)
dur = str(span_seconds)
# dur = 100 # or, you can just pass in a duration in seconds if preferred

# first we'll use `ffprobe` to find specific information about this audio file
file_codec,file_rate = get_audio_info(ffprobe_path, selected_file_path)

# extract the audio segment
extracted_audio_path = extract_audio_segment(ffmpeg_path, 
                                             selected_file_path, 
                                             start_elapsed, 
                                             duration=dur,
                                             rate=file_rate,   # automatically determined using `ffprobe`
                                             volume=2.0, # many natural audio recordings are faint: a multiplicative gain factor helps (try 2.0 to 4.0x)
                                             output_ext="mp3", # for large sections, "mp3" is faster... for lossless conversion, use "wav"
                                             verbose=False) # note, if you'd like detailed printout information, set to `True`

# play the extracted audio
audio_widget = Audio(extracted_audio_path)
display(audio_widget)