In [12]:
import subprocess
import os
import csv
from math import ceil


In [26]:
# Notebook-level setting, initialised to 0.
# NOTE(review): presumably the number of trailing seconds to ignore per clip, but
# no function visible in this part of the notebook reads this name — confirm where
# (or whether) it is consumed.
ignore_last = 0

In [13]:
def get_audio_duration(file_path):
    """Ask ffprobe for the container duration of ``file_path``.

    Args:
        file_path: Path of the media file handed to ``ffprobe -i``.

    Returns:
        The duration in seconds, rounded to two decimal places. If ffprobe
        cannot be run or its output is not a number, the error is printed
        and ``0`` is returned instead of raising.
    """
    probe_cmd = [
        'ffprobe',
        '-i', file_path,
        '-show_entries', 'format=duration',
        '-v', 'quiet',
        '-of', 'csv=p=0',
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        # With csv=p=0 the only thing on stdout is the bare number.
        seconds = float(probe.stdout.strip())
        return round(seconds, 2)
    except Exception as e:
        print(f"Error getting duration for {file_path}: {e}")
        return 0

In [14]:
def get_audio_sample_rate(file_path):
    """Return the sample rate of the first audio stream in ``file_path``.

    ffprobe prints one line per selected stream, so the query is restricted
    to the first audio stream (``-select_streams a:0``) and only the first
    non-empty output line is parsed. Without that, a file carrying more than
    one stream produced multi-line output that ``int()`` rejected, and the
    function reported 0 for a perfectly valid file.

    Args:
        file_path: Path of the media file handed to ``ffprobe -i``.

    Returns:
        The sample rate as an ``int``. If ffprobe cannot be run or reports no
        usable value, the error is printed and ``0`` is returned instead of
        raising.
    """
    try:
        result = subprocess.run([
                'ffprobe',
                '-i', file_path,
                '-select_streams', 'a:0',
                '-show_entries', 'stream=sample_rate',
                '-v', 'quiet',
                '-of', 'csv=p=0'],
            capture_output=True, text=True)
        # Be tolerant of blank or extra lines: the first non-empty line wins.
        for line in result.stdout.splitlines():
            value = line.strip()
            if value:
                return int(value)
        raise ValueError("ffprobe reported no sample rate")
    except Exception as e:
        print(f"Error getting sample rate for {file_path}: {e}")
        return 0

In [15]:
def mp3_to_wav(audio_clips, input_folder, output_folder):
    """Convert each clip's MP3 in ``input_folder`` to a WAV in ``output_folder``.

    For every clip, ``{input_folder}/{Name}.mp3`` is converted with ffmpeg to
    ``{output_folder}/{Name}.wav`` unless that WAV already exists. Progress and
    problems are reported with ``print``; nothing is returned.

    Args:
        audio_clips: Iterable of dicts, each with a ``'Name'`` key holding the
            file name without extension.
        input_folder: Folder containing the ``.mp3`` files.
        output_folder: Folder that receives the ``.wav`` files; created if
            it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for clip in audio_clips:
        clip_name = clip['Name']
        mp3_path = f"{input_folder}/{clip_name}.mp3"
        wav_path = f"{output_folder}/{clip_name}.wav"

        if not os.path.exists(mp3_path):
            print(f"{mp3_path} does not exist!")
            continue

        if os.path.exists(wav_path):
            print(f"{clip_name}.wav already exists in {output_folder}")
            continue

        exit_code = subprocess.call([
            'ffmpeg',
            '-i', mp3_path,
            wav_path])

        if exit_code != 0:
            # A failed run can leave a truncated file behind; remove it so the
            # next run retries instead of reporting "already exists".
            if os.path.exists(wav_path):
                os.remove(wav_path)
            print(f"Failed to convert {clip_name}.mp3 (ffmpeg exit code {exit_code})")
            continue

        print(f"Converted {clip_name}.mp3 to {clip_name}.wav")

In [16]:
def is_already_in_csv(csv_output, file_path):
    """Report whether ``file_path`` appears in the ``Path`` column of a CSV.

    The file is read as ``utf-8-sig`` first. If that raises a
    ``UnicodeDecodeError`` the read is retried as ``latin-1``; any error
    during the retry is printed and treated as "not found".

    Args:
        csv_output: Path of the CSV file to search.
        file_path: Value compared for exact equality against each row's
            ``Path`` field.

    Returns:
        ``True`` if a row matches, ``False`` if none does, the file is
        missing, or the fallback read failed.
    """
    def _lists_path(encoding):
        # Scan the rows under the given encoding, stopping at the first match.
        with open(csv_output, mode='r', newline='', encoding=encoding) as handle:
            return any(record['Path'] == file_path for record in csv.DictReader(handle))

    if not os.path.exists(csv_output):
        return False

    try:
        return _lists_path('utf-8-sig')
    except UnicodeDecodeError:
        print(f"UnicodeDecodeError with utf-8-sig for {csv_output}, trying latin-1...")

    try:
        return _lists_path('latin-1')
    except Exception as e:
        print(f"Error reading CSV file {csv_output}: {e}")
        return False

In [None]:
# Chop WAV files into smaller chunks and append their paths and captions to a CSV file.
def chop_wav(audio_clips, input_folder, output_folder, csv_output, min_duration=7, max_duration=10, ignore_last=15):
    """Split each clip's WAV into consecutive chunks and log every chunk to a CSV.

    For each clip, ``{input_folder}/{Name}.wav`` is cut with ffmpeg into
    pieces of at most ``max_duration`` seconds, written to
    ``{output_folder}/{Name}_part{k}.wav`` as 16-bit PCM, 44100 Hz, 2 channels.
    One CSV row is appended per chunk that was created successfully.

    Args:
        audio_clips: Iterable of dicts with ``'Name'`` (file name without
            extension) and ``'Caption'`` keys.
        input_folder: Folder containing the source ``.wav`` files.
        output_folder: Folder that receives the chunks; created if missing.
        csv_output: CSV file to append to. A header row is written when the
            file is new or empty.
        min_duration: Accepted for interface compatibility.
            NOTE(review): this value is not applied anywhere, so a final
            chunk may be shorter than it — confirm whether short trailing
            chunks should be dropped.
        max_duration: Maximum length of a chunk, in seconds.
        ignore_last: Seconds trimmed from the end of every source file before
            chunking. The default of 15 matches the value that used to be
            hard-coded.

    Notes:
        * Clips whose source file is missing, or whose duration or sample
          rate cannot be read, are skipped.
        * Chunks whose path is already listed in ``csv_output`` are skipped.
        * The ``SampleRate`` column holds the sample rate read from the
          source file, while ``Duration`` is measured on the written chunk.
    """
    os.makedirs(output_folder, exist_ok=True)

    # An existing but empty file still needs a header, otherwise the first
    # data row would later be mistaken for the column names.
    needs_header = not os.path.isfile(csv_output) or os.path.getsize(csv_output) == 0

    # Open the CSV in append mode
    with open(csv_output, mode='a', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)

        if needs_header:
            csv_writer.writerow(['Path', 'TrueCaption', 'Generated', 'Model', 'Duration', 'SampleRate'])

        for clip in audio_clips:
            clip_name = clip['Name']
            caption = clip['Caption']
            input_path = f"{input_folder}/{clip_name}.wav"

            if not os.path.exists(input_path):
                print(f"{input_path} does not exist!")
                continue

            # Get original duration and sample rate of the WAV file
            total_duration = get_audio_duration(input_path)
            sample_rate = get_audio_sample_rate(input_path)

            # Both helpers return 0 (and print the reason) when probing fails.
            if total_duration == 0 or sample_rate == 0:
                continue

            # Exclude the trailing `ignore_last` seconds from chunking.
            usable_duration = max(0, total_duration - ignore_last)

            # Determine the number of chunks based on the max duration
            num_chunks = ceil(usable_duration / max_duration)

            for i in range(num_chunks):
                start_time = i * max_duration
                if start_time >= usable_duration:
                    break

                output_path = f"{output_folder}/{clip_name}_part{i + 1}.wav"

                # Check if this specific chunk already exists in CSV
                if is_already_in_csv(csv_output, output_path):
                    print(f"{output_path} already exists in {csv_output}, skipping.")
                    continue

                chunk_length = min(max_duration, usable_duration - start_time)

                # -y: a chunk file left on disk by an earlier run that never
                # reached the CSV is overwritten instead of making ffmpeg
                # prompt or refuse.
                exit_code = subprocess.call([
                    'ffmpeg',
                    '-y',
                    '-i', input_path,
                    '-ss', str(start_time),
                    '-t', str(chunk_length),
                    '-acodec', 'pcm_s16le',
                    '-ar', '44100',
                    '-ac', '2',
                    output_path])

                if exit_code != 0:
                    # Do not record a chunk that was never produced.
                    print(f"ffmpeg failed (exit code {exit_code}) for {output_path}, skipping.")
                    continue

                print(f"Created {output_path}")

                # Get the actual duration of the chopped clip
                clip_duration = get_audio_duration(output_path)

                # Append path, caption, and audio metadata to the CSV
                csv_writer.writerow([output_path, caption, 0, '', clip_duration, sample_rate])
                # Flush so the row survives an interruption and is visible to
                # the next is_already_in_csv() read of the same file.
                csvfile.flush()

In [None]:
# Read the audio clips and captions from a CSV file.
def get_clips(filename):
    """Load clip names and captions from a CSV with ``Name`` and ``Caption`` columns.

    The file is read as ``utf-8-sig``. Rows where either field is empty are
    dropped. The detected headers and the number of clips kept are printed.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of ``{"Name": ..., "Caption": ...}`` dicts in file order, or
        an empty list if the file cannot be read or lacks a required column
        (the error is printed rather than raised).
    """
    try:
        with open(filename, mode='r', newline='', encoding='utf-8-sig') as source:
            table = csv.DictReader(source)
            headers = table.fieldnames
            print(f"Headers found: {headers}")

            if not ("Name" in headers and "Caption" in headers):
                raise ValueError("CSV file does not contain 'Name' and 'Caption' columns.")

            # Keep only rows that carry both a name and a caption.
            clips = [
                {"Name": entry["Name"], "Caption": entry["Caption"]}
                for entry in table
                if entry["Name"] and entry["Caption"]
            ]

    except Exception as e:
        print(f"Error reading clips file:\n{e}")
        return []

    print(f"{len(clips)} clips found.")
    return clips

In [21]:
# Load the clip names and captions from OnlineAudioClips.csv (path is relative to
# the notebook's working directory); get_clips returns [] if the file can't be read.
audio_clips = get_clips("OnlineAudioClips.csv")

Headers found: ['Name', 'Caption']
24 clips found.


In [22]:
# NOTE(review): `main` is not defined in any cell visible here, so this call depends
# on a definition from elsewhere (another cell or earlier kernel state) — confirm it
# still works after Restart Kernel -> Run All.
if __name__ == "__main__":
    main(audio_clips)

Converted V_online_1.mp3 to V_online_1.wav
Converted V_online_2.mp3 to V_online_2.wav
Converted V_online_3.mp3 to V_online_3.wav
Converted V_online_4.mp3 to V_online_4.wav
Converted V_online_5.mp3 to V_online_5.wav
Converted V_online_6.mp3 to V_online_6.wav
Converted V_online_7.mp3 to V_online_7.wav
Converted V_online_8.mp3 to V_online_8.wav
Converted V_online_9.mp3 to V_online_9.wav
Converted V_online_10.mp3 to V_online_10.wav
Converted V_online_11.mp3 to V_online_11.wav
Converted V_online_12.mp3 to V_online_12.wav
Converted V_online_13.mp3 to V_online_13.wav
Mp3Clips/V_online_14.mp3 does not exist!
Mp3Clips/V_online_15.mp3 does not exist!
Mp3Clips/V_online_16.mp3 does not exist!
Mp3Clips/V_online_17.mp3 does not exist!
Mp3Clips/V_online_18.mp3 does not exist!
Mp3Clips/V_online_19.mp3 does not exist!
Mp3Clips/V_online_20.mp3 does not exist!
Mp3Clips/V_online_21.mp3 does not exist!
Mp3Clips/V_online_22.mp3 does not exist!
Mp3Clips/V_online_23.mp3 does not exist!
Converted VP_online_1.m