In [1]:
import json
import pandas as pd
import requests
import os
import subprocess
import re
import shutil

In [2]:
metadata_path = "/shared/3/projects/benlitterer/podcastData/processed/mayJune/mayJuneDataClean.jsonl"
download_path = "/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache"
output_path = "/shared/3/projects/bangzhao/prosodic_embeddings/mfa/output"
# Total rows: 1,124,058

In [3]:
def clean_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    
    # Remove content between square brackets and parentheses (including the brackets and parentheses themselves)
    cleaned_text = re.sub(r'\[.*?\]', '', text, flags=re.IGNORECASE)
    cleaned_text = re.sub(r'\(.*?\)', '', cleaned_text, flags=re.IGNORECASE)
    # Remove the '>>' mark
    cleaned_text = re.sub(r'>>', '', cleaned_text, flags=re.IGNORECASE)
    
    # Remove extra spaces left after removing patterns
    cleaned_text = re.sub(r'\s{2,}', ' ', cleaned_text)  # Replace multiple spaces with a single space
    cleaned_text = cleaned_text.strip()  # Remove leading and trailing whitespace

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(cleaned_text)

def clean_directory(directory_path):
    for root, _, files in os.walk(directory_path):
        for filename in files:
            if filename.endswith('.txt'):
                file_path = os.path.join(root, filename)
                clean_text_file(file_path)
                print(f"Processed {file_path}")

In [8]:
def count_rows(file_path):
    """Count the total number of rows in a JSONL file."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return sum(1 for _ in file)
    

def download_mp3(json_object, i, download_directory):
    """Download MP3 file from the URL specified in the JSON object."""
    mp3_url = json_object['enclosure']

    response = requests.get(mp3_url, stream=True)
    if response.status_code == 200:
        mp3_filename = os.path.join(download_directory, f"{i}.mp3")
        with open(mp3_filename, 'wb') as mp3_file:
            for chunk in response.iter_content(chunk_size=8192):
                mp3_file.write(chunk)
        print(f"Downloaded {mp3_filename}")
        return True
    else:
        print(f"Failed to download {mp3_url}, status code: {response.status_code}")
    return False

        
def download_transcript(json_object, i, download_directory):
    """Write the transcript from the JSON object to a text file in the download directory."""
    transcript = json_object["transcript"]
    transcript_filename = os.path.join(download_directory, f"{i}.txt")
    with open(transcript_filename, 'w', encoding='utf-8') as transcript_file:
        transcript_file.write(transcript)
    print(f"Transcript saved as {transcript_filename}")
    
    
def get_prosody(json_object):
    prosodic_path = "/shared/3/projects/benlitterer/podcastData/prosodyMerged/floydMonth" + json_object['potentialOutPath']
    prosodic_feature = pd.read_csv(prosodic_path)
    prosodic_feature = prosodic_feature.drop(columns=['Unnamed: 0'])
    return prosodic_feature


def convert_mp3_to_wav(directory):
    # Iterate over all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith(".mp3"):
            mp3_path = os.path.join(directory, filename)
            wav_path = os.path.splitext(mp3_path)[0] + '.wav'
            
            # Construct the ffmpeg command
            command = ['ffmpeg', '-y', '-i', mp3_path, wav_path]

            # Execute the command
            try:
                subprocess.run(command, check=True)
                print(f"Converted {mp3_path} to {wav_path}")
                # Remove the original MP3 file after conversion
                os.remove(mp3_path)
                print(f"Removed original MP3 file: {mp3_path}")
            except subprocess.CalledProcessError as e:
                print(f"Failed to convert {mp3_path}: {e}")
                

def run_mfa_align(input_path, acoustic_model, dictionary, output_path, beam, retry_beam, num_jobs):
    # Convert the output path to an absolute path
    # absolute_output_path = os.path.abspath(output_path)
    
    # Construct the MFA align command with the absolute path
    command = [
        'mfa', 'align', '--clean', input_path, acoustic_model, dictionary,
        output_path, '--beam', str(beam), '--retry_beam', 
        str(retry_beam), '--num_jobs', str(num_jobs), '--single_speaker'
    ]

    # Print the command to verify
    print("Running command:", ' '.join(command))

    # Execute the command
    try:
        subprocess.run(command, check=True)
        print("Command executed successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to execute command: {e}")
        

def parse_textgrid(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    tier_name = None
    tier_type = None
    start_time = None
    end_time = None
    label = None
    in_intervals = False

    for line in lines:
        line = line.strip()
        
        if line.startswith('name = '):
            tier_name = line.split('=')[1].strip().strip('"')
        elif line.startswith('class = "IntervalTier"'):
            tier_type = 'IntervalTier'
        elif line.startswith('class = "TextTier"'):
            tier_type = 'TextTier'
        elif line.startswith('intervals:'):
            in_intervals = True
        elif line.startswith('points:'):
            in_intervals = False
        elif in_intervals and line.startswith('xmin ='):
            start_time = float(line.split('=')[1].strip())
        elif in_intervals and line.startswith('xmax ='):
            end_time = float(line.split('=')[1].strip())
        elif in_intervals and line.startswith('text ='):
            label = line.split('=')[1].strip().strip('"')
            data.append([tier_name, start_time, end_time, label])
        elif tier_type == 'TextTier' and line.startswith('number ='):
            time = float(line.split('=')[1].strip())
            label = 'point'  # Placeholder, replace with actual point data if needed
            data.append([tier_name, time, time, label])

    return data


def convert_textgrids_to_csv(directory):
    # Iterate over all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith(".TextGrid"):
            textgrid_path = os.path.join(directory, filename)
            csv_path = os.path.splitext(textgrid_path)[0] + '.csv'

            # Parse the TextGrid file
            data = parse_textgrid(textgrid_path)

            # Create a DataFrame
            df = pd.DataFrame(data, columns=['Tier', 'Start Time', 'End Time', 'Label'])

            # Save the DataFrame to a CSV file
            df.to_csv(csv_path, index=False)
            print(f"Converted {textgrid_path} to {csv_path}")

            # Remove the original TextGrid file
            os.remove(textgrid_path)
            print(f"Removed original TextGrid file: {textgrid_path}")
    

def process_batch(download_directory):
    # clean the transcripts
    clean_directory(download_directory)   
    convert_mp3_to_wav(download_directory)

    acoustic_model = 'english_us_arpa'
    dictionary = 'english_us_arpa'
    beam = 10
    retry_beam = 40
    num_jobs = 12

    run_mfa_align(download_directory, acoustic_model, dictionary, output_path, beam, retry_beam, num_jobs)
    convert_textgrids_to_csv(output_path)

    # Remove all files under the download directory
    for filename in os.listdir(download_directory):
        file_path = os.path.join(download_directory, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f'Failed to delete {file_path}. Reason: {e}')    
            
            
def get_json_objects(file_path, download_directory, batch_size=12):
    """Read JSONL file and process each JSON object."""
    # total_rows = count_rows(file_path)
    # print(f"Total rows: {total_rows}")
    downloaded_files = 0
    
    with open(file_path, 'r', encoding='utf-8') as file:
        
        for i, line in enumerate(file):
            if 1:
                json_object = json.loads(line)
                #print(json.dumps(json_object, indent=4))
                
                # Construct the prosodic path and read the CSV file
                # get_prosody(json_object)

                # Download the MP3 file
                success = download_mp3(json_object, i, download_directory)
                if success:
                    downloaded_files += 1
                download_transcript(json_object, i, download_directory)
                print(success)
            
            else:
                print(f"An error occurred at line {i}")

            if i % 100000 == 0:
                print(f"Processed {i} lines...")
        
            if downloaded_files >= batch_size:
                process_batch(download_directory)    
                downloaded_files = 0
                break
        
        # Process any remaining files in the last batch
        if downloaded_files > 0:
            process_batch(download_directory)

        return i

In [9]:
total_rows_processed = get_json_objects(metadata_path, download_path)
print(f"Total rows processed: {total_rows_processed}")

Failed to download https://www.buzzsprout.com/783020/4252475-best-of-singout-speakout-no-3.mp3, status code: 403
Transcript saved as /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_0.txt
False
Processed 0 lines...
Failed to download https://www.buzzsprout.com/783020/4165286-it-s-all-gone.mp3, status code: 403
Transcript saved as /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_1.txt
False
Failed to download https://www.buzzsprout.com/783020/3983942-today-is-yesterday.mp3, status code: 403
Transcript saved as /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_2.txt
False
Failed to download https://www.buzzsprout.com/783020/3892169-saturn-return.mp3, status code: 403
Transcript saved as /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_3.txt
False
Failed to download https://www.buzzsprout.com/783020/3791501-quarterlife-crisis.mp3, status code: 403
Transcript saved as /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_4.txt

ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libo

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_5.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_5.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_5.mp3


Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_6.wav':
  Metadata:
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s16le
[out#0/wav @ 0x55c93d905e40] video:0KiB audio:590576KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000013%
size=  590576KiB time=00:57:08.28 bitrate=1411.2kbits/s speed= 144x    
ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-li

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_6.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_6.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_6.mp3


[mp3 @ 0x559221d20b40] Estimating duration from bitrate, this may be inaccurate
Input #0, mp3, from '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_7.mp3':
  Metadata:
    album_artist    : California Community Colleges Chancellor’s Office
    track           : 33
    genre           : Vocal
    title           : CCC20033.mp3  Episode 33  "College Futures Foundation"  25:58 TRT
    artist          : Eloy Ortiz Oakley, Monica Lozano
    album           : California Community Colleges Podast
    date            : 2020
  Duration: 00:25:56.74, start: 0.000000, bitrate: 323 kb/s
  Stream #0:0: Audio: mp3 (mp3float), 44100 Hz, stereo, fltp, 320 kb/s
  Stream #0:1: Video: png, rgba(pc, gbr/unknown/unknown), 682x684, 90k tbr, 90k tbn (attached pic)
      Metadata:
        comment         : Other
Stream mapping:
  Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native))
Press [q] to stop, [?] for help
Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cac

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_7.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_7.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_7.mp3


Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_8.wav':
  Metadata:
    INAM            : 2.20: The NHL Will Continue Until Morale Improves
    ICMT            : So phase 2 beats on, himbos against the current, borne back ceaselessly into a league-wide lockdown.
    lyrics-ENG      : <p>The girls are back in town once again because the NHL cannot stop bungling their increasingly pathetic attempt to reenact the 2020 Stanley Cup Playoffs in Las Vegas. This week, 11 players tested positive (for corona, not cooties), Roman Polak yeeted fr
                    : 
    IPRD            : Puck Bunnies
    IGNR            : Podcast
    ICRD            : 2021
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s16le
[out#0/wav @ 0x557bad45e700] video:0KiB audio:412155KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing ove

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_8.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_8.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_8.mp3


[out#0/wav @ 0x5645dd726740] video:0KiB audio:1720449KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000190%
size= 1720452KiB time=02:46:27.18 bitrate=1411.2kbits/s speed= 180x    
ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_15.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_15.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_15.mp3


Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_16.wav':
  Metadata:
    ICMT            : 
    album_artist    : Bryce Blankenagel
    INAM            : Ep 48 - The Negro: A Proclamation to the World; Case for BoM Final
    IPRD            : Glass Box Podcast
    IART            : Bryce Blankenagel/Braden Hamm
    IGNR            : Religion & Spirituality
    publisher       : Ground Gnomes LLC
    ICRD            : 2020
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s16le
[out#0/wav @ 0x55e29a0d2940] video:0KiB audio:1595637KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000016%
size= 1595637KiB time=02:34:22.65 bitrate=1411.2kbits/s speed= 185x    
ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --pre

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_16.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_16.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_16.mp3


[mp3 @ 0x55f354744b40] Estimating duration from bitrate, this may be inaccurate
Input #0, mp3, from '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_17.mp3':
  Metadata:
    album_artist    : Bryce Blankenagel
    title           : Ep 47 - Haven’t We Done This Already?
    comment         : Update on COVID-19 time! The Mormon church is taking steps to reopen wards around the globe while there’s no evidence that we’ve effectively flattened the curve. They’re also investing some resources into making PPE for medical workers in the form o 
                    :  
                    : Angels Trumpet: 
                    : https://www.sltrib.com/religion/2020/05/20/lds-church-sells-exxon/ 
                    :  
                    : https://www.sltrib.com/religion/2020/05/19/lds-church-unveils-plan/ 
                    :  
                    : https://newsroom.churchofjesuschrist.org/article/global-sacred-clothing-production-facilities-temporarily-produce-masks-gowns 
 

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_17.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_17.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_17.mp3


ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libo

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_18.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_18.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_18.mp3


Unknown attached picture mimetype: image/, skipping.
[mp3 @ 0x564e86405b40] Estimating duration from bitrate, this may be inaccurate
Input #0, mp3, from '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_19.mp3':
  Metadata:
    title           : Opioid Addictions- It Can Happen To Anyone
    lyrics-ENG      : Summary Placeholder Episode ID: 15497780
    album           : Getufit with Irene- Helping you lead a healthy lifestyle. 
    genre           : Podcast
    date            : 2023
  Duration: 00:27:00.11, start: 0.000000, bitrate: 128 kb/s
  Stream #0:0: Audio: mp3 (mp3float), 44100 Hz, stereo, fltp, 128 kb/s
Stream mapping:
  Stream #0:0 -> #0:0 (mp3 (mp3float) -> pcm_s16le (native))
Press [q] to stop, [?] for help
Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_19.wav':
  Metadata:
    INAM            : Opioid Addictions- It Can Happen To Anyone
    lyrics-ENG      : Summary Placeholder Episode ID: 15497780
    IPRD            : Ge

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_19.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_19.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_19.mp3


Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_20.wav':
  Metadata:
    INAM            : Why Diets Fail- Trust the Process
    lyrics-ENG      : Summary Placeholder Episode ID: 15130404
    IPRD            : Getufit with Irene- Helping you lead a healthy lifestyle. 
    IGNR            : Podcast
    ICRD            : 2023
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s16le
[out#0/wav @ 0x560374833b00] video:0KiB audio:279090KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000076%
size=  279090KiB time=00:27:00.11 bitrate=1411.2kbits/s speed= 138x    
ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placeh

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_20.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_20.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_20.mp3


[out#0/wav @ 0x55ddd27ecb00] video:0KiB audio:279090KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000076%
size=  279090KiB time=00:27:00.11 bitrate=1411.2kbits/s speed= 167x    
ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-7)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1716729489913/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_17167

Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_21.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_21.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_21.mp3


Output #0, wav, to '/shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_22.wav':
  Metadata:
    INAM            : Intermittent Fasting - Yay or Nay?
    lyrics-ENG      : Summary Placeholder Episode ID: 14175014
    IPRD            : Getufit with Irene- Helping you lead a healthy lifestyle. 
    IGNR            : Podcast
    ICRD            : 2023
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, stereo, s16, 1411 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s16le
[out#0/wav @ 0x56485a9f5b00] video:0KiB audio:279090KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000077%
size=  279090KiB time=00:27:00.11 bitrate=1411.2kbits/s speed= 152x    


Converted /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_22.mp3 to /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_22.wav
Removed original MP3 file: /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache/file_22.mp3
Running command: mfa align --clean /shared/3/projects/bangzhao/prosodic_embeddings/mfa/cache english_us_arpa english_us_arpa /shared/3/projects/bangzhao/prosodic_embeddings/mfa/output --beam 10 --retry_beam 40 --num_jobs 12 --single_speaker


[2;36m [0m[32mINFO    [0m Setting up corpus information[33m...[0m                                      
[2;36m [0m[32mINFO    [0m Loading corpus from source files[33m...[0m                                   


[2K[35m  12%[0m [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12/100 [0m [ [33m0:00:02[0m < [36m0:01:56[0m , [31m1 it/s[0m ]
[?25h

[2;36m [0m[32mINFO    [0m Found [1;36m1[0m speaker across [1;36m12[0m files, average number of utterances per     
[2;36m [0m         speaker: [1;36m12.0[0m                                                         
[2;36m [0m[32mINFO    [0m Initializing multiprocessing jobs[33m...[0m                                  
[2;36m [0m[32mINFO    [0m Normalizing text[33m...[0m                                                   


[2K[35m 100%[0m [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12/12 [0m [ [33m0:00:01[0m < [36m0:00:00[0m , [31m8 it/s[0m ] [31m7 it/s[0m ]
[?25h

[2;36m [0m[32mINFO    [0m Generating MFCCs[33m...[0m                                                   


[2K[35m 100%[0m [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12/12 [0m [ [33m0:15:54[0m < [36m0:00:00[0m , [31m? it/s[0m ] [31m? it/s[0m ]
[?25h

[2;36m [0m[32mINFO    [0m Calculating CMVN[33m...[0m                                                   
[2;36m [0m[32mINFO    [0m Generating final features[33m...[0m                                          


[2K[35m 100%[0m [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12/12 [0m [ [33m0:00:05[0m < [36m0:00:00[0m , [31m3 it/s[0m ] [31m1 it/s[0m ]
[?25h

[2;36m [0m[32mINFO    [0m Creating corpus split[33m...[0m                                              


[2K[35m 100%[0m [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12/12 [0m [ [33m0:00:01[0m < [36m0:00:00[0m , [31m9 it/s[0m ] [31m? it/s[0m ]
[?25h

[2;36m [0m[32mINFO    [0m Compiling training graphs[33m...[0m                                          
[2;36m [0m[32mINFO    [0m Performing first-pass alignment[33m...[0m                                    
[2;36m [0m[32mINFO    [0m Generating alignments[33m...[0m                                              


[2K[35m  75%[0m [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━[0m [32m9/12 [0m [ [33m0:16:07[0m < [36m-:--:--[0m , [31m? it/s[0m ]


KeyboardInterrupt

