# How to generate mp3 files

In [None]:
#| default_exp mp3

In [None]:
#| export

import csv
import os
from pathlib import Path
from subprocess import run, CalledProcessError
import shlex

In [None]:
#| export
def text2wav(
    s: str = "pää on kipeä.",  # Text to convert to WAV
    wav: str = "output.wav",   # Output WAV file path
    model: str = 'models/fi_FI-harri-low.onnx'  # Piper TTS model path
) -> None:
    """Convert text to WAV using Piper TTS.

    The text `s` is UTF-8 encoded and sent to the `piper` executable on stdin;
    `model` and `wav` are passed as the `--model` and `--output_file` values.

    Raises `RuntimeError` when the `piper` executable cannot be started
    (with installation hints) or when it exits with a non-zero status.
    The underlying exception is attached as `__cause__`.
    """
    # Build the argument vector directly instead of shlex-splitting an
    # interpolated string: a path containing spaces or quotes must reach
    # piper as ONE argument, not be re-tokenized.
    cmd = ["piper", "--model", str(model), "--output_file", str(wav)]
    try:
        run(cmd, input=s.encode(), check=True)
    except FileNotFoundError as e:
        raise RuntimeError(
            "Piper TTS not found. Install: pip install piper-tts\n"
            "Or download from: https://github.com/rhasspy/piper"
        ) from e
    except CalledProcessError as e:
        raise RuntimeError(
            f"Piper TTS failed with exit code {e.returncode}\n"
            f"Check that model file exists: {model}\n"
            f"Error: {e}"
        ) from e

def wav2mp3(
    wav: str = "output.wav",  # Input WAV file path
    mp3: str = "output.mp3"   # Output MP3 file path
) -> None:
    """Convert WAV to MP3 using ffmpeg.

    Runs `ffmpeg` with the libmp3lame encoder at quality `-q:a 4`,
    overwriting `mp3` if it already exists (`-y`) and printing only errors.

    Raises `RuntimeError` when the `ffmpeg` executable cannot be started
    (with installation hints) or when it exits with a non-zero status.
    The underlying exception is attached as `__cause__`.
    """
    # Explicit argument list (no shlex.split on an interpolated string) so
    # that file names containing spaces or quotes stay single arguments.
    cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
        "-i", str(wav),
        "-codec:a", "libmp3lame", "-q:a", "4",
        str(mp3),
    ]
    try:
        run(cmd, check=True)
    except FileNotFoundError as e:
        raise RuntimeError(
            "ffmpeg not found. Install:\n"
            "  Ubuntu/Debian: sudo apt install ffmpeg\n"
            "  macOS: brew install ffmpeg\n"
            "  Windows: Download from https://ffmpeg.org/"
        ) from e
    except CalledProcessError as e:
        raise RuntimeError(
            f"ffmpeg conversion failed with exit code {e.returncode}\n"
            f"Input: {wav}, Output: {mp3}"
        ) from e

def text2mp3(
    s: str = "pää on kipeä.",  # Text to convert to MP3
    mp3: str = "output.mp3",   # Output MP3 file path
    model: str = 'models/fi_FI-harri-low.onnx'  # Piper TTS model path
) -> None:
    """Synthesize `s` to an intermediate WAV with `text2wav`, then encode it to `mp3` with `wav2mp3`.

    The intermediate file is always `output.wav` in the current working
    directory. It is removed when the call finishes, whether or not the
    conversion succeeded, so any file of that name there does not survive
    the call.
    """
    scratch = Path("output.wav")
    try:
        text2wav(s, wav=str(scratch), model=model)
        wav2mp3(wav=str(scratch), mp3=mp3)
    finally:
        # Runs on success and on failure alike, so no stray WAV is left behind.
        if scratch.exists():
            scratch.unlink()

def mp3s(
    tsv: str,                  # Path to TSV file
    output_dir: str = "audio", # Output directory for MP3 files
    model: str = 'models/fi_FI-harri-low.onnx'  # Piper TTS model path
) -> None:
    """Generate MP3 files for all Finnish entries in TSV file.

    Each row with a non-blank `Finnish` column produces
    `<output_dir>/<tsv stem>_<row index, zero-padded to 2>.mp3`; the index
    counts every data row, so skipped rows leave gaps in the numbering.
    `output_dir` is created (including parents) if needed.

    Raises `AssertionError` if `tsv` is not an existing file.
    """
    tsv_path = Path(tsv)
    if not tsv_path.is_file():
        # Raised explicitly rather than via `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(f"TSV file not found: {tsv}")
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module do its own line-ending handling.
    with open(tsv_path, encoding="utf-8", newline="") as f:
        for i, row in enumerate(csv.DictReader(f, delimiter="\t")):
            # .get covers a missing "Finnish" column; `or ""` covers short
            # rows, where DictReader fills the absent field with None.
            finnish_text = (row.get("Finnish") or "").strip()
            if not finnish_text:
                continue
            text2mp3(s=finnish_text, mp3=str(out_dir / f"{tsv_path.stem}_{i:02}.mp3"), model=model)

## EDA

In [None]:
#| eval: false
# Make sure the "audio" folder exists (no error if it is already there).
Path("audio").mkdir(exist_ok=True)
# TSV file used by the exploratory cells that follow.
tsv = Path("tsvs/05_Keho.tsv")
# Last expression of the cell: displays the file name without its extension.
tsv.stem

'05_Keho'

In [None]:
#| eval: false
# Piper voice model path; same location as the default `model` argument of
# text2wav / text2mp3 / mp3s.
model = Path('models/fi_FI-harri-low.onnx')

In [None]:
#| eval: false
from suomi.tsv import *

In [None]:
#| eval: false
# NOTE(review): `cattsv` is not defined in this notebook; presumably it comes
# from the `from suomi.tsv import *` cell and prints the TSV — confirm.
cattsv(tsv)

FileNotFoundError: [Errno 2] No such file or directory: 'tsvs/05_Keho.tsv'

In [None]:
#| eval: false
# Load every row of `tsv` as a dict of stripped strings (missing values become
# ""), tagging each with an "fname" of the form "<tsv stem>_<row index>".
data = []
with open(tsv, encoding="utf-8") as f:
    data.extend(
        {
            **{k: (v or "").strip() for k, v in r.items()},
            "fname": f"{tsv.stem}_{i:02}",
        }
        for i, r in enumerate(csv.DictReader(f, delimiter="\t"))
    )

# Preview the first two records.
data[:2]

## Loop all entries in a TSV file

In [None]:
#| eval: false
import shutil
# Delete the whole "audio" folder before regenerating; ignore_errors=True
# keeps this silent if the folder does not exist.
shutil.rmtree("audio", ignore_errors=True)

In [None]:
#| eval: false
from suomi.core import ffr
# NOTE(review): `ffr` is a project helper; presumably it yields the files under
# 'tsvs' whose extension is '.tsv' — confirm against suomi.core.
# The loop rebinds the notebook-level name `tsv` used by earlier cells.
for tsv in ffr(['tsvs'], ['.tsv']):
    mp3s(tsv)

## Tests

In [None]:
#| test
# Test: mp3s raises error for non-existent TSV file
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmpdir:
    non_existent_tsv = Path(tmpdir) / "does_not_exist.tsv"

    # The "did not raise" failure lives in `else`, outside the `try`, so the
    # `except AssertionError` handler cannot swallow it and report a
    # misleading "Error should mention TSV not found" message instead.
    try:
        mp3s(str(non_existent_tsv))
    except AssertionError as e:
        assert "TSV file not found" in str(e), f"Error should mention TSV not found: {e}"
    else:
        raise AssertionError("Should have raised AssertionError for non-existent TSV")

print("✓ mp3s non-existent TSV error test passed")

In [None]:
#| test
# Test: mp3s creates output directory if it doesn't exist
import tempfile
from pathlib import Path
import csv

with tempfile.TemporaryDirectory() as tmpdir:
    # Header-only TSV: no data rows, so no audio is synthesized and only the
    # directory-creation side effect is exercised.
    tsv_path = Path(tmpdir) / "test.tsv"
    with tsv_path.open("w", encoding="utf-8", newline="") as fh:
        csv.DictWriter(
            fh,
            fieldnames=["Finnish", "English", "Japanese"],
            delimiter="\t",
        ).writeheader()

    output_dir = Path(tmpdir) / "audio_output"
    assert not output_dir.exists(), "Output directory should not exist yet"

    # Must create the directory even though there is nothing to convert.
    mp3s(str(tsv_path), output_dir=str(output_dir))

    assert output_dir.exists(), "Output directory should be created"
    assert output_dir.is_dir(), "Output path should be a directory"

print("✓ mp3s directory creation test passed")

In [None]:
#| test
# Test: text2mp3 cleans up temporary WAV file even on error
#
# Strategy: replace `run` with a fake whose first invocation (made by
# text2wav) creates the WAV and whose second invocation (made by wav2mp3)
# fails, then check that text2mp3's `finally` block removed the WAV.
# NOTE(review): patching '__main__.run' assumes this cell executes in the
# notebook kernel, where text2wav/wav2mp3 are defined in `__main__`.
from unittest.mock import patch, MagicMock
import tempfile
from pathlib import Path
import os
from subprocess import CalledProcessError

with tempfile.TemporaryDirectory() as tmpdir:
    wav_file = Path(tmpdir) / "output.wav"
    mp3_file = Path(tmpdir) / "output.mp3"
    
    # Mock run to create WAV on first call, then fail on second call
    call_count = [0]  # Use list to avoid nonlocal scope issue
    def mock_run(*args, **kwargs):
        call_count[0] += 1
        if call_count[0] == 1:
            # First call (piper) - create the WAV file
            wav_file.touch()
        else:
            # Second call (ffmpeg) - simulate failure
            raise CalledProcessError(1, "ffmpeg")
    
    # Change to temp directory: text2mp3 writes its intermediate WAV to the
    # relative path "output.wav", which then resolves to `wav_file`.
    old_cwd = os.getcwd()
    try:
        os.chdir(tmpdir)
        with patch('__main__.run', side_effect=mock_run):
            try:
                text2mp3("test text", str(mp3_file))
                # Not caught by `except RuntimeError` below, so it fails the cell.
                assert False, "Should have raised RuntimeError"
            except RuntimeError:
                # WAV file should be cleaned up even though conversion failed
                assert not Path("output.wav").exists(), "WAV file should be cleaned up after error"
    finally:
        # Always restore the working directory, even if an assertion fired.
        os.chdir(old_cwd)

print("✓ text2mp3 WAV cleanup on error test passed")

In [None]:
#| test
# Test: wav2mp3 raises helpful error when conversion fails
from unittest.mock import patch
from subprocess import CalledProcessError

# Stand-in for a non-zero ffmpeg exit; installed over `run` in the notebook's namespace
mock_error = CalledProcessError(1, "ffmpeg")
with patch('__main__.run', side_effect=mock_error):
    try:
        wav2mp3("input.wav", "output.mp3")
    except RuntimeError as e:
        error_msg = str(e)
        # The wrapped error must name the failure and both file paths.
        assert "ffmpeg conversion failed" in error_msg, f"Error should mention conversion failure: {error_msg}"
        assert "input.wav" in error_msg, f"Error should mention input file: {error_msg}"
        assert "output.mp3" in error_msg, f"Error should mention output file: {error_msg}"
    else:
        assert False, "Should have raised RuntimeError"

print("✓ wav2mp3 error handling for conversion failure test passed")

In [None]:
#| test
# Test: wav2mp3 raises helpful error when ffmpeg not found
from unittest.mock import patch
import builtins

# Simulate a missing ffmpeg executable by making `run` (in the notebook's namespace) raise
with patch('__main__.run', side_effect=FileNotFoundError("ffmpeg not found")):
    try:
        wav2mp3("test.wav", "test.mp3")
    except RuntimeError as e:
        error_msg = str(e)
        assert "ffmpeg not found" in error_msg, f"Error should mention ffmpeg: {error_msg}"
        # At least one platform-specific install hint must be present.
        install_hints = ("apt install ffmpeg", "brew install ffmpeg")
        assert any(hint in error_msg for hint in install_hints), \
            f"Error should include installation instructions: {error_msg}"
    else:
        assert False, "Should have raised RuntimeError"

print("✓ wav2mp3 error handling for missing ffmpeg test passed")

In [None]:
#| test
# Test: text2wav raises helpful error when model file missing
from unittest.mock import patch, MagicMock
from subprocess import CalledProcessError

# Stand-in for a non-zero piper exit; installed over `run` in the notebook's namespace
mock_error = CalledProcessError(1, "piper")
with patch('__main__.run', side_effect=mock_error):
    try:
        text2wav("test", "test.wav", model="nonexistent.onnx")
    except RuntimeError as e:
        error_msg = str(e)
        # The wrapped error must name the failure and point at the model path.
        assert "Piper TTS failed" in error_msg, f"Error should mention Piper TTS failure: {error_msg}"
        assert "nonexistent.onnx" in error_msg, f"Error should mention the model file: {error_msg}"
    else:
        assert False, "Should have raised RuntimeError"

print("✓ text2wav error handling for missing model test passed")

In [None]:
#| test
# Test: text2wav raises helpful error when piper not found
from unittest.mock import patch, MagicMock
from subprocess import run, CalledProcessError
import tempfile
from pathlib import Path

# Simulate a missing piper executable by making `run` (in the notebook's namespace) raise
with patch('__main__.run', side_effect=FileNotFoundError("piper not found")):
    try:
        text2wav("test", "test.wav")
    except RuntimeError as e:
        error_msg = str(e)
        # The wrapped error must identify Piper and tell the user how to install it.
        assert "Piper TTS not found" in error_msg, f"Error message should mention Piper TTS: {error_msg}"
        assert "pip install piper-tts" in error_msg, f"Error should include installation instructions: {error_msg}"
    else:
        assert False, "Should have raised RuntimeError"

print("✓ text2wav error handling for missing piper test passed")

In [None]:
#| hide
# Export the cells marked `#| export` into the package module.
import nbdev
nbdev.nbdev_export()