In [1]:
import base64

def audio_to_base64(audio_file_path: str) -> str:
    """Convert an audio file to a base64 encoded string."""
    with open(audio_file_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
        base64_string = base64.b64encode(audio_bytes).decode("utf-8")
    return base64_string


def base64_to_audio(base64_string: str, output_file_path: str):
    """Convert a base64 string back to audio bytes and save as a file."""
    audio_bytes = base64.b64decode(base64_string.encode("utf-8"))
    with open(output_file_path, "wb") as audio_file:
        audio_file.write(audio_bytes)


if __name__ == "__main__":
    # Example usage
    input_audio = "yoruba_short_1.wav"      # replace with your audio file
    output_audio = "decoded_audio.wav"   # file to save decoded result
    
    # Convert to base64
    encoded = audio_to_base64(input_audio)
    print("Base64 string (first 100 chars):", encoded[:100], "...")

    # Convert back from base64
    base64_to_audio(encoded, output_audio)
    print(f"Decoded audio saved as: {output_audio}")


Base64 string (first 100 chars): AAAAHGZ0eXBNNEEgAAACAE00QSBpc29taXNvMgAAAAhmcmVlAACAHG1kYXTcAExhdmM2Mi4wLjEwMQAB1GAy6mIlXV5vqnmSu+mu ...
Decoded audio saved as: decoded_audio.wav


In [3]:
test = audio_to_base64("test_tts.wav")

In [7]:
test

'AAAAHGZ0eXBNNEEgAAACAE00QSBpc29taXNvMgAAAAhmcmVlAABjom1kYXTcAExhdmM2Mi4wLjEwMQAB1KARIXSrDRBE3dcC84vvrc1qaq85l5VT1YiqPsIEjPA2JhmOZtAhhW41cYkagz8ERDgEnYYmlErsY3sl+/8Hc6h95hfg7nkML8HHZY/eeWf/Z3nU1tsquC3iS/uZR95I7lnlu+AqLaMwmikvgIvAaQaWpqQqwTTWcVgQWPe37bOD0tSNLfaNZ5K5g0Y2Ig/JxCX1wztv1dfxe30/AaKgrCijhsHr5BuD29gjsFIYmsrpuEIlsaRYO6fGZ4lkMy1HB1aFhuOMTUr/DPLtA150w4AA2pWailNgbNcKQNmORNuNpfxUD29v71qt+TXqvSp17fv/T8/v+njnX1+H8Dy2GwQ2fOYquXwdf1V/xH/5d9Yb34PDm/83/SUGT5iA6BB18U2fSnjEVyQuk73Pwc/Yy4bD0xA5mTb/RrY8L09Oof4vSHrf8ttz5p8EzLzUO/tqJFCdKTm4L6WHUM1dUfwdfPx7gWxtCrBykenmbTVH3e/kWaLc3fmC/x0ZzUL5BnO+8CiWMzZeam+6RE3799lZMEhTtF70Pk2lxkbhjI3F5EfS28d0zM7UiA3AAQiY2s1jWoDZZVRlllUYiwNmOBWkC/aKySOvr/v/2Af+XnXv/8AP+/x+f9SuZw/9ib9A335v3oEgiTG/USUHCwEFIz6lbYmTzK2cr+aQ25CG2zJGZEJax3gpLbZUnbniwYc6Ke5aa6XaSovaEgf6NZcY8YS5ew2cLHpJ3hNqb2KV2/nvjgOfe6raLxf/rS1yU4qtuWCiYvRiudfwzW7In+MKoNevXq1QDXr1z1atQhip5LIRMB5Bk6oR1+T7Xpsvpd2KzS0wVAvAOQxua2K2TaDnTKsSFd1eE2Wte1KLjLnO1XKNe2Z+ccbmF4XzK00yaahDDB5oOUQ/wRBrAT0k/jNSAhkZSMq

In [5]:
output_file_path = "sanitizer_test.wav"

In [None]:
base64_to_audio(test, output_file_path)

In [3]:
import base64
import mimetypes

def encode_audio_to_base64(file_path: str) -> str:
    """Encode audio file to base64 string."""
    with open(file_path, "rb") as f:
        audio_bytes = f.read()
    return base64.b64encode(audio_bytes).decode("utf-8")


def decode_base64_to_audio(base64_str: str, output_file: str):
    """Decode base64 string back to audio file."""
    audio_bytes = base64.b64decode(base64_str.encode("utf-8"))
    with open(output_file, "wb") as f:
        f.write(audio_bytes)


def encode_audio_to_data_uri(file_path: str) -> str:
    """Encode audio file to data URI format (with mime type)."""
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        raise ValueError(f"Could not determine MIME type for {file_path}")
    
    base64_str = encode_audio_to_base64(file_path)
    return f"data:{mime_type};base64,{base64_str}"


def decode_data_uri_to_audio(data_uri: str, output_file: str):
    """Decode data URI back to audio file."""
    # Format: data:<mime_type>;base64,<base64_data>
    header, base64_data = data_uri.split(",", 1)
    audio_bytes = base64.b64decode(base64_data.encode("utf-8"))
    with open(output_file, "wb") as f:
        f.write(audio_bytes)


if __name__ == "__main__":
    input_audio = "yoruba_short_1.wav"       # replace with your audio file
    output_audio_1 = "decoded_plain.wav"  # decoded from plain base64
    output_audio_2 = "decoded_datauri.wav" # decoded from data URI

    # --- Plain base64 round trip ---
    print("Testing plain Base64 encoding/decoding...")
    b64_str = encode_audio_to_base64(input_audio)
    decode_base64_to_audio(b64_str, output_audio_1)
    print(f"‚úÖ Plain Base64 decoded file saved as {output_audio_1}")

    # --- Data URI round trip ---
    print("Testing data URI encoding/decoding...")
    data_uri = encode_audio_to_data_uri(input_audio)
    decode_data_uri_to_audio(data_uri, output_audio_2)
    print(f"‚úÖ Data URI decoded file saved as {output_audio_2}")


Testing plain Base64 encoding/decoding...
‚úÖ Plain Base64 decoded file saved as decoded_plain.wav
Testing data URI encoding/decoding...
‚úÖ Data URI decoded file saved as decoded_datauri.wav


In [5]:
# Open an audio file and read it as bytes
with open("yoruba_short_1.wav", "rb") as f:
    audio_bytes = f.read()

# Now audio_bytes is a `bytes` object
print(type(audio_bytes))  # <class 'bytes'>

<class 'bytes'>


In [7]:
import base64

def test_base64_roundtrip(audio_bytes: bytes) -> bool:
    """
    Encodes audio bytes to Base64, decodes back, 
    and checks if the round-trip preserves data.

    Args:
        audio_bytes (bytes): Raw audio data.

    Returns:
        bool: True if decoded bytes match the original, False otherwise.
    """
    # Encode to base64 string
    encoded = base64.b64encode(audio_bytes).decode("utf-8")

    # Decode back to bytes
    decoded = base64.b64decode(encoded.encode("utf-8"))

    # Verify round-trip
    return decoded == audio_bytes


In [9]:
# Read an audio file into bytes
with open("yoruba_short_1.wav", "rb") as f:
    audio_bytes = f.read()

# Test round-trip
success = test_base64_roundtrip(audio_bytes)
print("Round-trip successful:", success)  # should print True


Round-trip successful: True
