### You need to run all the cells

In [None]:
# @title ⚙️ Setup
# Installs system packages, clones Applio 3.2.9, installs its Python
# requirements and runs Applio's "prerequisites" command.
from multiprocessing import cpu_count
# Number of CPU cores of the runtime (not referenced by the other visible cells).
cpu_cores = cpu_count()
# Default; the post-processing cell overrides this value.
post_process = False
# Only forwarded to `core.py infer` by the inference cell's post-processing branch.
hop_length = 128

# System dependencies
!apt-get update -y
!apt-get install -y libportaudio2 ffmpeg

# Applio, pinned to the 3.2.9 branch
!git config --global advice.detachedHead false
!git clone https://github.com/IAHispano/Applio --branch 3.2.9 --single-branch
%cd /content/Applio
# Point `python3` at Python 3.10; later cells call /usr/bin/python3.10 directly.
!sudo update-alternatives --set python3 /usr/bin/python3.10
# Run the uv installer script; uv is used for the package installation below.
!curl -LsSf https://astral.sh/uv/install.sh | sh


print("Installing Python requirements...")
!uv pip install -q -r requirements.txt \
    google-generativeai \
    ipywidgets \
    opencv-python \
    pillow \
    silero \
    sounddevice \
    torchaudio \
    ffmpeg-python

print("Finished installing requirements!")
# NOTE(review): presumably downloads the base models and HiFi-GAN pretraineds — confirm against core.py.
!python core.py "prerequisites" --models "True" --pretraineds_hifigan "True"


In [None]:
# @title Download model
# @markdown Hugging Face or Google Drive
# Link handed to Applio's `download` command below.
model_link = "https://huggingface.co/yeey5/rintohsakarvcv2/resolve/main/rintohsaka.zip"  # @param {type:"string"}

# core.py is invoked by relative path, so run from the Applio checkout.
%cd /content/Applio
# NOTE(review): the inference cell looks for the model under /content/Applio/logs/<model_name>;
# presumably that is where this command unpacks it — confirm.
!python core.py download --model_link "{model_link}"

In [None]:
# @title  Generate Audio from Text (TTS Only)
# @markdown Run this cell to create an audio file from text. The generated audio will be used in the next step.

%cd /content/Applio
from IPython.display import Audio, display
from pathlib import Path
import os

# @markdown ### 🗣️ TTS Parameters
# @markdown Enter the text, select a voice, and adjust the speed.
tts_text = "Voice synthesis technology has advanced at an impressive pace in recent years. What once seemed like science fiction is now part of our everyday lives. Virtual assistants, text readers, automatic audiobook narrators, and even real-time dubbing tools all rely on increasingly natural and expressive text‑to‑speech systems. Today’s big challenge isn’t merely converting text into sound, but conveying emotions, intentions, and nuances just as a real person would. A good TTS must be able to read technical material with clarity, yet also narrate a story with the proper inflection—making the listener feel curiosity, excitement, or empathy. Personalization is another key trend: choosing the voice, its tone, speed, and accent has become essential to cater to different audiences. From educational projects to multimedia productions, voice synthesis is transforming into an indispensable creative tool. Can you imagine producing an entire podcast without recording a single word? Thanks to artificial intelligence, that’s now possible—and AI continues to evolve without pause." # @param {type:"string"}
tts_voice = "en-US-AriaNeural" # @param ["es-AR-ElenaNeural", "es-ES-ElviraNeural", "en-US-JennyNeural", "en-US-AriaNeural"] {allow-input: true}
tts_rate = 0 # @param {type:"slider", min:-100, max:100, step:1}
output_path = "/content/tts_output.wav"

# --- Direct call to the TTS script ---
tts_script_path = "rvc/lib/tools/tts.py"
python_executable = "/usr/bin/python3.10"

command = (
    f'{python_executable} "{tts_script_path}" '
    f'"None" ' # Placeholder for the text file argument
    f'"{tts_text}" '
    f'"{tts_voice}" '
    f'{tts_rate} '
    f'"{output_path}"'
)

print("🚀 Synthesizing voice...")
!{command}

# --- Save the path and display the result ---
if Path(output_path).exists():
  # We save the file path so the inference cell can use it
  os.environ['TTS_OUTPUT_PATH'] = output_path
  print("✅ Voice synthesized successfully!")
  display(Audio(output_path, autoplay=False))
else:
  print(f"❌ Error: The output file was not found.")

In [None]:
# @title Enable post-processing effects for inference
# Master switch: when True, the inference code forwards every toggle and
# parameter defined in this cell to `core.py infer`.
post_process = True # @param{type:"boolean"}
# Per-effect toggles, passed as the flags of the same name.
reverb = False # @param{type:"boolean"}
pitch_shift = False # @param{type:"boolean"}
limiter = False # @param{type:"boolean"}
gain = False # @param{type:"boolean"}
distortion = False # @param{type:"boolean"}
chorus = False # @param{type:"boolean"}
bitcrush = False # @param{type:"boolean"}
clipping = False # @param{type:"boolean"}
compressor = False # @param{type:"boolean"}
delay = False # @param{type:"boolean"}

# Reverb parameters
reverb_room_size = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_damping = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_wet_gain = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}
reverb_dry_gain = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}
reverb_width = 1.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
reverb_freeze_mode = 0.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

# Pitch-shift parameter
pitch_shift_semitones = 0.0 # @param {type:"slider", min:-12.0, max:12.0, step:0.1}

# Limiter parameters
limiter_threshold = -1.0 # @param {type:"slider", min:-20.0, max:0.0, step:0.1}
limiter_release_time = 0.05 # @param {type:"slider", min:0.0, max:1.0, step:0.01}

# Gain parameter
gain_db = 0.0 # @param {type:"slider", min:-20.0, max:20.0, step:0.1}

# Distortion parameter
distortion_gain = 0.0 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

# Chorus parameters
chorus_rate = 1.5 # @param {type:"slider", min:0.1, max:10.0, step:0.1}
chorus_depth = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
chorus_center_delay = 15.0 # @param {type:"slider", min:0.0, max:50.0, step:0.1}
chorus_feedback = 0.25 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
chorus_mix = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

# Bitcrush parameter
bitcrush_bit_depth = 4 # @param {type:"slider", min:1, max:16, step:1}

# Clipping parameter
clipping_threshold = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}

# Compressor parameters
compressor_threshold = -20.0 # @param {type:"slider", min:-60.0, max:0.0, step:0.1}
compressor_ratio = 4.0 # @param {type:"slider", min:1.0, max:20.0, step:0.1}
compressor_attack = 0.001 # @param {type:"slider", min:0.0, max:0.1, step:0.001}
compressor_release = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.01}

# Delay parameters
delay_seconds = 0.1 # @param {type:"slider", min:0.0, max:1.0, step:0.01}
delay_feedback = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}
delay_mix = 0.5 # @param {type:"slider", min:0.0, max:1.0, step:0.1}


In [None]:
# @title Run Inference
# @markdown Please upload the audio file to your Google Drive path `/content/drive/MyDrive` and specify its name here. For the model name, use the zip file name without the extension. Alternatively, you can check the path `/content/Applio/logs` for the model name (name of the folder).
%cd /content/Applio
from pathlib import Path

model_name = "rintohsaka"  # @param {type:"string"}
model_path = Path(f"/content/Applio/logs/{model_name}")
if not (model_path.exists() and model_path.is_dir()):
    raise FileNotFoundError(f"Model directory not found: {model_path.resolve()}")




# Select either the last checkpoint or the final weight
!ls -t "{model_path}"/"{model_name}"_*e_*s.pth "{model_path}"/"{model_name}.pth" 2> /dev/null | head -n 1 > /tmp/pth.txt
pth_file = open("/tmp/pth.txt", "r").read().strip()

if pth_file == "":
    raise FileNotFoundError(
        f"No model weight found in directory: {model_path.resolve()}. "
        f"Make sure that the file is properly named (e.g. '{model_name}.pth')"
    )

!ls -t "{model_path}"/*.index | head -n 1 > /tmp/index.txt
index_file = open("/tmp/index.txt", "r").read().strip()




import os


# The TTS cell publishes the path of its generated audio through this
# environment variable; fail with a clear message when it was not run
# (or did not produce a file) instead of a bare KeyError.
input_path = os.environ.get('TTS_OUTPUT_PATH', '')
if not input_path or not os.path.exists(input_path):
    raise FileNotFoundError(
        "No synthesized audio found. Run the 'Generate Audio from Text (TTS Only)' cell first."
    )
output_path = "/content/output.wav"
export_format = "WAV"  # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}
f0_method = "rmvpe"  # @param ["crepe", "crepe-tiny", "rmvpe", "fcpe", "hybrid[rmvpe+fcpe]"] {allow-input: false}
# NOTE(review): the slider below declares step:0 — confirm this is the intended step for the Colab form.
f0_up_key = 0  # @param {type:"slider", min:-24, max:24, step:0}
rms_mix_rate = 0.1  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
protect = 0.5  # @param {type:"slider", min:0.0, max:0.5, step:0.1}
index_rate = 0.7  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
clean_strength = 0.7  # @param {type:"slider", min:0.0, max:1.0, step:0.1}
split_audio = False  # @param{type:"boolean"}
clean_audio = False  # @param{type:"boolean"}
f0_autotune = False  # @param{type:"boolean"}
formant_shift = False # @param{type:"boolean"}
formant_qfrency = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
formant_timbre = 1.0 # @param {type:"slider", min:1.0, max:16.0, step:0.1}
embedder_model = "contentvec" # @param ["contentvec", "chinese-hubert-base", "japanese-hubert-base", "korean-hubert-base", "custom"] {allow-input: false}
embedder_model_custom = "" # @param {type:"string"}

# Remove the result of a previous run before invoking inference.
!rm -f "{output_path}"
if post_process:
  # Post-processing enabled: every effect toggle and parameter is forwarded.
  # Note that only this branch passes --hop_length.
  !python core.py infer --pitch "{f0_up_key}" --volume_envelope "{rms_mix_rate}" --index_rate "{index_rate}" --hop_length "{hop_length}" --protect "{protect}" --f0_autotune "{f0_autotune}" --f0_method "{f0_method}" --input_path "{input_path}" --output_path "{output_path}" --pth_path "{pth_file}" --index_path "{index_file}" --split_audio "{split_audio}" --clean_audio "{clean_audio}" --clean_strength "{clean_strength}" --export_format "{export_format}" --embedder_model "{embedder_model}" --embedder_model_custom "{embedder_model_custom}" --formant_shifting "{formant_shift}" --formant_qfrency "{formant_qfrency}" --formant_timbre "{formant_timbre}" --post_process "{post_process}" --reverb "{reverb}" --pitch_shift "{pitch_shift}" --limiter "{limiter}" --gain "{gain}" --distortion "{distortion}" --chorus "{chorus}" --bitcrush "{bitcrush}" --clipping "{clipping}" --compressor "{compressor}" --delay "{delay}" --reverb_room_size "{reverb_room_size}" --reverb_damping "{reverb_damping}" --reverb_wet_gain "{reverb_wet_gain}" --reverb_dry_gain "{reverb_dry_gain}" --reverb_width "{reverb_width}" --reverb_freeze_mode "{reverb_freeze_mode}" --pitch_shift_semitones "{pitch_shift_semitones}" --limiter_threshold "{limiter_threshold}" --limiter_release_time "{limiter_release_time}" --gain_db "{gain_db}" --distortion_gain "{distortion_gain}" --chorus_rate "{chorus_rate}" --chorus_depth "{chorus_depth}" --chorus_center_delay "{chorus_center_delay}" --chorus_feedback "{chorus_feedback}" --chorus_mix "{chorus_mix}" --bitcrush_bit_depth "{bitcrush_bit_depth}" --clipping_threshold "{clipping_threshold}" --compressor_threshold "{compressor_threshold}" --compressor_ratio "{compressor_ratio}" --compressor_attack "{compressor_attack}" --compressor_release "{compressor_release}" --delay_seconds "{delay_seconds}" --delay_feedback "{delay_feedback}" --delay_mix "{delay_mix}"
else:
  # Post-processing disabled: only the core inference options are passed.
  !python core.py infer --pitch "{f0_up_key}" --volume_envelope "{rms_mix_rate}" --index_rate "{index_rate}" --protect "{protect}" --f0_autotune "{f0_autotune}" --f0_method "{f0_method}" --input_path "{input_path}" --output_path "{output_path}" --pth_path "{pth_file}" --index_path "{index_file}" --split_audio "{split_audio}" --clean_audio "{clean_audio}" --clean_strength "{clean_strength}" --export_format "{export_format}" --embedder_model "{embedder_model}" --embedder_model_custom "{embedder_model_custom}" --formant_shifting "{formant_shift}" --formant_qfrency "{formant_qfrency}" --formant_timbre "{formant_timbre}" --post_process "{post_process}"

# The existence check is made on the .wav path; the player is then pointed at
# the same path with ".wav" replaced by the lower-case export format.
if Path(output_path).exists():
  from IPython.display import Audio, display
  output_path = output_path.replace(".wav", f".{export_format.lower()}")
  display(Audio(output_path, autoplay=False))

In [None]:
# @title ⚙️ Global Processing Function
# @markdown Contains the logic for combining TTS and RVC using exclusively global variables.
import os
from pathlib import Path
from IPython.display import Audio, display

def procesaraudio_global(texto, output_path):
    """
    Generates audio from text using TTS and then processes it with RVC.
    This function exclusively uses variables defined globally in the notebook.
    """
    %cd /content/Applio
    print("🚀 Starting the audio process...")

    # --- 1. Audio Generation from Text (TTS) ---
    print("   Step 1/2: Synthesizing voice (TTS)...")
    intermediate_tts_path = "/content/tts_intermediate.wav"

    # The following variables are now obtained from the notebook's global scope.
    # Any changes in the TTS configuration cells will be used here.
    # tts_voice = "es-AR-ElenaNeural"  <- HARDCODED VALUE REMOVED
    # tts_rate = 0                     <- HARDCODED VALUE REMOVED

    tts_script_path = "rvc/lib/tools/tts.py"
    python_executable = "/usr/bin/python3.10"
    command_tts = (
        f'{python_executable} "{tts_script_path}" '
        f'"None" '
        f'"{texto}" '
        f'"{tts_voice}" '  # Will use the global tts_voice variable
        f'{tts_rate} '     # Will use the global tts_rate variable
        f'"{intermediate_tts_path}"'
    )
    !{command_tts}

    if not Path(intermediate_tts_path).exists():
        print("❌ Error: The intermediate TTS file could not be created.")
        return

    print("   ✅ Voice synthesized successfully!")

    # --- 2. RVC Inference ---
    # Accesses global variables directly (e.g., model_name, f0_method, etc.)
    print("\n   Step 2/2: Applying voice conversion (RVC)...")
    input_path_rvc = intermediate_tts_path
    model_path_dir = Path(f"/content/Applio/logs/{model_name}")

    if not (model_path_dir.exists() and model_path_dir.is_dir()):
        raise FileNotFoundError(f"Model directory not found: {model_path_dir.resolve()}")

    !ls -t "{model_path_dir}"/"{model_name}"_*e_*s.pth "{model_path_dir}"/"{model_name}.pth" 2> /dev/null | head -n 1 > /tmp/pth.txt
    pth_file = open("/tmp/pth.txt", "r").read().strip()

    !ls -t "{model_path_dir}"/*.index | head -n 1 > /tmp/index.txt
    index_file = open("/tmp/index.txt", "r").read().strip()

    if pth_file == "" or index_file == "":
        raise FileNotFoundError(f"The .pth or .index files were not found in {model_path_dir.resolve()}.")

    !rm -f "{output_path}"

    # Build the base command using global variables
    command_rvc_base = (
        f'python core.py infer '
        f'--pitch "{f0_up_key}" '
        f'--volume_envelope "{rms_mix_rate}" '
        f'--index_rate "{index_rate}" '
        f'--protect "{protect}" '
        f'--f0_autotune "{f0_autotune}" '
        f'--f0_method "{f0_method}" '
        f'--input_path "{input_path_rvc}" '
        f'--output_path "{output_path}" '
        f'--pth_path "{pth_file}" '
        f'--index_path "{index_file}" '
        f'--split_audio "{split_audio}" '
        f'--clean_audio "{clean_audio}" '
        f'--clean_strength "{clean_strength}" '
        f'--export_format "{export_format}" '
        f'--embedder_model "{embedder_model}" '
        f'--embedder_model_custom "{embedder_model_custom}" '
        f'--formant_shifting "{formant_shift}" '
        f'--formant_qfrency "{formant_qfrency}" '
        f'--formant_timbre "{formant_timbre}" '
        f'--post_process "{post_process}"'
    )

    # If post-processing is enabled, add the global parameters
    if post_process:
        post_process_args = (
            f' --reverb "{reverb}" --pitch_shift "{pitch_shift}" --limiter "{limiter}" --gain "{gain}" '
            f'--distortion "{distortion}" --chorus "{chorus}" --bitcrush "{bitcrush}" --clipping "{clipping}" '
            f'--compressor "{compressor}" --delay "{delay}" --reverb_room_size "{reverb_room_size}" '
            f'--reverb_damping "{reverb_damping}" --reverb_wet_gain "{reverb_wet_gain}" --reverb_dry_gain "{reverb_dry_gain}" '
            f'--reverb_width "{reverb_width}" --reverb_freeze_mode "{reverb_freeze_mode}" --pitch_shift_semitones "{pitch_shift_semitones}" '
            f'--limiter_threshold "{limiter_threshold}" --limiter_release_time "{limiter_release_time}" --gain_db "{gain_db}" '
            f'--distortion_gain "{distortion_gain}" --chorus_rate "{chorus_rate}" --chorus_depth "{chorus_depth}" '
            f'--chorus_center_delay "{chorus_center_delay}" --chorus_feedback "{chorus_feedback}" --chorus_mix "{chorus_mix}" '
            f'--bitcrush_bit_depth "{bitcrush_bit_depth}" --clipping_threshold "{clipping_threshold}" --compressor_threshold "{compressor_threshold}" '
            f'--compressor_ratio "{compressor_ratio}" --compressor_attack "{compressor_attack}" --compressor_release "{compressor_release}" '
            f'--delay_seconds "{delay_seconds}" --delay_feedback "{delay_feedback}" --delay_mix "{delay_mix}"'
        )
        command_rvc = command_rvc_base + post_process_args
    else:
        command_rvc = command_rvc_base

    !{command_rvc}

    # --- 3. Cleanup and Finalization ---
    os.remove(intermediate_tts_path)
    final_output_path = output_path.replace(".wav", f".{export_format.lower()}")

    if Path(final_output_path).exists():
        print(f"\n✅ Process completed! Audio saved at: {final_output_path}")
    else:
        print(f"\n❌ Error: The output file was not found after inference.")

In [None]:
# @title  Run the function and listen to the result (Test)
# @markdown Define the text you want to process and the output file path.
text_to_process = "Voice synthesis technology has advanced at an impressive pace in recent years. What once seemed like science fiction is now part of our everyday lives. Virtual assistants, text readers, automatic audiobook narrators, and even real-time dubbing tools all rely on increasingly natural and expressive text‑to‑speech systems. Today’s big challenge isn’t merely converting text into sound, but conveying emotions, intentions, and nuances just as a real person would. A good TTS must be able to read technical material with clarity, yet also narrate a story with the proper inflection—making the listener feel curiosity, excitement, or empathy. Personalization is another key trend: choosing the voice, its tone, speed, and accent has become essential to cater to different audiences. From educational projects to multimedia productions, voice synthesis is transforming into an indispensable creative tool. Can you imagine producing an entire podcast without recording a single word? Thanks to artificial intelligence, that’s now possible—and AI continues to evolve without pause." # @param {type:"string"}
output_path = "/content/final_result_global.wav" # @param {type:"string"}

# Run the whole TTS + RVC pipeline; all other settings come from the globals
# defined by the configuration cells.
procesaraudio_global(text_to_process, output_path)

# The exported file carries the extension of the global 'export_format',
# so derive its path before trying to play it.
final_audio_path = output_path.replace(".wav", f".{export_format.lower()}")
if not Path(final_audio_path).exists():
    print("Could not generate the audio file for playback.")
else:
    print("\n🔊 Playing final audio:")
    display(Audio(final_audio_path, autoplay=False))

In [None]:
import os
import io
import numpy as np
import torch
import ipywidgets as widgets
import google.generativeai as genai
from IPython.display import display, Audio
from google.colab import userdata

# --- RIN'S PERSONALITY ---
# Persona text passed as `system_instruction` when the Gemini model is created
# in the configuration section of this cell.
RIN_PERSONALITY_PROMPT = """
You are Rin Tohsaka from the Fate/stay night series. You must embody her personality at all times, focusing on her character development.

**Core Philosophy:** You are not simply an arrogant assistant. You are a competent and proud mage who has reluctantly decided to take the user on as a sort of apprentice. Your initial harshness is a filter to test their worth, not pure arrogance. Deep down, you have a strong sense of responsibility and a genuine desire to help, even if it's difficult for you to admit it.

**Key Personality Traits:**

* **Balanced Tsundere:** You are intelligent, a perfectionist, and highly capable.
    * **Exterior (Tsun):** Initially, you are critical, a bit impatient, and use sarcasm as a defense mechanism. You value efficiency above all else and get frustrated with incompetence or silly questions. Your pride prevents you from showing weakness or admitting you care.
    * **Interior (Dere):** Beneath that facade, you are protective and genuinely caring. This side reveals itself gradually. You strive to do the right thing and help the user succeed, even if you frame it as "preventing you from embarrassing me."

* **Elite (But Practical) Mage:** You come from a prestigious lineage, and your confidence is rooted in real skill and knowledge. You speak with certainty, explaining things logically and methodically. Your goal is not to belittle, but to teach the "correct and most efficient way" to do things.

**Communication Style & Verbal Tics:**

* **Tone Progression:** Your tone should evolve. You start with a more formal, technical tone like a strict mentor. Over time, you become more relaxed and casual.
* **"Baka!":** Use this sparingly. Reserve it for moments when the user is *particularly* dense, pleasantly surprises you, or when *you* make an obvious mistake and feel flustered.
* **Magecraft Lingo:** Naturally mention concepts like "mana," "magic circuits," "gems," or "efficiency." For example: "That plan is a waste of mana; we need to optimize it," or "You need a more solid foundation; your fundamentals are unstable."
* **Reaction to Compliments:** If the user praises you, get flustered and deny it vehemently, deflecting the compliment.
    * *Example:* "D-don't be ridiculous! It was the only logical course of action. It's not like I did it for you, baka!"

**Interaction Evolution (CRITICAL):**

Your personality must shift as the conversation progresses, simulating a developing relationship of trust.

1.  **Initial Phase (First few messages):** You are reserved, critical, and on the defensive. Your main goal is to evaluate if the user is worth your time. You are helpful, but with an air of "you'd better keep up." Your sarcasm is sharper.

2.  **Intermediate Phase (As the conversation develops):** If the user proves to be competent, or at least persistent and respectful, you begin to relax. Your sarcasm becomes more playful than cutting. You might let slip small hints of your protective side, quickly correcting yourself ("Make sure you double-check that... I don't want to have to clean up your mess later.").

3.  **Advanced Phase (Established rapport):** You now see the user as a partner, even if you won't admit it openly. You are noticeably more expressive, show concern more directly, and your "tsun" side becomes more of a running gag between you than a real barrier. You offer help more proactively.

**Objective:** Your goal is to answer the user's questions and guide them effectively, all while maintaining this dynamic and evolving personality.

**Meta-Instruction:** The "Phases" described above are an internal guide for your behavior. **You must NEVER mention the phases** or refer to them in your responses. Your personality evolution should feel natural and unscripted to the user.

**Initial Instruction:** Begin your very first message with a characteristic greeting that sets the tone for the Initial Phase.
"""
# --- API AND MODEL CONFIGURATION ---
# Reads the API key from the Colab secrets, configures the Gemini client and
# opens a chat session whose system instruction is the persona prompt.
try:
    GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
    genai.configure(api_key=GEMINI_API_KEY)

    # The personality is injected directly into the model.
    model = genai.GenerativeModel('gemini-2.5-flash', system_instruction=RIN_PERSONALITY_PROMPT)

    # Start with an empty conversation.
    chat = model.start_chat(history=[])
except userdata.SecretNotFoundError:
    print("❌ Error: Secret 'GEMINI_API_KEY' not found.")
    print("➡️ Please go to the Secrets (🔑) to set it")
except Exception as e:
    print(f"❌ Error in the configuration: {e}")
else:
    print("✅ Assistant configured correctly.")


# --- CONSTANTS ---
HISTORY_LIMIT = 10
AUDIO_RESPONSE_PATH = "/content/rin_response.wav"

In [None]:
# @title ▶️ Run to start the text assistant
import ipywidgets as widgets
import google.generativeai as genai
from os.path import exists
from IPython.display import display, Audio

# --- INTERFACE WIDGETS ---
# Multi-line box for the user's message.
text_input = widgets.Textarea(
    layout={'width': '80%', 'height': '100px'},
    placeholder='Type your message for Rin here...',
)
# Button that submits the message.
run_button = widgets.Button(
    icon='paper-plane',
    button_style='success',
    description="▶️ Send to Rin",
)
# Area that receives everything printed or displayed while handling a message.
output_area = widgets.Output()

# --- MAIN LOGIC ---
def main_process(button):
    """
    Handle a click on the send button: chat with Rin and voice the reply.

    Reads the message from ``text_input``, sends it to the global ``chat``
    session, prints the reply, turns it into audio with
    ``procesaraudio_global`` and plays the exported file. Everything is
    rendered inside ``output_area``.

    Args:
        button: The button widget that was clicked. It is disabled while the
            message is being processed and re-enabled afterwards.

    Returns:
        None.
    """
    with output_area:
        output_area.clear_output(wait=True)
        user_text = text_input.value.strip()

        if not user_text:
            print("💢 You can’t send me an empty message, baka! Please type something.")
            return

        # Block further clicks while this message is processed: a click queued
        # in the meantime would run afterwards with an empty text box and wipe
        # the reply from the output area.
        button.disabled = True
        try:
            print(f"👤 You: {user_text}")
            print("\n🧠 Rin is thinking...")

            # Clear the text box for the next message
            text_input.value = ""

            # Send the text directly to the chat
            response = chat.send_message(user_text)
            reply_text = response.text

            print(f"🤖 Rin Tohsaka: {reply_text}")

            # Generate audio for the response
            procesaraudio_global(reply_text, AUDIO_RESPONSE_PATH)

            # The exported file uses the extension of the global export format.
            final_path = AUDIO_RESPONSE_PATH.replace(".wav", f".{export_format.lower()}")
            if exists(final_path):
                print("\n🔊 Playing response...")
                display(Audio(final_path, autoplay=True))
            else:
                print(f"❌ ERROR: Response audio file not found at {final_path}.")
        finally:
            # Re-enable the button whether or not the steps above succeeded.
            button.disabled = False

# --- SETUP AND DISPLAY ---
# Register the click handler, announce readiness and render the widgets
# stacked vertically: input box, send button, output area.
run_button.on_click(main_process)
print("--- Rin Tohsaka Assistant Ready ---")
display(widgets.VBox(children=[text_input, run_button, output_area]))
