<a href="https://colab.research.google.com/github/blueliner17/Voice-Cloner-System/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install ipywidgets  # Jupyter widgets
!jupyter nbextension enable --py widgetsnbextension --sys-prefix  # Classic Notebook
# (JupyterLab ≥ 4 already ships with widget support)

Enabling notebook extension jupyter-js-widgets/extension...
Paths used for configuration of notebook: 
    	/usr/etc/jupyter/nbconfig/notebook.json
Paths used for configuration of notebook: 
    	
      - Validating: [32mOK[0m
Paths used for configuration of notebook: 
    	/usr/etc/jupyter/nbconfig/notebook.json


In [5]:
import os, uuid, tempfile
import ipywidgets as widgets
from IPython.display import display, Audio, clear_output

In [6]:
! pip install outetts==0.3.0



In [7]:
import outetts

class VoiceCloner:
    """
    Generate speech in a voice cloned from a reference audio sample.

    The class wraps an ``outetts.InterfaceHF`` instance: a speaker profile is
    built from the sample recording and then passed to the interface's
    ``generate`` call together with the text to synthesize.
    """
    def __init__(self,
                 model_path: str = "OuteAI/OuteTTS-0.2-500M",
                 model_version: str = "0.2",
                 language: str = "en",
                 whisper_model: str = "turbo",
                 device: str = None):
        """
        Build the TTS interface and remember the speaker-creation settings.

        Args:
            model_path: Model identifier/path handed to ``HFModelConfig_v1``.
            model_version: Version string handed to ``InterfaceHF``.
            language: Language code handed to ``HFModelConfig_v1``.
            whisper_model: Forwarded as ``whisper_model`` to ``create_speaker``.
            device: Forwarded as ``whisper_device`` to ``create_speaker``;
                ``None`` leaves the choice to the library.
        """
        # Configure the TTS model
        cfg = outetts.HFModelConfig_v1(
            model_path=model_path,
            language=language
        )
        self.interface = outetts.InterfaceHF(model_version=model_version, cfg=cfg)
        self.whisper_model = whisper_model
        self.whisper_device = device

    def clone_and_speak(self,
                        text: str,
                        sample_audio_path: str,
                        output_path: str,
                        temperature: float = 0.1,
                        repetition_penalty: float = 1.1,
                        max_length: int = 4096) -> str:
        """
        Clone the voice from sample_audio_path and generate speech for the provided text.

        Args:
            text: The input text to synthesize. Must contain non-whitespace characters.
            sample_audio_path: Path to the .wav (or supported) file for cloning.
            output_path: Path where the generated .wav will be saved. Missing
                parent directories are created.
            temperature: Sampling temperature for generation.
            repetition_penalty: Repetition penalty.
            max_length: Max token length for TTS.

        Returns:
            The path to the generated .wav file.

        Raises:
            ValueError: If ``text`` is not a string or is empty/whitespace only.
            FileNotFoundError: If ``sample_audio_path`` is not an existing file.
        """
        import os

        # Fail fast with a clear message instead of an opaque error from deep
        # inside the speaker-creation / generation code.
        if not isinstance(text, str) or not text.strip():
            raise ValueError("text must be a non-empty string")
        if not os.path.isfile(sample_audio_path):
            raise FileNotFoundError(f"Sample audio not found: {sample_audio_path}")

        # Make sure the destination directory exists before the (slow)
        # generation step, so saving cannot fail for that reason afterwards.
        out_dir = os.path.dirname(os.path.abspath(output_path))
        os.makedirs(out_dir, exist_ok=True)

        # Create a new speaker from the sample. No transcript is supplied, so
        # the whisper settings are passed along (presumably used by the
        # library to transcribe the sample -- confirm against the outetts docs).
        speaker = self.interface.create_speaker(
            audio_path=sample_audio_path,
            transcript=None,
            whisper_model=self.whisper_model,
            whisper_device=self.whisper_device
        )
        # Generate speech
        output = self.interface.generate(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=max_length,
            speaker=speaker
        )
        # Save and return
        output.save(output_path)
        return output_path






In [13]:
!pip install gradio --quiet



In [11]:
import ipywidgets as widgets
from IPython.display import display, HTML, Audio
import tempfile, uuid, os

# --- 1. Add Custom Styles ----------------------------------------------
# Stylesheet for the voice-cloner panel: dark gradient card, neon heading,
# accent-coloured button and download link. Kept in a named constant so the
# markup is separate from the call that injects it into the cell output.
_VOICE_UI_STYLESHEET = """
<style>
body {
    font-family: 'Segoe UI', sans-serif;
}
.voice-ui {
    background: linear-gradient(135deg, #1c1c2c, #2e2e4e);
    border-radius: 20px;
    padding: 30px;
    color: white;
    box-shadow: 0 0 20px rgba(0,0,0,0.4);
}
.voice-ui h2 {
    text-align: center;
    font-size: 26px;
    margin-bottom: 25px;
    color: #00ffe1;
    text-shadow: 0 0 8px #00ffe1;
}
.voice-ui .widget-label {
    color: #bbb;
}
.voice-ui .button-style {
    font-weight: bold;
    background-color: #00ffe1;
    color: #000;
    border-radius: 10px;
    padding: 10px 20px;
}
.voice-ui .button-style:hover {
    background-color: #00c3b0;
    color: white;
}
.voice-ui .download {
    margin-top: 12px;
    display: inline-block;
}
.voice-ui .download a {
    color: #00ffe1;
    border: 1px solid #00ffe1;
    padding: 6px 12px;
    border-radius: 6px;
    text-decoration: none;
}
.voice-ui .download a:hover {
    background: #00ffe1;
    color: black;
}
</style>
"""

_stylesheet_markup = HTML(_VOICE_UI_STYLESHEET)
display(_stylesheet_markup)

# --- 2. Setup VoiceCloner -------------------------------------------------
# Use the VoiceCloner defined earlier in the notebook when it exists.
# Unconditionally redefining the class here would shadow the real
# implementation and make the UI merely echo the uploaded sample.
try:
    VoiceCloner
except NameError:
    class VoiceCloner:
        """Offline stand-in used only when no real VoiceCloner is defined.

        It performs no synthesis: the sample file is copied to the output
        path so the rest of the UI can be exercised.
        """

        def clone_and_speak(self, text, sample_audio_path, output_path,
                            temperature=0.2, repetition_penalty=1.1,
                            max_length=4096):
            """Copy ``sample_audio_path`` to ``output_path`` and return that path.

            ``text`` and the sampling parameters are accepted so the call
            signature matches the real implementation; they are ignored.
            """
            import shutil
            shutil.copy(sample_audio_path, output_path)
            return output_path

voice_cloner = VoiceCloner()

# --- 3. Create Widgets ----------------------------------------------------
def _stretch(**extra):
    """Return a new full-width Layout; every widget gets its own instance."""
    return widgets.Layout(width='100%', **extra)


# Title banner shown at the top of the panel.
header = widgets.HTML(value="<h2>🎙️ NEUROVOX | Advanced Voice Cloner</h2>")

# Free-text box holding the sentence to synthesize.
text_input = widgets.Textarea(
    description='Text:',
    value='Type something futuristic...',
    placeholder='Enter your message...',
    style=dict(description_width='60px'),
    layout=_stretch(height='80px'),
)

# Single-file picker for the reference recording.
file_upload = widgets.FileUpload(
    description='Upload .wav Sample',
    accept='.wav',
    multiple=False,
    style=dict(description_width='160px'),
    layout=_stretch(),
)

# Sampling temperature, adjustable from 0 to 1 in steps of 0.05.
temperature_slider = widgets.FloatSlider(
    description='Temperature:',
    value=0.2,
    min=0,
    max=1,
    step=0.05,
    style=dict(description_width='120px'),
    layout=_stretch(),
)

# Button that starts the cloning run.
generate_btn = widgets.Button(
    description='🚀 Generate Voice',
    button_style='',
    layout=_stretch(height='40px'),
)

# Feedback widgets: a one-line status message and the playback/download area.
status_output = widgets.HTML()
output_area = widgets.Output()

# --- 4. Define Button Callback --------------------------------------------
def _first_upload_content(upload_value):
    """Return the bytes of the first uploaded file, or None if there is none.

    ``FileUpload.value`` is a dict keyed by filename in ipywidgets 7 and a
    tuple of per-file records in ipywidgets 8. In both cases each record
    exposes its payload under ``'content'`` (bytes or a memoryview).
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        records = upload_value.values()
    else:
        records = upload_value
    first_record = next(iter(records))
    return bytes(first_record['content'])


def generate_clicked(b):
    """Button callback: clone the uploaded voice and present the result.

    Reads the uploaded sample, the text and the temperature from the module
    level widgets, calls ``voice_cloner.clone_and_speak`` and then shows an
    audio player plus a download link inside ``output_area``. Progress and
    errors are reported through ``status_output``.

    Args:
        b: The button instance passed by ``on_click`` (unused).
    """
    import base64
    from html import escape

    with output_area:
        output_area.clear_output()
        status_output.value = "<span style='color:orange;'>⏳ Processing...</span>"

        if not file_upload.value:
            status_output.value = "<span style='color:red;'>⚠️ Please upload a .wav file.</span>"
            return
        if not text_input.value.strip():
            status_output.value = "<span style='color:red;'>⚠️ Please enter text.</span>"
            return

        sample_path = None
        try:
            # Save uploaded .wav file. mkstemp returns an already-open file
            # descriptor; wrapping it with fdopen ensures it gets closed.
            sample_bytes = _first_upload_content(file_upload.value)
            fd, sample_path = tempfile.mkstemp(suffix='.wav')
            with os.fdopen(fd, 'wb') as f:
                f.write(sample_bytes)

            # Output path
            out_name = f"cloned_{uuid.uuid4().hex}.wav"
            out_path = os.path.abspath(out_name)

            # Call the cloner
            voice_cloner.clone_and_speak(
                text=text_input.value,
                sample_audio_path=sample_path,
                output_path=out_path,
                temperature=temperature_slider.value
            )

            status_output.value = "<span style='color:lightgreen;'>✅ Done! Playing below:</span>"
            display(Audio(filename=out_path, autoplay=True))

            # A browser cannot fetch an absolute filesystem path, so the
            # audio is embedded in the link as a base64 data URI instead.
            with open(out_path, 'rb') as generated:
                encoded = base64.b64encode(generated.read()).decode('ascii')
            display(HTML(
                f"<div class='download'><a href='data:audio/wav;base64,{encoded}' "
                f"download='{out_name}'>⬇️ Download Audio</a></div>"
            ))

        except Exception as e:
            # Escape the message: it is interpolated into HTML markup.
            status_output.value = f"<span style='color:red;'>❌ Error: {escape(str(e))}</span>"
        finally:
            # The temporary copy of the sample is only needed during cloning.
            if sample_path is not None and os.path.exists(sample_path):
                os.remove(sample_path)

generate_btn.on_click(generate_clicked)

# --- 5. Display UI --------------------------------------------------------
ui = widgets.VBox([
    header,
    text_input,
    file_upload,
    temperature_slider,
    generate_btn,
    status_output,
    output_area
], layout=widgets.Layout(padding='20px'))

# Each display() call produces its own output element, so emitting an opening
# <div>, the widget and a closing </div> separately never nests the widget
# inside the div. Attach the CSS classes to the widgets themselves so the
# '.voice-ui' and '.voice-ui .button-style' rules actually match.
ui.add_class('voice-ui')
generate_btn.add_class('button-style')

display(ui)


VBox(children=(HTML(value='<h2>🎙️ NEUROVOX | Advanced Voice Cloner</h2>'), Textarea(value='Type something futu…