# STEVE'S VOICE CLONER - CLOUD EDITION

**No more Mac crashes!** Run your voice cloning in the cloud with unlimited GPU power.

## Quick Start:
1. **Run all cells** (Runtime → Run All)
2. **Upload voice files** via the Colab file panel (left sidebar), then re-run the interface cell so they appear in the dropdown
3. **Select voice & style** from dropdowns
4. **Generate cloned voice** - saves to the `cloned_output` folder in the Colab workspace!

---


In [None]:
# 🔧 INSTALL ALL DEPENDENCIES
print("🚀 Installing voice cloning dependencies...")

!pip install torch torchaudio soundfile librosa transformers accelerate
!pip install google-colab
!pip install ipywidgets

print("✅ All dependencies installed!")
print("💡 This may take 2-3 minutes on first run")


In [None]:
# SETUP LOCAL FILE SYSTEM
import os
import glob
from datetime import datetime

# Working folders, relative to the notebook's current working directory
INPUT_DIR = './input_voices'
OUTPUT_DIR = './cloned_output'

# Ensure both folders exist; folders that are already present are left as-is
os.makedirs(name=INPUT_DIR, exist_ok=True)
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

# Emit the setup summary in a single call (one message per line)
print(
    f"Voice input folder: {INPUT_DIR}",
    f"Cloned output folder: {OUTPUT_DIR}",
    "Local file system setup complete!",
    "\nUpload your voice files to the left file panel in Colab",
    sep="\n",
)


In [None]:
# 🎤 INSTALL CHATTERBOXTTS (Your Voice Cloning Engine)
print("📦 Installing ChatterboxTTS...")

# Install ChatterboxTTS from GitHub
!git clone https://github.com/coqui-ai/TTS.git
!cd TTS && pip install -e .

print("✅ ChatterboxTTS installed!")
print("💡 This is the same engine you use locally")


In [None]:
# 🎯 VOICE CLONER CLASS (Replicates Your Local Setup)
import torch
import soundfile as sf
import ipywidgets as widgets
from IPython.display import display, Audio, clear_output
import os
import glob
from datetime import datetime

class CloudVoiceCloner:
    """Notebook helper that clones a voice with the Coqui ``TTS`` package.

    Typical flow: ``scan_voice_files()`` to list reference recordings, then
    ``generate_voice()`` to synthesize speech and write a WAV file into the
    module-level ``OUTPUT_DIR``.

    Attributes:
        device: ``"cuda"`` when torch reports a GPU, otherwise ``"cpu"``.
        model: Lazily loaded TTS model (``None`` until ``load_model()`` runs).
        voice_files: Result of the most recent ``scan_voice_files()`` call.
        presets: Style name -> parameter dict. The names are used for
            validation and output file naming only; the values are NOT
            forwarded to the model (see the note in ``generate_voice``).
        quick_texts: Short label -> ready-made sentence.
    """

    # Lower-case file suffixes recognised as voice samples.
    AUDIO_SUFFIXES = (".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac")

    # Used only when the loaded model does not report its own output rate.
    DEFAULT_SAMPLE_RATE = 22050

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.voice_files = []

        # Your exact voice style presets
        self.presets = {
            "SASSY": {"exaggeration": 0.9, "cfg_weight": 0.4, "temperature": 1.1, "repetition_penalty": 1.4},
            "ROAST MODE": {"exaggeration": 1.1, "cfg_weight": 0.35, "temperature": 1.2, "repetition_penalty": 1.3},
            "ENERGETIC": {"exaggeration": 0.8, "cfg_weight": 0.7, "temperature": 0.9},
            "DRAMATIC": {"exaggeration": 1.2, "cfg_weight": 0.3, "temperature": 1.0},
            "NORMAL": {"exaggeration": 0.5, "cfg_weight": 0.5, "temperature": 0.8},
            "NATURAL": {"exaggeration": 0.4, "cfg_weight": 0.6, "temperature": 0.7}
        }

        # Quick text options (same as your local setup)
        self.quick_texts = {
            "Test": "Hello, this is a test of voice cloning technology. How does this sound?",
            "Amazing": "I'm speaking with a cloned voice. This is pretty amazing technology!",
            "Custom": "You can make me say anything you want with this voice cloning system.",
            "Future": "The future of AI voice synthesis is here, and it sounds incredible!"
        }

    def load_model(self):
        """Load the voice cloning model once; later calls are no-ops.

        Tries the multilingual XTTS v2 model first and falls back to a
        Tacotron2 LJSpeech model if that fails.

        Raises:
            ImportError: if the ``TTS`` package is not installed.
            Exception: whatever the fallback model raises if it also fails.
        """
        if self.model is not None:
            return

        print(f"🔄 Loading model on {self.device}...")

        # Import outside the fallback logic: if the package is missing, the
        # fallback cannot work either, so report that directly instead of
        # failing later on an unbound ``TTS`` name.
        try:
            from TTS.api import TTS
        except ImportError as err:
            raise ImportError(
                "The TTS package is not installed - run the TTS install cell first"
            ) from err

        try:
            self.model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
            print("✅ Model loaded successfully!")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            print("💡 Trying alternative model...")
            self.model = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(self.device)
            print("✅ Alternative model loaded!")
            # NOTE(review): the LJSpeech model is presumably single-speaker,
            # so it will not imitate the reference recording - confirm.
            print("⚠️ Alternative model may not imitate the selected voice")

    def _output_sample_rate(self):
        """Return the sample rate reported by the loaded model.

        Reads ``self.model.synthesizer.output_sample_rate`` when available
        and falls back to ``DEFAULT_SAMPLE_RATE`` otherwise.
        """
        synthesizer = getattr(self.model, "synthesizer", None)
        rate = getattr(synthesizer, "output_sample_rate", None)
        return int(rate) if rate else self.DEFAULT_SAMPLE_RATE

    def scan_voice_files(self, search_dirs=None):
        """Find voice recordings and return their paths, sorted.

        Args:
            search_dirs: Directories to scan (non-recursive). Defaults to the
                current working directory plus ``INPUT_DIR`` when that
                module-level name has been defined by the setup cell.

        Returns:
            Sorted list of file paths whose suffix (compared
            case-insensitively) is one of ``AUDIO_SUFFIXES``. The same list
            is stored on ``self.voice_files``.
        """
        if search_dirs is None:
            search_dirs = ["."]
            try:
                search_dirs.append(INPUT_DIR)
            except NameError:
                # Setup cell has not been run; scan the working directory only.
                pass

        found = set()  # de-duplicates files reachable through several dirs
        for directory in search_dirs:
            # glob's "*" skips dot-files, matching the previous behaviour.
            pattern = os.path.join(glob.escape(directory), "*")
            for path in glob.glob(pattern):
                suffix = os.path.splitext(path)[1].lower()
                if suffix in self.AUDIO_SUFFIXES and os.path.isfile(path):
                    found.add(path)

        self.voice_files = sorted(found)
        return list(self.voice_files)

    def generate_voice(self, text, voice_file, style, custom_text=None):
        """Synthesize speech in the reference voice and save it as a WAV file.

        Args:
            text: Literal text, or a key of ``self.quick_texts``.
            voice_file: Path of the reference recording.
            style: Key of ``self.presets`` (used for validation and naming).
            custom_text: When non-empty, overrides ``text``.

        Returns:
            ``(wav, output_path)`` on success, ``(None, None)`` when the
            inputs are invalid or synthesis / saving fails (the error is
            printed, not raised). Errors from ``load_model()`` propagate.
        """
        # Use custom text if provided
        if custom_text:
            text = custom_text
        elif text in self.quick_texts:
            text = self.quick_texts[text]

        # Validate before the (slow) model load and synthesis.
        if style not in self.presets:
            print(f"❌ Error: unknown style {style!r}")
            return None, None
        if not text or not text.strip():
            print("❌ Error: no text to speak")
            return None, None
        if not voice_file:
            print("❌ Error: no voice file selected")
            return None, None

        if self.model is None:
            self.load_model()

        print(f"🎤 Generating {style} voice...")
        print(f"🎵 Voice source: {os.path.basename(voice_file)}")
        print(f"📝 Text: {text[:50]}{'...' if len(text) > 50 else ''}")

        try:
            # NOTE(review): the preset values are not passed to the model.
            # Their names/ranges (exaggeration, cfg_weight, ...) do not look
            # like options of the TTS API used here - confirm before wiring
            # them in. The style currently only affects the file name.
            tts_kwargs = {"text": text, "speaker_wav": voice_file}
            # Only multilingual models take a language; the single-language
            # fallback model is expected to reject the argument.
            if getattr(self.model, "is_multi_lingual", True):
                tts_kwargs["language"] = "en"
            wav = self.model.tts(**tts_kwargs)

            # Write and measure with the model's own rate; a mismatched rate
            # changes playback speed/pitch and skews the reported duration.
            sample_rate = self._output_sample_rate()

            # Save to local directory
            timestamp = datetime.now().strftime("%m%d_%H%M%S")
            voice_name = os.path.splitext(os.path.basename(voice_file))[0]
            output_file = f"cloned_{voice_name}_{style.lower().replace(' ', '_')}_{timestamp}.wav"
            os.makedirs(OUTPUT_DIR, exist_ok=True)  # in case the setup cell was skipped
            output_path = os.path.join(OUTPUT_DIR, output_file)

            # Save audio file
            sf.write(output_path, wav, sample_rate, subtype='PCM_16')

            duration = len(wav) / sample_rate
            print("\n🎉 SUCCESS!")
            # The file is written to the local OUTPUT_DIR, not to Google Drive.
            print(f"✅ Saved: {output_file}")
            print(f"⏱️ Duration: {duration:.1f} seconds")
            print(f"📁 Location: {output_path}")

            return wav, output_path

        except Exception as e:
            # Notebook UI boundary: report the problem instead of raising.
            print(f"❌ Error: {e}")
            return None, None

# Build the shared cloner instance that the interface cells below rely on
cloner = CloudVoiceCloner()
print("🎤 Cloud Voice Cloner ready!", f"🖥️ Device: {cloner.device}", sep="\n")


In [None]:
# 🎛️ INTERACTIVE VOICE CLONING INTERFACE
import ipywidgets as widgets
from IPython.display import display, Audio, clear_output

def create_voice_cloning_interface():
    """Build and display the interactive voice cloning widgets.

    Scans for voice files once, at call time, via the module-level
    ``cloner``. If none are found, prints upload hints and returns without
    showing any widgets. Otherwise displays dropdowns for voice, style and
    quick text, a custom-text box, and a button that runs
    ``cloner.generate_voice`` and plays the result.

    Returns:
        None. All output goes to the notebook.
    """

    # Scan for voice files
    voice_files = cloner.scan_voice_files()

    if not voice_files:
        print("❌ No voice files found!")
        print("💡 Upload voice files to the left file panel in Colab")
        print("💡 Supported formats: .mp3, .wav, .m4a, .flac, .ogg, .aac")
        return

    # Create widgets
    voice_dropdown = widgets.Dropdown(
        # Show only the file name, but keep the full path as the value.
        options=[(os.path.basename(f), f) for f in voice_files],
        description='Voice Source:',
        style={'description_width': 'initial'}
    )

    style_dropdown = widgets.Dropdown(
        options=list(cloner.presets.keys()),
        value='NORMAL',
        description='Style:',
        style={'description_width': 'initial'}
    )

    text_dropdown = widgets.Dropdown(
        options=list(cloner.quick_texts.keys()),
        value='Test',
        description='Quick Text:',
        style={'description_width': 'initial'}
    )

    custom_text = widgets.Textarea(
        value='',
        placeholder='Or enter your custom text here...',
        description='Custom Text:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='100%', height='100px')
    )

    generate_button = widgets.Button(
        description='🎤 Generate Voice',
        button_style='success',
        layout=widgets.Layout(width='200px', height='40px')
    )

    output_area = widgets.Output()

    def on_generate_clicked(b):
        """Run one generation with the current widget values and play it."""
        # Generation can take a while; block repeated clicks until it ends.
        generate_button.disabled = True
        try:
            with output_area:
                clear_output(wait=True)

                # Get selected values
                voice_file = voice_dropdown.value
                style = style_dropdown.value
                text_choice = text_dropdown.value
                custom = custom_text.value.strip()

                # Generate voice (non-empty custom text overrides the quick text)
                wav, output_path = cloner.generate_voice(
                    text=text_choice,
                    voice_file=voice_file,
                    style=style,
                    custom_text=custom if custom else None
                )

                if wav is not None:
                    # Play at the rate the loaded model reports; a fixed
                    # 22050 Hz plays at the wrong speed/pitch for models
                    # with a different output rate.
                    synthesizer = getattr(cloner.model, "synthesizer", None)
                    rate = getattr(synthesizer, "output_sample_rate", None) or 22050
                    display(Audio(wav, rate=rate))

                    # Show download link
                    print(f"\n📥 File saved: {output_path}")
                    print("💾 Check the left file panel to download your cloned voice!")
        finally:
            generate_button.disabled = False

    generate_button.on_click(on_generate_clicked)

    # Display interface
    print("🎤 VOICE CLONING INTERFACE")
    print("=" * 40)

    display(voice_dropdown)
    display(style_dropdown)
    display(text_dropdown)
    display(custom_text)
    display(generate_button)
    display(output_area)

# Create the interface. Voice files are scanned when this call runs, so
# re-run this cell after uploading new recordings.
create_voice_cloning_interface()


In [None]:
# 🔥 PEP GUARDIOLA ROAST MODE (Your External Script)
def run_pep_roast_mode():
    """Generate and play the full Pep Guardiola roast script.

    Uses the module-level ``cloner``. Picks the first scanned voice file
    whose name contains "youtube_voice" (case-insensitive), or the first
    scanned file if there is no such match, and synthesizes the script with
    the "NORMAL" style.

    Returns:
        None. Progress and results are printed / displayed in the notebook.
    """

    # The full Pep Guardiola roast script
    full_script = """did you guys see it pep guardiola the so-called bald genius the tactical messiah the cone collector in chief got cooked by frank the tank yeah first season in the prem for me i walk in like the substitute teacher with no lesson plan and i still schooled the so-called professor of football pep is old washed bald fraud ball more like fraud-ball tiki-taka now just ticki-tacky

they say man city hard to beat yeah we didn't even sign eze doesn't matter didn't need him because honestly playing city is like playing career mode on easy difficulty pep belongs back on football manager tapping spacebar not on my touchline watching me dance on his grave his team of oil princes couldn't even break down my danish ikea blueprint i beat him so easily it looked scripted

pep with all his millions all his lectures about half-spaces and positional play meanwhile i'm on the sideline in a coat looking like a viking substitute teacher and still running rings around him frank the tank never needed a transfer war chest just a brain heart and a little chaos the fact is city not scary anymore they're just old bald pep's powerpoint slides in motion

so go back to catalonia open up steam download football manager 2012 live in the past because the premier league is my classroom now and pep just failed the exam"""

    # Find voice files
    voice_files = cloner.scan_voice_files()

    if not voice_files:
        print("❌ No voice files found for roast mode!")
        return

    # Auto-select youtube_voice if available (match on the file name only,
    # so a directory name cannot trigger a false match)
    youtube_voice = next(
        (f for f in voice_files if "youtube_voice" in os.path.basename(f).lower()),
        None,
    )

    if youtube_voice is None:
        youtube_voice = voice_files[0]
        print(f"⚠️ youtube_voice not found, using: {os.path.basename(youtube_voice)}")
    else:
        print(f"✅ Found youtube_voice: {os.path.basename(youtube_voice)}")

    print("\n🔥 PEP ROAST MODE ACTIVATED!")
    print(f"🎵 Voice source: {os.path.basename(youtube_voice)}")
    print(f"📝 Full Pep Guardiola roast script ({len(full_script)} characters)")
    print("⏱️ Estimated time: 2-4 minutes...")
    print()

    # Generate with NORMAL settings; custom_text overrides the "Custom" label
    wav, output_path = cloner.generate_voice(
        text="Custom",
        voice_file=youtube_voice,
        style="NORMAL",
        custom_text=full_script
    )

    if wav is not None:
        print("\n🎉 PEP ROAST COMPLETE!")
        print(f"✅ File saved: {os.path.basename(output_path)}")
        print(f"📁 Location: {output_path}")

        # Play the roast at the rate the loaded model reports; a fixed
        # 22050 Hz plays at the wrong speed/pitch for models with a
        # different output rate.
        synthesizer = getattr(cloner.model, "synthesizer", None)
        rate = getattr(synthesizer, "output_sample_rate", None) or 22050
        display(Audio(wav, rate=rate))

        print("\n🎧 The Pep Guardiola roast is ready!")
        # The file is written to the local output folder, not to Google Drive.
        print("💾 Check the left file panel to download the full file")

# Uncomment the line below to run Pep Roast Mode
# run_pep_roast_mode()


## 📋 **HOW TO USE (Same as Your Local Workflow):**

### **1. Upload Voice Files**
- Open the Colab file panel (folder icon in the left sidebar)
- Upload your voice files (.mp3, .wav, etc.) to the top-level working folder
- Re-run the interface cell so the new files appear in the dropdown

### **2. Select Voice & Style**
- Choose voice source from dropdown
- Pick style (SASSY, ROAST MODE, NORMAL, etc.)
- Select quick text or enter custom text

### **3. Generate & Download**
- Click "Generate Voice"
- Listen to the result
- File automatically saved to `./cloned_output` in the Colab workspace (download it from the left file panel)

### **4. Pep Roast Mode**
- Uncomment the last line of the Pep Roast cell (`run_pep_roast_mode()`) to run the full Pep Guardiola roast
- Uses same script as your external version

---

## Benefits Over Local Setup:
- No Mac crashes
- Unlimited GPU power
- Same voice styles & presets
- Google Drive integration (not enabled in this notebook: files are saved to the Colab workspace)
- Only $10/month
- 12+ hours runtime

**Your exact same workflow, but in the cloud!**
