# Whisper Transcription + Translation Tool v2.1

## Version avec traduction automatique vers le français

### Nouvelles fonctionnalités v2.1 :
- 🌍 **Détection automatique de la langue** source
- 🔄 **Traduction vers le français** avec services multiples  
- 📝 **Export bilingue** (original + français)
- 🎯 **Post-traitement français** optimisé
- 🔧 **Choix du service de traduction** (DeepL, Google)
- 📊 **Métadonnées étendues** (langue source, confiance)

### Configuration rapide :
1. Modifiez les variables `TRANSLATION_SERVICE` et `SOURCE_INPUT`
2. Exécutez toutes les cellules
3. Récupérez vos fichiers traduits en français !

## 1. Installation et imports

In [None]:
# Installation
!pip install -q ffmpeg yt_dlp huggingface_hub
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q torch torchaudio torchvision
!pip install -q deepl googletrans==4.0.0rc1 language-tool-python

# Imports
import os, subprocess, glob, zipfile, time, json, re
from pathlib import Path
from typing import Optional, Dict, List, Tuple
import torch
import whisper
from yt_dlp import YoutubeDL

# Translation services
# Each optional dependency is probed with a guarded import; the resulting
# *_AVAILABLE flag records whether the import succeeded so later cells can
# decide which services to configure.
try:
    import deepl
    DEEPL_AVAILABLE = True
except ImportError:
    DEEPL_AVAILABLE = False

try:
    # Imported under an alias so the name states which backend it is.
    from googletrans import Translator as GoogleTranslator
    GOOGLE_AVAILABLE = True
except ImportError:
    GOOGLE_AVAILABLE = False

try:
    # Used later for the optional French grammar post-processing.
    import language_tool_python
    LANGUAGE_TOOL_AVAILABLE = True
except ImportError:
    LANGUAGE_TOOL_AVAILABLE = False

# Summary of which optional services could be imported.
print(f"‚úÖ Services disponibles:")
print(f"  - DeepL: {'‚úÖ' if DEEPL_AVAILABLE else '‚ùå'}")
print(f"  - Google Translate: {'‚úÖ' if GOOGLE_AVAILABLE else '‚ùå'}")
print(f"  - Language Tool: {'‚úÖ' if LANGUAGE_TOOL_AVAILABLE else '‚ùå'}")

## 2. Configuration

In [None]:
# === MAIN CONFIGURATION ===
TRANSLATION_SERVICE = "auto"      # "auto", "deepl", "google"
SOURCE_TYPE = "1"                 # "1"=URL, "2"=folder, "3"=single file
SOURCE_INPUT = "https://www.youtube.com/watch?v=afUrxn0NT2s"  # Change this URL / path

# Advanced settings
EXPORT_BILINGUAL = True           # also write original-language text and a bilingual SRT
IMPROVE_FRENCH = True             # run the French grammar checker on translated text
temp_directory = '/kaggle/working'
whisper_model_name = "large-v3"
device = "cuda" if torch.cuda.is_available() else "cpu"

# API configuration: the DeepL key is read from the environment, never hard-coded.
DEEPL_API_KEY = os.environ.get('DEEPL_API_KEY', None)
translators = {}  # service name -> configured client, filled in below

# DeepL is only registered when both the package and an API key are present.
if DEEPL_AVAILABLE and DEEPL_API_KEY:
    try:
        translators['deepl'] = deepl.Translator(DEEPL_API_KEY)
        print("‚úÖ DeepL configur√©")
    except Exception as e:
        print(f"‚ùå Erreur DeepL: {e}")

if GOOGLE_AVAILABLE:
    try:
        translators['google'] = GoogleTranslator()
        print("‚úÖ Google Translate configur√©")
    except Exception as e:
        print(f"‚ùå Erreur Google: {e}")

# The grammar checker is optional; None means "skip post-processing".
grammar_checker = None
if LANGUAGE_TOOL_AVAILABLE and IMPROVE_FRENCH:
    try:
        grammar_checker = language_tool_python.LanguageTool('fr')
        print("‚úÖ Correcteur fran√ßais configur√©")
    except Exception as e:
        print(f"‚ùå Erreur correcteur: {e}")

# Human-readable label for each supported SOURCE_TYPE. A dict lookup with a
# fallback is used so that a typo in SOURCE_TYPE produces a visible hint
# instead of an IndexError/ValueError that aborts the configuration cell.
_SOURCE_LABELS = {"1": "URL", "2": "Dossier", "3": "Fichier"}
_source_label = _SOURCE_LABELS.get(str(SOURCE_TYPE), f"Inconnu ({SOURCE_TYPE!r})")

print(f"\nüéØ WHISPER TRANSCRIPTION + TRANSLATION TOOL v2.1")
print(f"üîß Device : {device}")
print(f"üåç Service traduction : {TRANSLATION_SERVICE}")
print(f"üìÅ Source : {_source_label}")
print(f"üìÑ Input : {SOURCE_INPUT}")

## 3. Fonctions utilitaires

In [None]:
def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is first converted to a whole number of milliseconds (rounded
    to nearest) and then split with integer arithmetic. This avoids the
    float-truncation errors of working on ``seconds % 1`` directly (for
    example 1.001 s used to render as ``,000``) and lets a value such as
    59.9996 s carry cleanly into the next minute.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero because SRT has no negative timestamps.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

def detect_file_type(filepath):
    """Classify a path as ``"audio"``, ``"video"`` or ``"unknown"``.

    The decision is based only on the file name's suffix, compared
    case-insensitively; the file itself is never opened.
    """
    suffixes_by_kind = {
        "audio": ('.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a'),
        "video": ('.mp4', '.avi', '.mkv', '.webm', '.mov', '.flv'),
    }
    normalized = filepath.lower()
    # Audio is checked before video, in dict insertion order.
    for kind, suffixes in suffixes_by_kind.items():
        if normalized.endswith(suffixes):
            return kind
    return "unknown"

def detect_language(model, audio_path):
    """Detect the spoken language at the start of an audio file.

    The audio is loaded, padded or trimmed to ``30 * 16000`` samples, turned
    into a log-Mel spectrogram and passed to ``model.detect_language``.

    Args:
        model: A loaded Whisper model (must expose ``dims.n_mels``,
            ``device`` and ``detect_language``).
        audio_path: Path of the audio file to analyse.

    Returns:
        ``(language_code, probability)`` for the most likely language.
        On any error the failure is printed and ``("en", 0.5)`` is
        returned so the pipeline can continue.
    """
    try:
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio, length=30 * 16000)
        # The number of Mel bins must match the checkpoint: large-v3 uses a
        # different count than the library default, so relying on the
        # default made detection fail (and silently fall back to "en").
        mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
        _, probs = model.detect_language(mel)
        detected_language = max(probs, key=probs.get)
        return detected_language, probs[detected_language]
    except Exception as e:
        # Best-effort: report the problem and fall back to English.
        print(f"‚ùå Erreur d√©tection langue: {e}")
        return "en", 0.5

def get_language_name(code):
    """Return the French display name for a language code.

    Codes missing from the built-in table are rendered as
    ``Langue (<code>)``.
    """
    known_names = dict(
        en='Anglais',
        fr='Fran√ßais',
        es='Espagnol',
        de='Allemand',
        it='Italien',
        pt='Portugais',
        ru='Russe',
        ja='Japonais',
    )
    try:
        return known_names[code]
    except KeyError:
        return f"Langue ({code})"

def translate_text(text: str, service: str = "auto") -> Tuple[str, str]:
    """Translate ``text`` into French with the configured translation clients.

    Args:
        text: Text to translate. Empty or whitespace-only input is returned
            untouched.
        service: ``"auto"`` tries DeepL then Google; any other value is used
            as the single service name to try.

    Returns:
        ``(translated_text, service_name)``. ``service_name`` is ``"none"``
        when there was nothing to translate and ``"failed"`` when no service
        produced a result (the original text is returned in that case).
    """
    if not (text and text.strip()):
        return text, "none"

    if service == "auto":
        candidates = ['deepl', 'google']
    else:
        candidates = [service]

    for name in candidates:
        try:
            if name == 'deepl' and 'deepl' in translators:
                deepl_result = translators['deepl'].translate_text(text, target_lang="FR")
                return deepl_result.text, "deepl"
            if name == 'google' and 'google' in translators:
                google_result = translators['google'].translate(text, dest="fr")
                return google_result.text, "google"
        except Exception as err:
            # A failing backend is reported, then the next candidate is tried.
            print(f"‚ùå Erreur {name}: {err}")

    return text, "failed"

def improve_french_text(text: str) -> str:
    """Apply grammar correction and whitespace normalisation to French text.

    Does nothing (returns ``text`` unchanged) when ``IMPROVE_FRENCH`` is
    false or no grammar checker is configured.

    Args:
        text: French text to clean up.

    Returns:
        The corrected text with every run of whitespace collapsed to a
        single space and the ends stripped; or the original ``text`` if the
        checker raised.
    """
    if not IMPROVE_FRENCH or not grammar_checker:
        return text
    try:
        matches = grammar_checker.check(text)
        corrected_text = language_tool_python.utils.correct(text, matches)
    except Exception as e:
        # Correction is best-effort: report the failure instead of hiding
        # it, then fall back to the uncorrected text.
        print(f"‚ùå Erreur correcteur: {e}")
        return text
    return re.sub(r'\s+', ' ', corrected_text).strip()

print("‚úÖ Fonctions utilitaires d√©finies")

## 4. Chargement du modèle

In [None]:
print(f"ü§ñ Chargement du mod√®le Whisper '{whisper_model_name}' sur {device}...")
whisper_model = whisper.load_model(whisper_model_name, device=device)
print("‚úÖ Mod√®le Whisper charg√© avec succ√®s")

## 5. Traitement principal

In [None]:
# Preparation: make sure the working directory exists and reset the work lists.
os.makedirs(temp_directory, exist_ok=True)
files_to_process = []
generated_files = []

print("üöÄ D√©but du traitement...")

# Source handling: fill `files_to_process` according to SOURCE_TYPE.
if SOURCE_TYPE == "1":  # URL
    print(f"üì• T√©l√©chargement : {SOURCE_INPUT}")
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(temp_directory, '%(title)s.%(ext)s'),
        'noplaylist': True,
        'quiet': True,
    }

    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(SOURCE_INPUT, download=True)

    # Prefer the exact output path reported by yt-dlp: picking "the newest
    # media file in the directory" can select an unrelated file left over
    # from an earlier run.
    downloaded_file = None
    for entry in (info or {}).get('requested_downloads') or []:
        candidate = entry.get('filepath')
        if candidate and os.path.isfile(candidate):
            downloaded_file = candidate
            break

    if downloaded_file is None:
        # Fallback: newest audio/video file in the working directory.
        media_files = [
            f for f in glob.glob(os.path.join(temp_directory, "*"))
            if detect_file_type(f) in ("video", "audio")
        ]
        if media_files:
            downloaded_file = max(media_files, key=os.path.getctime)

    if downloaded_file:
        files_to_process.append(downloaded_file)
        print(f"‚úÖ T√©l√©charg√© : {os.path.basename(downloaded_file)}")
    else:
        print(f"‚ùå Aucun fichier exploitable pour : {SOURCE_INPUT}")

elif SOURCE_TYPE == "2":  # Folder
    if os.path.isdir(SOURCE_INPUT):
        # Classify with detect_file_type so the extension list lives in one
        # place and matching is case-insensitive (e.g. "CLIP.MP4").
        candidates = glob.glob(os.path.join(SOURCE_INPUT, '**', '*'), recursive=True)
        files_to_process.extend(sorted(
            path for path in candidates
            if os.path.isfile(path) and detect_file_type(path) != "unknown"
        ))
    else:
        print(f"‚ùå Dossier introuvable : {SOURCE_INPUT}")
    print(f"üìä {len(files_to_process)} fichiers trouv√©s")

elif SOURCE_TYPE == "3":  # Single file
    if os.path.isfile(SOURCE_INPUT):
        files_to_process.append(SOURCE_INPUT)
        print(f"üìÑ Fichier s√©lectionn√© : {os.path.basename(SOURCE_INPUT)}")
    else:
        print(f"‚ùå Fichier introuvable : {SOURCE_INPUT}")

else:
    # Previously an unknown SOURCE_TYPE silently produced an empty work list.
    print(f"‚ùå SOURCE_TYPE invalide : {SOURCE_TYPE!r}")

print(f"\nüìã Nombre de fichiers √† traiter : {len(files_to_process)}")

## 6. Transcription et traduction

In [None]:
def _write_srt(path, segments, include_original=False):
    """Write ``segments`` to ``path`` as an SRT subtitle file.

    Each cue shows the French text (``text_fr``, falling back to ``text``).
    With ``include_original=True`` the original text (``text_original``,
    falling back to ``text``) is written on the line above it.
    """
    with open(path, "w", encoding="utf-8") as f:
        for index, segment in enumerate(segments, 1):
            start_time = format_timestamp(segment["start"])
            end_time = format_timestamp(segment["end"])
            french = segment.get("text_fr", segment["text"]).strip()
            if include_original:
                original = segment.get("text_original", segment["text"]).strip()
                cue_text = f"{original}\n{french}"
            else:
                cue_text = french
            f.write(f"{index}\n{start_time} --> {end_time}\n{cue_text}\n\n")


# Process every collected file: convert -> detect language -> transcribe ->
# translate -> write outputs. A failure on one file skips that file only.
processed_count = 0
for i, file_path in enumerate(files_to_process, 1):
    print(f"\nüìÅ Fichier {i}/{len(files_to_process)}: {os.path.basename(file_path)}")

    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_base = os.path.join(temp_directory, base_name)

    # Convert video to audio when needed.
    file_type = detect_file_type(file_path)
    if file_type == "audio":
        audio_source = file_path
    elif file_type == "video":
        audio_source = output_base + ".mp3"
        print(f"üîÑ Conversion vid√©o ‚Üí audio...")
        command = ["ffmpeg", "-i", file_path, "-vn", "-acodec", "libmp3lame", "-q:a", "0", "-y", audio_source]
        try:
            subprocess.run(command, check=True, capture_output=True)
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # Do not let one broken file (or a missing ffmpeg binary) abort
            # the whole batch; show the last stderr line as a hint.
            stderr = getattr(e, "stderr", None) or b""
            last_lines = stderr.decode("utf-8", errors="replace").strip().splitlines()[-1:]
            detail = last_lines[0] if last_lines else str(e)
            print(f"‚ùå Erreur ffmpeg : {detail}")
            continue
        print("‚úÖ Conversion termin√©e")
    else:
        print(f"‚ùå Type non support√© : {file_path}")
        continue

    # Language detection
    print("üîç D√©tection de la langue...")
    detected_lang, confidence = detect_language(whisper_model, audio_source)
    lang_name = get_language_name(detected_lang)
    print(f"üåç Langue : {lang_name} ({detected_lang}) - Confiance: {confidence:.2f}")

    # Transcription. The model already lives on its device; `device` is not
    # a transcribe()/decoding option and passing it raises a TypeError.
    print(f"üìù Transcription en {lang_name}...")
    result = whisper_model.transcribe(audio_source, language=detected_lang, verbose=False)

    # Translation
    if detected_lang != 'fr':
        print(f"üîÑ Traduction vers le fran√ßais...")

        # Translate the full text
        translated_text, service_used = translate_text(result["text"], TRANSLATION_SERVICE)
        improved_text = improve_french_text(translated_text)

        # Translate each segment (used for the subtitle files)
        for segment in result["segments"]:
            segment["text_original"] = segment["text"]
            translated_segment, _ = translate_text(segment["text"], TRANSLATION_SERVICE)
            segment["text_fr"] = improve_french_text(translated_segment)

        if service_used == "failed":
            # translate_text returns the source text unchanged on failure, so
            # make it obvious that the "_fr" outputs are not translated.
            print("‚ùå Aucune traduction obtenue : les fichiers _fr contiennent le texte source")
        else:
            print(f"‚úÖ Traduction termin√©e avec {service_used}")
    else:
        print("üá´üá∑ Audio d√©j√† en fran√ßais")
        improved_text = improve_french_text(result["text"])
        for segment in result["segments"]:
            segment["text_original"] = segment["text"]
            segment["text_fr"] = improve_french_text(segment["text"])
        service_used = "none"

    # Save outputs
    print("üíæ Sauvegarde des fichiers...")
    export_original = EXPORT_BILINGUAL and detected_lang != "fr"

    # French text
    french_text_file = output_base + "_fr.txt"
    with open(french_text_file, "w", encoding="utf-8") as f:
        f.write(improved_text)
    generated_files.append(french_text_file)
    print(f"‚úÖ {os.path.basename(french_text_file)}")

    # Original text (only when the source is not French)
    if export_original:
        original_text_file = output_base + "_original.txt"
        with open(original_text_file, "w", encoding="utf-8") as f:
            f.write(result["text"])
        generated_files.append(original_text_file)
        print(f"‚úÖ {os.path.basename(original_text_file)}")

    # French SRT
    french_srt_file = output_base + "_fr.srt"
    _write_srt(french_srt_file, result["segments"])
    generated_files.append(french_srt_file)
    print(f"‚úÖ {os.path.basename(french_srt_file)}")

    # Bilingual SRT (when enabled)
    if export_original:
        bilingual_srt_file = output_base + "_bilingue.srt"
        _write_srt(bilingual_srt_file, result["segments"], include_original=True)
        generated_files.append(bilingual_srt_file)
        print(f"‚úÖ {os.path.basename(bilingual_srt_file)}")

    # JSON metadata
    metadata_file = output_base + "_metadata.json"
    metadata = {
        "source_file": os.path.basename(file_path),
        "source_language": detected_lang,
        "source_language_name": lang_name,
        "language_confidence": confidence,
        "translation_service": service_used,
        "processed_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        "whisper_model": whisper_model_name,
        "device": device
    }
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)
    generated_files.append(metadata_file)
    print(f"‚úÖ {os.path.basename(metadata_file)}")

    processed_count += 1

# Final summary; skipped files are not counted as processed.
print("\nüéâ TRAITEMENT TERMIN√â !")
print("=" * 50)
print(f"üìä Fichiers trait√©s : {processed_count}/{len(files_to_process)}")
print(f"üìù Fichiers g√©n√©r√©s : {len(generated_files)}")
print(f"üìÇ R√©pertoire : {temp_directory}")
print("\nüìÑ Fichiers g√©n√©r√©s :")
for file_path in generated_files:
    print(f"  ‚úÖ {os.path.basename(file_path)}")

## 7. Création d'archive (optionnel)

In [None]:
# Bundle every generated file into a single ZIP archive (optional).
CREATE_ARCHIVE = True

if not CREATE_ARCHIVE:
    print("üì¶ Cr√©ation d'archive d√©sactiv√©e")
elif not generated_files:
    # Archiving is enabled but there is nothing to pack; say so instead of
    # claiming the feature is disabled.
    print("üì¶ Aucun fichier disponible pour l'archive")
else:
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    archive_path = os.path.join(temp_directory, f"transcriptions_fr_{timestamp}.zip")

    print(f"üì¶ Cr√©ation de l'archive : {os.path.basename(archive_path)}")

    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in generated_files:
            # Files are stored flat (base name only); missing ones are skipped.
            if os.path.exists(file_path):
                zipf.write(file_path, os.path.basename(file_path))

    print(f"‚úÖ Archive cr√©√©e : {archive_path}")
    print(f"üìä Taille : {os.path.getsize(archive_path) / 1024:.1f} KB")

print("\nüéØ WHISPER TRANSCRIPTION + TRANSLATION v2.1 - TERMIN√â ‚úÖ")
print("Tous vos fichiers traduits en fran√ßais sont pr√™ts !")