In [None]:
from openai import OpenAI

import os

# Read the key from the environment so no secret has to be pasted into the
# notebook; falls back to the original empty placeholder when it is unset.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

# Open the audio inside a context manager so the handle is closed as soon as
# the upload finishes instead of staying open for the life of the kernel.
with open("/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        # verbose_json returns per-segment timestamps in addition to the text.
        response_format="verbose_json",
        # Context prompt describing the recording and its vocabulary.
        prompt=(
            "This is a professionally recorded Hindi movie. "
            "The audio includes male, female, and child speakers. "
            "Language is Hindi, often poetic and emotional. "
            "There may be ambient sounds or music. "
            "Transcribe spoken content accurately including honorifics and slang. "
            "Include both Hindi and English words as spoken. "
            "Character names include Ravi, Pooja, Inspector Khan, and Sita."
        ),
    )

print(transcription.text)

In [None]:
import datetime

def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero because SRT has no notation
            for negative timestamps.

    Returns:
        str: Zero-padded timestamp, e.g. ``"01:01:01,500"``. The hour field
        keeps growing past 23 for offsets of a day or more.
    """
    # Convert to whole milliseconds once, with rounding, so float noise cannot
    # drop a millisecond (1.001 s used to be rendered as ",000").
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

def save_srt_from_transcription(transcription, srt_output_path, min_duration=1.0):
    """Write the segments of a transcription to an SRT subtitle file.

    Consecutive segments whose stripped text is identical and that start at
    most one second after the previous one ended are merged into a single cue.
    Cues shorter than ``min_duration`` are dropped, and the remaining cues are
    numbered sequentially from 1 (the original numbering left gaps wherever a
    short cue was skipped).

    Args:
        transcription: Object exposing ``.segments``; each segment must provide
            ``.text``, ``.start`` and ``.end`` (times in seconds).
        srt_output_path: Destination path of the SRT file (UTF-8).
        min_duration: Minimum cue length in seconds; shorter cues are skipped.
    """
    merged_segments = []

    for segment in transcription.segments or []:
        text = segment.text.strip()
        previous = merged_segments[-1] if merged_segments else None

        if (
            previous is not None
            and previous['text'] == text
            and (segment.start - previous['end']) <= 1.0
        ):
            # Same line repeated almost back-to-back: stretch the previous cue.
            previous['end'] = segment.end
        else:
            merged_segments.append({'start': segment.start, 'end': segment.end, 'text': text})

    with open(srt_output_path, 'w', encoding='utf-8') as srt_file:
        cue_number = 0
        for seg in merged_segments:
            if seg['end'] - seg['start'] < min_duration:
                continue  # skip very short cues
            # Count only the cues that are written so the numbering is gap-free.
            cue_number += 1
            start = to_srt_time(seg['start'])
            end = to_srt_time(seg['end'])
            srt_file.write(f"{cue_number}\n{start} --> {end}\n{seg['text']}\n\n")


# Export the transcription held in `transcription` as an SRT file
save_srt_from_transcription(
    transcription,
    srt_output_path="ahista_ahista_part11.srt",
)


In [None]:
import subprocess
import datetime
from openai import OpenAI


# 1. Preprocess the audio to clean artifacts
def preprocess_audio(input_path, output_path="cleaned.wav"):
    """Normalise an audio file with FFmpeg and return the path of the result.

    The input is resampled to 16 kHz, downmixed to mono and run through the
    ``dynaudnorm`` filter. An existing output file is overwritten.

    Args:
        input_path: Path of the audio file passed to ``ffmpeg -i``.
        output_path: Destination path of the processed audio.

    Returns:
        The ``output_path`` that was written.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status.
    """
    resample_flags = ["-ar", "16000", "-ac", "1"]  # 16 kHz sample rate, mono
    normalise_flags = ["-af", "dynaudnorm"]        # dynamic audio normalization
    ffmpeg_call = ["ffmpeg", "-y", "-i", input_path, *resample_flags, *normalise_flags, output_path]

    try:
        subprocess.run(ffmpeg_call, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"❌ FFmpeg failed: {e}")

    print(f"✅ Audio preprocessed and saved to: {output_path}")
    return output_path


# 2. Format timestamp for SRT
def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero because SRT has no notation
            for negative timestamps.

    Returns:
        str: Zero-padded timestamp, e.g. ``"01:01:01,500"``. The hour field
        keeps growing past 23 for offsets of a day or more.
    """
    # Convert to whole milliseconds once, with rounding, so float noise cannot
    # drop a millisecond (1.001 s used to be rendered as ",000").
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


# 3. Filter repetitive segments
def filter_repeated_segments(segments, min_gap=1.0, max_repeats=2):
    """Drop segments that repeat the same text too often in close succession.

    A segment counts as a repeat when its stripped text equals that of the
    segment immediately before it and it starts less than ``min_gap`` seconds
    after that segment ended. Within such a run the first ``max_repeats``
    occurrences are kept and every further one is dropped.

    Args:
        segments (list): Segments exposing ``.text``, ``.start`` and ``.end``.
        min_gap (float): Gap in seconds from which identical text is treated
            as a genuinely new line again.
        max_repeats (int): Number of back-to-back occurrences to keep.

    Returns:
        list: The segments that survived, in their original order.
    """
    filtered = []
    prev_text = None
    prev_end = 0
    repeat_count = 0

    for seg in segments:
        current_text = seg.text.strip()
        is_repeat = current_text == prev_text and (seg.start - prev_end) < min_gap
        repeat_count = repeat_count + 1 if is_repeat else 0

        # Advance the reference point for every segment, including dropped
        # ones. Measuring the gap from the last *kept* segment let a long run
        # of repeats drift past min_gap and be re-admitted.
        prev_text = current_text
        prev_end = seg.end

        if is_repeat and repeat_count >= max_repeats:
            continue  # too many back-to-back copies of the same line

        filtered.append(seg)

    return filtered


# 4. Merge short identical subtitles and write SRT
def save_srt_from_transcription(segments, srt_output_path, min_duration=1.0):
    """Merge identical neighbouring segments and write them to an SRT file.

    Consecutive segments whose stripped text is identical and that start at
    most one second after the previous cue's current end are merged into one
    cue. Cues shorter than ``min_duration`` are dropped and the rest are
    numbered sequentially from 1.

    Args:
        segments: Iterable of segments exposing ``.text``, ``.start`` and
            ``.end`` (times in seconds).
        srt_output_path: Destination path of the SRT file (UTF-8).
        min_duration: Minimum cue length in seconds; shorter cues are skipped.
    """
    merged = []

    for seg in segments:
        text = seg.text.strip()
        previous = merged[-1] if merged else None

        # Compare against the *extended* end of the previous cue. The earlier
        # code kept comparing against the end of the first segment of the run,
        # so chains of three or more identical segments stopped merging.
        if (
            previous is not None
            and previous['text'] == text
            and (seg.start - previous['end']) <= 1.0
        ):
            previous['end'] = seg.end
        else:
            merged.append({'start': seg.start, 'end': seg.end, 'text': text})

    with open(srt_output_path, 'w', encoding='utf-8') as f:
        cue_number = 0
        for cue in merged:
            if cue['end'] - cue['start'] < min_duration:
                continue
            # Count only written cues so the SRT numbering has no gaps.
            cue_number += 1
            start = to_srt_time(cue['start'])
            end = to_srt_time(cue['end'])
            f.write(f"{cue_number}\n{start} --> {end}\n{cue['text']}\n\n")

    print(f"✅ SRT saved to: {srt_output_path}")


# 5. Complete pipeline runner
def run_translation_pipeline(
    input_audio_path,
    api_key,
    srt_output_path="output_translated.srt"
):
    """Preprocess an audio file, translate it to English and save an SRT.

    Args:
        input_audio_path: Path of the source audio file.
        api_key: OpenAI API key used to create the client.
        srt_output_path: Destination path of the generated SRT file.
    """
    # Step 1: Preprocess
    cleaned_audio = preprocess_audio(input_audio_path)

    # Step 2: Translate using the OpenAI Whisper API.
    # Translation to English is a separate endpoint (audio.translations);
    # transcriptions.create() has no `translate` argument, so the previous
    # call failed before any request was sent. The translations endpoint
    # detects the spoken language itself and takes no `language` parameter.
    client = OpenAI(api_key=api_key)
    with open(cleaned_audio, "rb") as audio_file:
        result = client.audio.translations.create(
            model="whisper-1",
            file=audio_file,
            response_format="verbose_json",
            prompt=(
                "This is a professionally recorded Hindi movie. "
                "Translate all spoken Hindi to fluent, grammatically correct English. "
                "Preserve tone, intent, and avoid hallucinating or repeating any content."
            )
        )

    # Step 3: Clean segments
    segments = filter_repeated_segments(result.segments)

    # Step 4: Save as SRT
    save_srt_from_transcription(segments, srt_output_path)


# # ----------------------
# # ✅ Usage Example
# # ----------------------
# if __name__ == "__main__":
#     run_translation_pipeline(
#         input_audio_path="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
#         api_key="sk-proj-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
#         srt_output_path="ahista_ahista_part1_translated.srt"
#     )


In [None]:
import os

# Run the single-pass translation pipeline on part 1 of the movie audio.
run_translation_pipeline(
    input_audio_path="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
    # Take the key from the environment so it never has to be pasted into the
    # notebook; falls back to the original empty placeholder when unset.
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    srt_output_path="ahista_ahista_part1_translated12.srt"
)

In [None]:
import subprocess
import datetime
import requests
import json


def preprocess_audio(input_path, output_path="cleaned.wav"):
    """Convert audio to normalised 16 kHz mono with FFmpeg.

    Args:
        input_path: Path of the audio file passed to ``ffmpeg -i``.
        output_path: Destination path; an existing file is overwritten.

    Returns:
        The ``output_path`` that was written.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    ffmpeg_args = ["ffmpeg", "-y"]          # overwrite existing output
    ffmpeg_args += ["-i", input_path]
    ffmpeg_args += ["-ar", "16000"]         # sample rate
    ffmpeg_args += ["-ac", "1"]             # mono
    ffmpeg_args += ["-af", "dynaudnorm"]    # dynamic audio normalization
    ffmpeg_args.append(output_path)

    subprocess.run(ffmpeg_args, check=True)
    return output_path


def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero because SRT has no notation
            for negative timestamps.

    Returns:
        str: Zero-padded timestamp, e.g. ``"01:01:01,500"``. The hour field
        keeps growing past 23 for offsets of a day or more.
    """
    # Convert to whole milliseconds once, with rounding, so float noise cannot
    # drop a millisecond (1.001 s used to be rendered as ",000").
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def get_transcription_segments(audio_path, api_key):
    """Transcribe an audio file and return the timestamped segments.

    Posts the file to the OpenAI transcription endpoint with
    ``response_format="verbose_json"`` and ``language="hi"``.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: OpenAI API key sent as a bearer token.

    Returns:
        The ``"segments"`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = "https://api.openai.com/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {api_key}"}
    data = {
        "model": "whisper-1",
        "response_format": "verbose_json",
        "language": "hi",  # for proper alignment
    }
    with open(audio_path, "rb") as f:
        files = {"file": (audio_path, f, "audio/wav")}
        response = requests.post(url, headers=headers, files=files, data=data)

    # Report API failures as an HTTP error. Without this check an error
    # payload only surfaced as a KeyError on "segments".
    response.raise_for_status()
    return response.json()["segments"]


def get_translated_text(audio_path, api_key):
    """Upload an audio file to the OpenAI translation endpoint and return the text.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: OpenAI API key sent as a bearer token.

    Returns:
        The ``"text"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        Exception: Whatever parsing the response body raised, re-raised after
            the first 1000 characters of the body were printed to stderr.
    """
    import sys

    endpoint = "https://api.openai.com/v1/audio/translations"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    form_fields = {
        "model": "whisper-1",
        "response_format": "json",
    }

    with open(audio_path, "rb") as audio_handle:
        upload = {
            "file": (audio_path, audio_handle, "audio/wav"),
        }
        response = requests.post(endpoint, headers=auth_headers, files=upload, data=form_fields)

    # Report failed requests before attempting to parse anything
    if not response.ok:
        print(f"❌ HTTP {response.status_code} - {response.reason}")
        print("🔍 Response content:")
        print(response.text[:1000], file=sys.stderr)  # first 1000 characters only
        response.raise_for_status()

    try:
        translated = response.json()["text"]
    except Exception as e:
        print("❌ Failed to parse JSON response:")
        print(response.text[:1000], file=sys.stderr)
        raise e
    return translated


def split_translated_text(translated_text, segment_count):
    """Split translated text into exactly ``segment_count`` chunks.

    The text is cut into sentences after ``.``, ``?`` or ``!``. If there are
    fewer sentences than segments the result is padded with empty strings;
    if there are more, neighbouring sentences are joined so that every
    sentence ends up in some chunk (earlier chunks take the extra sentences
    when the count does not divide evenly).

    Args:
        translated_text (str): The full translated text.
        segment_count (int): Number of chunks to produce.

    Returns:
        list[str]: ``segment_count`` strings, or ``[]`` when
        ``segment_count`` is zero or negative.
    """
    import re

    if segment_count <= 0:
        # Nothing to align against; also avoids dividing by zero below.
        return []

    sentences = [s for s in re.split(r'(?<=[.?!])\s+', translated_text.strip()) if s]

    if len(sentences) <= segment_count:
        # Pad so every segment still gets a (possibly empty) line.
        return sentences + [""] * (segment_count - len(sentences))

    # Spread the sentences over exactly segment_count chunks. The previous
    # fixed-size chunking could produce more chunks than segments and then
    # cut the surplus off, silently losing the end of the translation.
    base, extra = divmod(len(sentences), segment_count)
    chunks = []
    cursor = 0
    for chunk_index in range(segment_count):
        size = base + (1 if chunk_index < extra else 0)
        chunks.append(" ".join(sentences[cursor:cursor + size]))
        cursor += size
    return chunks


def save_srt(segments, translated_sentences, srt_path):
    """Pair timestamped segments with translated lines and write an SRT file.

    Segments and sentences are matched positionally; pairing stops at the
    shorter of the two sequences.

    Args:
        segments: Sequence of mappings with ``"start"`` and ``"end"`` keys.
        translated_sentences: Sequence of strings, one per cue.
        srt_path: Destination path of the SRT file (UTF-8).
    """
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        numbered_pairs = enumerate(zip(segments, translated_sentences), start=1)
        srt_file.writelines(
            f"{number}\n{to_srt_time(cue['start'])} --> {to_srt_time(cue['end'])}\n{line.strip()}\n\n"
            for number, (cue, line) in numbered_pairs
        )
    print(f"✅ Saved SRT: {srt_path}")


def run_dual_pass_translation_pipeline(input_audio, api_key, srt_output_path):
    """Build a translated SRT from two API passes over the same audio.

    The first pass provides the segment timestamps, the second the translated
    text, which is then split across those segments and saved.

    Args:
        input_audio: Path of the source audio file.
        api_key: OpenAI API key forwarded to both requests.
        srt_output_path: Destination path of the SRT file.
    """
    normalised_audio = preprocess_audio(input_audio)

    print("🔁 Getting transcription for timestamps...")
    timed_segments = get_transcription_segments(normalised_audio, api_key)

    print("🌍 Getting translated text...")
    full_translation = get_translated_text(normalised_audio, api_key)

    print("✂️ Splitting translation...")
    per_segment_lines = split_translated_text(full_translation, len(timed_segments))

    print("💾 Saving SRT...")
    save_srt(timed_segments, per_segment_lines, srt_output_path)


# 🔧 Usage
# if __name__ == "__main__":
#     run_dual_pass_translation_pipeline(
#         input_audio="/path/to/audio.mp3",
#         api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
#         srt_output_path="translated_output.srt"
#     )


In [None]:
import os

# Run the dual-pass pipeline on part 1 of the movie audio.
run_dual_pass_translation_pipeline(
    input_audio="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
    # Take the key from the environment so it never has to be pasted into the
    # notebook; falls back to the original empty placeholder when unset.
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    srt_output_path="ahista_ahista_part1_translated12.srt"
)

In [None]:
# Source video, subtitle file and output target used by the ffmpeg cells that
# read video_path / subtitle_path / output_path.
# NOTE(review): the subtitle path here ends in .ass while a later cell points
# at the .srt variant of the same name; confirm which one is intended.
video_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/split_files/ahista_ahista_part1.mp4"
subtitle_path = "/home/csc/Documents/Multilingual-Transcriber/plugins/experiments/ahista_ahista_part1_telugu.ass"
output_path = "/home/csc/Documents/Multilingual-Transcriber/plugins/experiments/ahista_ahista_part1_subtitled_telugu.mp4"

In [None]:
import os
import subprocess
import shutil

def ensure_utf8_encoding(input_srt: str, output_srt: str):
    """Write a UTF-8 copy of a subtitle file.

    The source encoding is taken from a leading byte-order mark when there is
    one (UTF-8, UTF-16 or UTF-32); files without a BOM are read as UTF-8.
    Bytes that cannot be decoded are replaced with U+FFFD rather than raising.
    The output is written without a BOM.

    Args:
        input_srt: Path of the subtitle file to read.
        output_srt: Path of the UTF-8 copy to write.
    """
    import codecs  # local import: only this helper needs it

    with open(input_srt, 'rb') as f:
        head = f.read(4)

    # UTF-32 must be tested before UTF-16: the UTF-32-LE mark starts with the
    # UTF-16-LE mark.
    if head.startswith(codecs.BOM_UTF8):
        source_encoding = 'utf-8-sig'
    elif head.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
        source_encoding = 'utf-32'
    elif head.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
        source_encoding = 'utf-16'
    else:
        source_encoding = 'utf-8'

    with open(input_srt, 'r', encoding=source_encoding, errors='replace') as f:
        content = f.read()
    with open(output_srt, 'w', encoding='utf-8') as f:
        f.write(content)

def burn_subtitles_to_video(video_path: str, subtitle_path: str, output_path: str, font_name: str = "Noto Sans Telugu"):
    """Burn subtitles into a video using FFmpeg with the specified font.

    A UTF-8 copy of the subtitle file is written next to the original (same
    name plus ``_utf8``) and passed to FFmpeg's ``subtitles`` filter. The
    audio stream is copied; an existing output file is overwritten.

    Args:
        video_path: Path of the source video.
        subtitle_path: Path of the subtitle file to burn in.
        output_path: Path of the video to write.
        font_name: Font requested through ``force_style``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    # Keep the original extension on the UTF-8 copy so an .ass/.ssa file is
    # not relabelled as .srt; files without an extension still get ".srt".
    subtitle_root, subtitle_ext = os.path.splitext(subtitle_path)
    utf8_subtitle_path = f"{subtitle_root}_utf8{subtitle_ext or '.srt'}"
    ensure_utf8_encoding(subtitle_path, utf8_subtitle_path)

    # Escape characters that are special inside an FFmpeg filter argument
    escaped_subs_path = utf8_subtitle_path.replace(":", "\\:").replace(",", "\\,").replace(" ", "\\ ")

    # Construct the FFmpeg command
    command = [
        "ffmpeg",
        "-y",  # Overwrite output
        "-i", video_path,
        "-vf", f"subtitles='{escaped_subs_path}':force_style='FontName={font_name}'",
        "-c:a", "copy",
        output_path
    ]

    print(f"Running FFmpeg command:\n{' '.join(command)}")
    subprocess.run(command, check=True)
    print(f"✅ Subtitle burned video saved at: {output_path}")



In [None]:

# Inputs for the Telugu burn-in run; update all three paths for your machine.
video_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/split_files/ahista_ahista_part1.mp4"
subtitle_path = "/home/csc/Documents/Multilingual-Transcriber/plugins/experiments/ahista_ahista_part1_telugu.srt"
output_path = "/home/csc/Documents/Multilingual-Transcriber/plugins/experiments/ahista_ahista_part1_subtitled_telugu_new.mp4"

burn_subtitles_to_video(
    video_path=video_path,
    subtitle_path=subtitle_path,
    output_path=output_path,
)


In [None]:
import subprocess

# Burn the subtitles with the Telugu font. Relies on video_path, subtitle_path
# and output_path having been set by an earlier cell.
command = [
    "ffmpeg", "-y", "-i", video_path,
    "-vf", f"subtitles={subtitle_path}:force_style='FontName=Noto Sans Telugu'",
    "-c:a", "copy",
    output_path
]

# check=True turns a failed FFmpeg run into an exception instead of a
# non-zero return code that is easy to overlook in the cell output.
subprocess.run(command, check=True)


In [None]:
import subprocess
# Burn the subtitles with FFmpeg's default styling. Relies on video_path,
# subtitle_path and output_path having been set by an earlier cell.
command = [
    # -y: overwrite the output. Without it FFmpeg asks for confirmation when
    # the file already exists, which cannot be answered from a notebook.
    "ffmpeg", "-y", "-i", video_path,
    "-vf", f"subtitles={subtitle_path}",
    "-c:a", "copy",
    output_path
]
# check=True turns a failed FFmpeg run into an exception instead of a
# non-zero return code that is easy to overlook in the cell output.
subprocess.run(command, check=True)

In [None]:
import os
import datetime
from google.cloud import speech, translate_v2 as translate
from google.cloud import storage
import ffmpeg

# Set credentials: point the Google client libraries at the service-account
# key file, unless GOOGLE_APPLICATION_CREDENTIALS is already configured in the
# environment (setdefault keeps an existing value instead of overwriting it).
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/home/csc/Downloads/nimixitsubtitling-05a073252e73.json")

class HindiToEnglishSRT:
    """Hindi audio to English SRT pipeline built on Google Cloud clients.

    The audio is converted to 16 kHz mono WAV, uploaded to a Cloud Storage
    bucket, transcribed with Speech-to-Text (``hi-IN``), translated to English
    with the Translate v2 client and written out as an SRT file.
    """

    def __init__(self, audio_path: str, srt_output_path: str = "output.srt", bucket_name: str = "subtitling_gcs"):
        """Store the paths and create the Speech, Translate and Storage clients.

        Args:
            audio_path: Path of the source audio file.
            srt_output_path: Destination path of the SRT file.
            bucket_name: Cloud Storage bucket the audio is uploaded to.
        """
        self.audio_path = audio_path
        self.srt_output_path = srt_output_path
        self.bucket_name = bucket_name
        self.speech_client = speech.SpeechClient()
        self.translate_client = translate.Client()
        self.storage_client = storage.Client()

    def preprocess_audio(self, output_path="cleaned.wav"):
        """Convert ``self.audio_path`` to 16 kHz mono WAV and return the new path."""
        ffmpeg.input(self.audio_path).output(output_path, ar=16000, ac=1, format='wav').run(overwrite_output=True)
        return output_path

    def upload_to_gcs(self, file_path: str, blob_name: str) -> str:
        """Upload a local file to the configured bucket and return its ``gs://`` URI."""
        bucket = self.storage_client.bucket(self.bucket_name)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(file_path)
        gcs_uri = f"gs://{self.bucket_name}/{blob_name}"
        print(f"☁️ Uploaded to GCS: {gcs_uri}")
        return gcs_uri

    def to_srt_time(self, seconds):
        """Format seconds as an SRT timestamp (``HH:MM:SS,mmm``).

        Negative values are clamped to zero. The value is converted to whole
        milliseconds once, with rounding, so float noise cannot drop a
        millisecond (1.001 s used to be rendered as ",000").
        """
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def transcribe_and_translate(self, gcs_uri):
        """Transcribe the uploaded audio and translate every result to English.

        Args:
            gcs_uri: ``gs://`` URI of a 16 kHz LINEAR16 recording.

        Returns:
            list[dict]: One entry per recognition result that has word timings,
            with keys ``index``, ``start``, ``end`` (SRT timestamps) and
            ``text`` (the English translation).
        """
        audio = speech.RecognitionAudio(uri=gcs_uri)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code="hi-IN",
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True
        )

        operation = self.speech_client.long_running_recognize(config=config, audio=audio)
        print("⏳ Waiting for transcription...")
        response = operation.result(timeout=600)

        segments = []
        index = 1
        for result in response.results:
            alt = result.alternatives[0]
            if not alt.words:
                # Without word timings there is nothing to anchor the cue to.
                continue
            start_time = alt.words[0].start_time.total_seconds()
            end_time = alt.words[-1].end_time.total_seconds()
            hindi_text = alt.transcript.strip()
            # format_="text" asks for plain text. The v2 API otherwise returns
            # HTML, so apostrophes and quotes would reach the subtitles as
            # entities such as &#39;.
            english = self.translate_client.translate(
                hindi_text, target_language="en", format_="text"
            )["translatedText"]
            segments.append({
                "index": index,
                "start": self.to_srt_time(start_time),
                "end": self.to_srt_time(end_time),
                "text": english
            })
            index += 1
        return segments

    def save_srt(self, segments):
        """Write the prepared cue dictionaries to ``self.srt_output_path`` (UTF-8)."""
        with open(self.srt_output_path, "w", encoding="utf-8") as f:
            for seg in segments:
                f.write(f"{seg['index']}\n")
                f.write(f"{seg['start']} --> {seg['end']}\n")
                f.write(f"{seg['text']}\n\n")

    def run_pipeline(self):
        """Run preprocessing, upload, transcription/translation and SRT export in order."""
        print("🔧 Preprocessing audio...")
        cleaned = self.preprocess_audio()
        print("☁️ Uploading to GCS...")
        gcs_uri = self.upload_to_gcs(cleaned, "audio/cleaned.wav")
        print("📝 Transcribing and translating...")
        segments = self.transcribe_and_translate(gcs_uri)
        print("💾 Saving SRT...")
        self.save_srt(segments)
        print(f"✅ Done! SRT saved at {self.srt_output_path}")


In [None]:
# Build the Google Cloud based converter for part 1 and run it end to end.
converter = HindiToEnglishSRT(
    "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
    srt_output_path="translated_subtitles.srt",
)
converter.run_pipeline()


In [None]:
import os
import ffmpeg
import datetime
from tqdm import tqdm
from openai import OpenAI
from pathlib import Path
from openai.types.audio import TranscriptionSegment

class WhisperHindiToEnglishSRT:
    """Hindi audio to English SRT pipeline built on the OpenAI Whisper API.

    The audio is converted to 16 kHz mono WAV, translated to English with the
    Whisper translation endpoint, optionally polished per segment with a chat
    model, and written out as an SRT file.
    """

    def __init__(self, audio_path: str, output_srt: str = "output.srt", correct_grammar: bool = True):
        """Store the settings and create the OpenAI client.

        Args:
            audio_path: Path of the source audio file.
            output_srt: Destination path of the SRT file.
            correct_grammar: Whether to run the per-segment grammar pass.
        """
        self.audio_path = audio_path
        self.output_srt = output_srt
        self.correct_grammar = correct_grammar
        self.wav_path = "converted.wav"
        # Take the key from the environment so it never has to be pasted into
        # the notebook; falls back to the original empty placeholder.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

    def convert_to_wav(self):
        """Convert ``self.audio_path`` to 16 kHz mono WAV at ``self.wav_path``."""
        print("🎧 Converting audio to 16kHz mono WAV...")
        ffmpeg.input(self.audio_path).output(
            self.wav_path, ar=16000, ac=1, format='wav'
        ).run(overwrite_output=True)

    def transcribe_with_whisper(self):
        """Send the WAV file to the translation endpoint and return its segments."""
        print("📝 Transcribing & translating with Whisper API...")
        with open(self.wav_path, "rb") as audio_file:
            response = self.client.audio.translations.create(
                model="whisper-1",
                file=audio_file,
                response_format="verbose_json"
            )
        return response.segments

    def correct_grammar_with_gpt(self, segments):
        """Ask the chat model to polish each segment's text.

        Args:
            segments: Segments exposing ``.text``, ``.start`` and ``.end``.

        Returns:
            list[dict]: One ``{"text", "start", "end"}`` dict per input segment.
            If the model returns no content the original text is kept.
        """
        print("🔤 Correcting grammar using GPT-4...")
        corrected_segments = []
        for seg in tqdm(segments):
            res = self.client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are a professional subtitle editor."},
                    {"role": "user", "content": f"Correct grammar, punctuation and fluency: {seg.text}"}
                ]
            )
            content = res.choices[0].message.content
            # An empty reply must not wipe the subtitle line.
            corrected_text = content.strip() if content else seg.text.strip()
            corrected_segments.append({
                "text": corrected_text,
                "start": seg.start,
                "end": seg.end
            })
        return corrected_segments

    @staticmethod
    def _segment_field(seg, name):
        """Read ``name`` from a segment given either as a dict or as an object."""
        if isinstance(seg, dict):
            return seg[name]
        return getattr(seg, name)

    def format_srt_timestamp(self, seconds: float) -> str:
        """Format seconds as an SRT timestamp (``HH:MM:SS,mmm``).

        Negative values are clamped to zero. The hour field is always at least
        two digits, and the value is converted to whole milliseconds once,
        with rounding, so float noise cannot drop a millisecond.
        """
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def save_srt(self, segments):
        """Write segments to ``self.output_srt``.

        Accepts both the attribute-style segments returned by the API and the
        dicts produced by ``correct_grammar_with_gpt``. The dicts previously
        failed here with ``AttributeError``.
        """
        print(f"💾 Saving SRT to {self.output_srt}...")
        with open(self.output_srt, "w", encoding="utf-8") as f:
            for i, seg in enumerate(segments):
                start = self.format_srt_timestamp(self._segment_field(seg, "start"))
                end = self.format_srt_timestamp(self._segment_field(seg, "end"))
                text = self._segment_field(seg, "text").strip()
                f.write(f"{i + 1}\n{start} --> {end}\n{text}\n\n")

    def run(self):
        """Run conversion, translation, optional grammar pass and SRT export."""
        self.convert_to_wav()
        segments = self.transcribe_with_whisper()
        if self.correct_grammar:
            segments = self.correct_grammar_with_gpt(segments)
        self.save_srt(segments)
        print(f"✅ Done! SRT saved at: {self.output_srt}")


In [None]:
# Build the Whisper-based converter for part 1 (change the audio path as
# needed), skip the GPT grammar pass, and run it end to end.
converter = WhisperHindiToEnglishSRT(
    "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
    "translated_subtitles1.srt",
    correct_grammar=False,
)
converter.run()

In [None]:
def burn_subtitles_to_video(self, video_path: str, output_video_path: str):
    """Burn the SRT at ``self.output_srt`` into a video with FFmpeg.

    Written in method style: ``self`` must be an object with an ``output_srt``
    attribute. Note that this module-level definition replaces any earlier
    notebook function of the same name.

    Args:
        self: Object providing ``output_srt`` (path of the SRT file).
        video_path: Path of the source video.
        output_video_path: Path of the subtitled video to write.

    Raises:
        FileNotFoundError: If the SRT file or the video does not exist.
    """
    print("🎬 Burning subtitles into video...")
    if not os.path.exists(self.output_srt):
        raise FileNotFoundError(f"SRT file not found: {self.output_srt}")

    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Use ffmpeg to burn subtitles. The encoder is selected with `vcodec` so
    # it applies to the video stream only; a bare `c="libx264"` is emitted as
    # `-c libx264`, which also targets the audio stream.
    ffmpeg.input(video_path).output(
        output_video_path,
        vf=f"subtitles={self.output_srt}",
        vcodec="libx264",
        crf=23,
        preset="medium"
    ).run(overwrite_output=True)

    print(f"✅ Subtitled video saved at: {output_video_path}")
# burn_subtitles_to_video is written in method style (its first parameter is
# `self`), so the object that owns the SRT path has to be passed explicitly;
# calling it with only the two keyword arguments raises TypeError.
# Assumes `converter` is the WhisperHindiToEnglishSRT instance from the earlier
# cell (it must expose `output_srt`). TODO confirm before running.
burn_subtitles_to_video(
    converter,
    video_path="/path/to/original_video.mp4",
    output_video_path="video_with_subtitles.mp4"
)

In [None]:
import sys
import os

# Add the root project directory to sys.path
project_root = os.path.abspath("..")  # Adjust as needed
if project_root not in sys.path:
    sys.path.append(project_root)
from utils.config import get_settings

# Show only whether a key is configured. Leaving the key itself as the cell's
# last expression would store the secret in the notebook's saved output.
bool(get_settings().openai_key)

In [None]:
import sys
import os

# Add the root project directory to sys.path
project_root = os.path.abspath("..")  # Adjust as needed
if project_root not in sys.path:
    sys.path.append(project_root)
# Project-local helpers; their behaviour is not visible from this notebook.
from models.translate import  TranslationUtils
from utils.srt_parser import SRTTranslator
translator = TranslationUtils() 


# The SRT translator is constructed around the translation helper above.
srt = SRTTranslator(translator)

# Presumably translates the SRT at input_path into the language given by
# target_language ("te") and writes it to output_path. Verify against
# SRTTranslator.
srt.translate_srt_file_batch_with_google_translate(
    input_path="/home/csc/Downloads/ahista_ahista_part1_hin.srt",
    output_path="ahista_ahista_part1_telugu.srt",
    target_language="te"
)

In [None]:
# Source video, subtitle file (.ass) and output file name for the
# babloo_bachelor part 2 Kannada run. output_path is relative, so the result
# lands in the notebook's working directory.
video_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/split_files/babloo_bachelor_part2.mp4"
subtitle_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Kannada/babloo_bachelor_part2__kn_SRTfile.ass"
output_path = "babloo_bachelor_part2__kn_subtitled1.mp4"

In [None]:
import subprocess

# Burn the Kannada subtitles into the video.
# video_path, subtitle_path and output_path are taken from the cell above.
# This cell used to reset all three to empty strings first, so FFmpeg was
# always invoked with an empty input and output and could never succeed.
command = [
    "ffmpeg", "-y",
    "-i", video_path,
    # fontsdir tells the subtitles filter where to look for the Noto fonts.
    "-vf", f"subtitles='{subtitle_path}':fontsdir='/usr/share/fonts/truetype/noto':force_style='FontName=Noto Sans Kannada'",
    "-c:a", "copy",
    output_path
]

# check=True turns a failed FFmpeg run into an exception instead of a
# non-zero return code that is easy to overlook in the cell output.
subprocess.run(command, check=True)

In [None]:
import sys
import os

# Make the project package importable from this notebook.
project_root = "/home/csc/Documents/Multilingual-Transcriber"
if project_root not in sys.path:
    sys.path.append(project_root)
# Project-local evaluator; its behaviour is not visible from this notebook.
from plugins.evalutions.evalution_new import TranslationEvaluator
evaluator  = TranslationEvaluator()
# Presumably pairs each source SRT with the target SRT that shares its base
# name, using the two suffixes. Verify against generate_srt_pairs.
response = evaluator.generate_srt_pairs(src_dir="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Base/",
                             tgt_dir="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Kannada/",
                             src_suffix= "__hi_SRTfile.srt", 
                             tgt_suffix="__kn_SRTfile.srt")

# Run the Gemini-based validation over the pairs; results are expected under
# output_dir. NOTE(review): confirm the output format with validate_batch_gemini.
evaluator.validate_batch_gemini(response, src_lang="Hindi", tgt_lang="Kannada", output_dir="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/evaluation/Kannada/")

In [None]:
import sys
import os

# Make the project package importable from this notebook.
project_root = "/home/csc/Documents/Multilingual-Transcriber"
if project_root not in sys.path:
    sys.path.append(project_root)
# Project-local helper. Judging by its name it renames the SRT files in the
# directory to include a language tag. NOTE(review): confirm before running,
# since it presumably modifies files in place.
from plugins.evalutions.evalution import rename_srt_files_with_language
rename_srt_files_with_language(directory="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Base/" )

In [None]:
import json
import time
import google.generativeai as genai

class TranslationEvaluator:
    """Scores how well a translation preserves meaning, using a Gemini model."""

    def __init__(self, api_key: str):
        """Configure the Gemini SDK and create the model handle.

        Args:
            api_key: API key passed to ``genai.configure``.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel("gemini-1.5-pro")

    @staticmethod
    def _parse_score(content: str) -> float:
        """Extract the ``score`` value from a model reply and clamp it to [0, 1].

        The JSON object is located by its outermost braces, so replies wrapped
        in a Markdown code fence (with or without a language tag) or in extra
        prose are handled too.

        Raises:
            ValueError: If the reply contains no JSON object.
        """
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise ValueError(f"No JSON object found in model response: {content!r}")
        score = float(json.loads(content[start:end + 1])["score"])
        return min(1.0, max(0.0, score))

    def evaluate_translation_gemini(self, 
                                    text_src: str,
                                    text_tgt: str,
                                    src_lang: str = " ",
                                    tgt_lang: str = " ",
                                    retries: int = 3) -> float:
        """Ask the model how well ``text_tgt`` conveys the meaning of ``text_src``.

        Args:
            text_src: Source-language text.
            text_tgt: Translated text to judge.
            src_lang: Source language name shown to the model.
            tgt_lang: Target language name shown to the model.
            retries: Number of attempts before giving up.

        Returns:
            float: Score between 0 and 1. ``0.0`` is returned when either text
            is blank or when every attempt failed.
        """
        if not text_src.strip() or not text_tgt.strip():
            return 0.0

        system_msg = (
            "You are a professional evaluator of translation quality.\n"
            "Judge how well Text-TGT conveys the meaning of Text-SRC.\n"
            "Ignore minor typos, small transcription errors, or differences in phrasing, "
            "as long as the meaning and spoken intent are preserved.\n"
            "Phonetic or sounding-similar translations are acceptable.\n"
            "Only respond with JSON: {\"score\": float between 0 and 1}.\n"
        )

        user_msg = f"Text-SRC ({src_lang}): {text_src.strip()}\n" \
                   f"Text-TGT ({tgt_lang}): {text_tgt.strip()}"

        for attempt in range(retries):
            try:
                response = self.model.generate_content(
                    contents=[{"role": "user", "parts": [{"text": system_msg + "\n\n" + user_msg}]}]
                )
                # The previous fence-stripping raised IndexError on fenced
                # replies, so those were always scored 0.0.
                return self._parse_score(response.text.strip())

            except Exception as e:
                print(f"[attempt {attempt + 1}] Gemini error: {e}")
                # Back off before retrying, but not after the last attempt.
                if attempt + 1 < retries:
                    time.sleep(1)

        return 0.0


In [1]:
import sys
import os

# Make the project package importable from this notebook.
project_root = "/home/csc/Documents/Multilingual-Transcriber"
if project_root not in sys.path:
    sys.path.append(project_root)
# Project-local evaluator; its behaviour is not visible from this notebook.
from plugins.evalutions.evalution_new import TranslationEvaluator
evaluator  = TranslationEvaluator()


# Validate one Hindi/Kannada SRT pair; results are expected in out_csv.
# NOTE(review): confirm the CSV layout with validate_pair_gemini.
evaluator.validate_pair_gemini(src_file ="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Base/babloo_bachelor_part2__hi_SRTfile.srt",
                               tgt_file="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/srt_files/Kannada/babloo_bachelor_part2__kn_SRTfile.srt", 
                               out_csv="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/babloo_bachelor/evaluation/Kannada/test.csv",
                      src_lang="Hindi", tgt_lang="Kannada")

In [None]:
from google import genai
from google.genai.types import HttpOptions

import os

# Take the key from the environment so it never has to be pasted into the
# notebook; falls back to the original empty placeholder when unset.
client = genai.Client(
    http_options=HttpOptions(api_version="v1"),
    api_key=os.environ.get("GEMINI_API_KEY", ""),
)
# Minimal smoke test: one prompt, print the text of the reply.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="How does AI work?",
)
print(response.text)
# Example response:
# Okay, let's break down how AI works. It's a broad field, so I'll focus on the ...
#
# Here's a simplified overview:
# ...

In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
import moviepy.editor as mp
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np
import threading
import os
import time

# --- Global Settings ---
SAMPLERATE = 44100  # Samples per second for audio recording
# Temporary audio file name; DubbingApp.load_video() removes an existing copy
# whenever a new video is loaded.
TEMP_AUDIO_FILENAME = "temp_dubbed_audio.wav"

class DubbingApp:
    """Minimal Tkinter proof-of-concept for dubbing a video with microphone audio.

    Workflow: "Load Video" picks a file, "Start Dubbing" plays it while the
    microphone is recorded, and "Export Video" writes a copy of the video whose
    audio track is the recording.

    Threading model: playback and recording each run in their own worker
    thread. Only the Tk main thread touches widgets; it polls the workers from
    ``check_dubbing_finished`` and uses ``is_recording`` as the stop signal for
    the recorder.
    """

    def __init__(self, root):
        """Build the UI inside ``root`` (a ``tk.Tk`` window) and reset state."""
        self.root = root
        self.root.title("Simple Dubber POC")
        self.root.geometry("400x250")

        self.video_path = None
        self.video_thread = None
        self.recording_thread = None
        self.is_recording = False       # stop flag polled by record_audio
        self.recording_saved = False    # True once the current take is on disk
        self.recorded_frames = []

        # --- UI Elements ---
        self.label = tk.Label(root, text="1. Select a video file to dub", font=("Arial", 12))
        self.label.pack(pady=10)

        self.btn_load = tk.Button(root, text="Load Video", command=self.load_video)
        self.btn_load.pack(pady=5)

        self.status_label = tk.Label(root, text="No video loaded.", wraplength=380)
        self.status_label.pack(pady=10)

        self.btn_record = tk.Button(root, text="Start Dubbing", state=tk.DISABLED, command=self.start_dubbing_process)
        self.btn_record.pack(pady=10)

        self.btn_export = tk.Button(root, text="Export Video", state=tk.DISABLED, command=self.export_video)
        self.btn_export.pack(pady=10)

    def load_video(self):
        """Opens a file dialog to select a video file."""
        path = filedialog.askopenfilename(
            filetypes=[("Video Files", "*.mp4 *.mkv *.avi"), ("All files", "*.*")]
        )
        if path:
            self.video_path = path
            self.status_label.config(text=f"Loaded: {os.path.basename(self.video_path)}")
            self.btn_record.config(state=tk.NORMAL)
            self.btn_export.config(state=tk.DISABLED) # Disable export until a recording is made
            print(f"Video loaded: {self.video_path}")
            # Clean up old recordings if a new video is loaded
            self.recording_saved = False
            if os.path.exists(TEMP_AUDIO_FILENAME):
                os.remove(TEMP_AUDIO_FILENAME)


    def start_dubbing_process(self):
        """Starts the video playback and audio recording in separate threads."""
        if not self.video_path:
            messagebox.showerror("Error", "No video file loaded.")
            return

        self.btn_load.config(state=tk.DISABLED)
        self.btn_record.config(state=tk.DISABLED, text="Recording...")
        # A previous take must not be exportable while a new one is in progress.
        self.btn_export.config(state=tk.DISABLED)
        self.status_label.config(text="Recording... Watch the video and speak!")

        # Raise the flag here, on the main thread, before either worker starts.
        # Setting it inside the recorder thread could overwrite a stop signal
        # that arrived first (e.g. when playback fails immediately).
        self.is_recording = True
        self.recording_saved = False

        # Thread for playing video
        self.video_thread = threading.Thread(target=self.play_video)

        # Thread for recording audio
        self.recording_thread = threading.Thread(target=self.record_audio)

        self.video_thread.start()
        self.recording_thread.start()

        # Check periodically if threads are done
        self.root.after(100, self.check_dubbing_finished)

    def play_video(self):
        """Plays the video clip using MoviePy's preview (worker thread)."""
        print("Starting video playback...")
        try:
            with mp.VideoFileClip(self.video_path) as clip:
                # preview will block until the video is finished or closed
                clip.preview()
        except Exception as e:
            print(f"Error playing video: {e}")
        print("Video playback finished.")

    def record_audio(self):
        """Records microphone audio until ``is_recording`` is cleared (worker thread).

        The caller is expected to have set ``is_recording`` to True before
        starting this thread. On exit, any captured frames are written to
        ``TEMP_AUDIO_FILENAME`` and ``recording_saved`` is set; widgets are
        deliberately not touched here because this is not the Tk thread.
        """
        print("Starting audio recording...")
        self.recorded_frames = []

        # Called by sounddevice for every captured chunk. The third positional
        # argument is timing info; it is named so it does not shadow `time`.
        def callback(indata, frames, time_info, status):
            if status:
                print(status)
            self.recorded_frames.append(indata.copy())

        try:
            # Start recording
            with sd.InputStream(samplerate=SAMPLERATE, channels=1, callback=callback):
                # The video may be closed early, so do not sleep for a fixed
                # duration; poll the stop flag in short intervals instead.
                while self.is_recording:
                    sd.sleep(100)

        except Exception as e:
            print(f"Error during audio recording: {e}")

        print("Audio recording stopped.")
        # Once stopped, save the file
        if self.recorded_frames:
            try:
                recording = np.concatenate(self.recorded_frames, axis=0)
                write(TEMP_AUDIO_FILENAME, SAMPLERATE, recording)
            except Exception as e:
                print(f"Error saving recorded audio: {e}")
            else:
                self.recording_saved = True
                print(f"Audio saved to {TEMP_AUDIO_FILENAME}")


    def check_dubbing_finished(self):
        """Polls the worker threads from the Tk main loop and updates the UI.

        Phase 1: while playback is running, just reschedule.
        Phase 2: once playback has ended, clear ``is_recording`` so the
        recorder leaves its loop, then keep rescheduling until it has exited
        (and therefore finished writing the WAV file).
        Phase 3: re-enable the buttons.
        """
        if self.video_thread is not None and self.video_thread.is_alive():
            self.root.after(100, self.check_dubbing_finished)
            return

        # Playback is over (finished, closed, or failed): stop the recorder.
        self.is_recording = False

        if self.recording_thread is not None and self.recording_thread.is_alive():
            self.root.after(100, self.check_dubbing_finished)
            return

        self.btn_load.config(state=tk.NORMAL)
        self.btn_record.config(state=tk.NORMAL, text="Start Dubbing")
        if self.recording_saved:
            self.btn_export.config(state=tk.NORMAL)
            self.status_label.config(text="Recording finished. Ready to export.")
        else:
            self.status_label.config(text="Recording finished, but no audio was captured.")


    def export_video(self):
        """Merges the recorded audio with the video and saves it."""
        if not self.video_path or not os.path.exists(TEMP_AUDIO_FILENAME):
            messagebox.showerror("Error", "No recorded audio found to export.")
            return

        save_path = filedialog.asksaveasfilename(
            defaultextension=".mp4",
            filetypes=[("MP4 Video", "*.mp4")]
        )
        if not save_path:
            return

        self.status_label.config(text="Exporting... This may take a while.")
        self.root.update() # Force UI update

        # Pre-bind so the cleanup below is safe even if opening a clip fails.
        video_clip = None
        audio_clip = None
        try:
            print("Loading original video clip...")
            video_clip = mp.VideoFileClip(self.video_path)

            print("Loading dubbed audio clip...")
            audio_clip = mp.AudioFileClip(TEMP_AUDIO_FILENAME)

            # If audio is longer than video, trim it
            if audio_clip.duration > video_clip.duration:
                audio_clip = audio_clip.subclip(0, video_clip.duration)

            print("Replacing audio and writing to file...")
            final_clip = video_clip.set_audio(audio_clip)

            # Use a good codec, add threads for speed
            final_clip.write_videofile(save_path, codec='libx264', audio_codec='aac', threads=4)

            messagebox.showinfo("Success", f"Video successfully exported to {save_path}")

        except Exception as e:
            messagebox.showerror("Export Error", f"An error occurred: {e}")
        finally:
            # Clean up
            self.status_label.config(text="Export finished. Load a new video or re-dub.")
            for clip in (video_clip, audio_clip):
                if clip is not None:
                    clip.close()
            if os.path.exists(TEMP_AUDIO_FILENAME):
                os.remove(TEMP_AUDIO_FILENAME)
            self.recording_saved = False
            self.btn_export.config(state=tk.DISABLED)

if __name__ == "__main__":
    # Create the Tk root window, attach the dubbing UI to it, and block in the
    # Tk event loop until the window is closed.
    root = tk.Tk()
    app = DubbingApp(root)
    root.mainloop()