In [None]:

from openai import OpenAI

# The client reads the key from the OPENAI_API_KEY environment variable.
# Passing an explicit empty string overrides the environment and makes every
# request fail authentication.
client = OpenAI()

# Translation with segment-level SRT output.
# The context manager closes the audio handle even when the API call raises.
with open("/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3", "rb") as audio_file:
    srt_output = client.audio.translations.create(
        model="whisper-1",
        file=audio_file,
        response_format='srt',
        prompt=(
            "This is a professionally recorded Hindi movie. "
            "Speakers include men, women, and children. "
            "Include emotional, poetic, slang, and culturally nuanced expressions. "
            "Ensure every spoken word is translated accurately without skipping or hallucinating."
        )
    )

# Save to file
with open("output_translated.srt", "w", encoding="utf-8") as f:
    f.write(srt_output)
print("✅ Saved: output_translated.srt")



In [None]:
import subprocess
import os
import datetime
from openai import OpenAI

# === Step 1: Convert MP3 to 16kHz WAV (mono, PCM) ===
def convert_audio_to_wav(input_path, output_path):
    """Re-encode ``input_path`` with ffmpeg as mono, 16 kHz, 16-bit PCM audio.

    The result is written to ``output_path``; ``-y`` lets ffmpeg overwrite an
    existing file without prompting.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            The failure is printed before the exception is re-raised.
    """
    ffmpeg_cmd = ["ffmpeg", "-y", "-i", input_path]  # -y: overwrite without asking
    ffmpeg_cmd += ["-ac", "1"]                       # mono audio
    ffmpeg_cmd += ["-ar", "16000"]                   # 16kHz
    ffmpeg_cmd += ["-c:a", "pcm_s16le"]              # 16-bit PCM
    ffmpeg_cmd.append(output_path)

    try:
        subprocess.run(ffmpeg_cmd, check=True)
    except subprocess.CalledProcessError as e:
        print("❌ FFmpeg conversion failed:", e)
        raise
    print(f"✅ Audio converted: {output_path}")

# === Step 2: Format time for SRT ===
def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond. Negative offsets are
    clamped to zero because SRT cannot express them.

    The previous implementation cut the string at the decimal point, so the
    millisecond field required by SRT was never emitted.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# === Step 3: Convert segments to SRT ===
def convert_segments_to_srt(segments):
    """Render segments as SRT text.

    Each segment must expose ``text``, ``start`` and ``end``. Segments whose
    text is blank are skipped. Cues are numbered consecutively from 1 over the
    segments that are actually written, so skipping a blank segment no longer
    leaves a gap in the numbering.

    Returns:
        The SRT document as one string ("" when nothing is written).
    """
    srt = []
    cue_number = 0
    for seg in segments:
        text = seg.text.strip()
        if not text:
            continue
        cue_number += 1
        srt.append(str(cue_number))
        srt.append(f"{to_srt_time(seg.start)} --> {to_srt_time(seg.end)}")
        srt.append(text)
        srt.append("")  # blank separator line between cues
    return "\n".join(srt)

# === Step 4: Run Whisper translation ===
def transcribe_and_translate(input_mp3, output_srt, temp_wav="temp_output.wav"):
    """Translate the speech in ``input_mp3`` to English and save it as an SRT file.

    Steps: convert the input to a temporary WAV, send it to the Whisper
    translation endpoint with ``verbose_json`` output, render the returned
    segments as SRT and write them to ``output_srt``.

    Args:
        input_mp3: Path of the source audio file.
        output_srt: Path of the subtitle file to write.
        temp_wav: Path of the intermediate WAV file. It is removed when the
            function exits, including when a step fails.
    """
    try:
        # 1. Convert audio
        convert_audio_to_wav(input_mp3, temp_wav)

        # 2. Transcribe using Whisper API.
        # The key is read from OPENAI_API_KEY; an explicit empty string would
        # override the environment and fail authentication.
        client = OpenAI()
        with open(temp_wav, "rb") as audio_file:
            result = client.audio.translations.create(
                model="whisper-1",
                file=audio_file,
                response_format="verbose_json",
                prompt=(
                    "This is a professionally recorded Hindi movie. "
                    "It contains real conversations, not stock disclaimers. "
                    "Translate all spoken dialogues to natural, fluent English. "
                    "Avoid fictional disclaimers or hallucinated text. "
                    "Speakers include men, women, and children. "
                    "Preserve cultural tone, slang, and emotion. "
                    "Do not invent content. Skip silence, but not real dialogue."
                )
            )

        # 3. Convert to SRT
        srt_data = convert_segments_to_srt(result.segments)
        with open(output_srt, "w", encoding="utf-8") as f:
            f.write(srt_data)
        print(f"✅ Subtitle saved: {output_srt}")
    finally:
        # 4. Clean up, also on failure, so a broken run leaves no stray WAV behind.
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

# === Run the full pipeline ===
# Arguments are (input_mp3, output_srt); the temporary WAV path keeps its default.
transcribe_and_translate(
    "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3",
    "output_cleaned.srt",
)


In [None]:
from openai import OpenAI
import datetime

def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond and negative offsets are
    clamped to zero. The previous implementation dropped everything after the
    decimal point, so every cue was written without the millisecond field
    that SRT requires.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    whole_seconds, millis = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

def group_words_to_srt(words, max_duration=3.0):
    """Group word-level timestamps into SRT cues.

    Words are accumulated into a cue until either the cue spans at least
    ``max_duration`` seconds or the current word ends with ``.``, ``?`` or
    ``!``. Words left over after the last break form a final cue.

    Args:
        words: Iterable of objects exposing ``word``, ``start`` and ``end``.
        max_duration: Cue length, in seconds, at which a break is forced.

    Returns:
        The SRT document as one string ("" for an empty input).
    """
    sentence_enders = ('.', '?', '!')
    cues = []           # finished cues as (start, end, text)
    pending = []        # words of the cue being built
    first_start = None
    last_end = None

    for item in words:
        token = item.word
        begin = item.start
        finish = item.end

        if first_start is None:
            first_start = begin
        last_end = finish
        pending.append(token)

        if last_end - first_start >= max_duration or token.endswith(sentence_enders):
            cues.append((first_start, last_end, " ".join(pending)))
            pending = []
            first_start = None

    if pending:
        cues.append((first_start, last_end, " ".join(pending)))

    rendered = []
    for number, (cue_start, cue_end, cue_text) in enumerate(cues, start=1):
        rendered += [
            str(number),
            f"{to_srt_time(cue_start)} --> {to_srt_time(cue_end)}",
            cue_text,
            "",
        ]
    return "\n".join(rendered)

# Initialize OpenAI client.
# The key is read from the OPENAI_API_KEY environment variable; an explicit
# empty string would override the environment and fail authentication.
client = OpenAI()

audio_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3"
with open(audio_path, "rb") as audio_file:
    result = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        response_format='verbose_json',
        prompt="This is a professionally recorded Hindi movie. Capture all speech, slang, emotional tones, background phrases, and ambient speech.",
        timestamp_granularities=["word"]
    )

# Validate word-level timestamps
if not getattr(result, "words", None):
    raise ValueError("❌ Word-level timestamps not returned. Ensure timestamp_granularities=['word'] is supported and audio is valid.")

# Generate and save the final SRT.
# NOTE(review): this cell calls the transcription endpoint, yet the output file
# name says "translated" — confirm which one is intended.
srt_text = group_words_to_srt(result.words)
with open("output_translated_synced.srt", "w", encoding="utf-8") as f:
    f.write(srt_text)

print("✅ SRT saved as 'output_translated_synced.srt'")


In [None]:
from openai import OpenAI
import datetime

# The key is read from the OPENAI_API_KEY environment variable; an explicit
# empty string would override the environment and fail authentication.
client = OpenAI()

# API call
# Get detailed word-level output.
# The context manager closes the audio handle even when the request raises.
with open("/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/ahista_ahista/audio_files/ahista_ahista_part1_audio.mp3", "rb") as audio_file:
    result = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        response_format='verbose_json',
        timestamp_granularities=["word"],
        prompt="This is a professionally recorded Hindi movie. Include all spoken content with emotions, slang, honorifics."
    )



# SRT time formatter
def to_srt_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Rounds to the nearest millisecond and clamps negative offsets to zero.
    The earlier version split the string at the decimal point and therefore
    never wrote the ``,mmm`` part that the SRT format requires.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours = total_ms // 3_600_000
    minutes = (total_ms // 60_000) % 60
    secs = (total_ms // 1_000) % 60
    millis = total_ms % 1_000
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# Grouping function
def words_to_srt(words, max_duration=5.0):
    """Build SRT text from word-level timestamps.

    A cue is closed as soon as it covers ``max_duration`` seconds or the
    current word ends in ``.``, ``!`` or ``?``. Any words remaining after the
    loop are written as a last cue ending at the final word's end time.

    Args:
        words: Iterable of objects exposing ``word``, ``start`` and ``end``.
        max_duration: Maximum cue span in seconds before a forced break.

    Returns:
        The SRT document as one string ("" for an empty input).
    """
    output_lines = []

    def emit(number, cue_start, cue_end, tokens):
        # One cue = number, time range, text and a blank separator line.
        output_lines.extend((
            str(number),
            f"{to_srt_time(cue_start)} --> {to_srt_time(cue_end)}",
            " ".join(tokens),
            "",
        ))

    next_number = 1
    buffered = []
    opened_at = None

    for entry in words:
        token = entry.word
        token_start = entry.start
        e = entry.end

        if opened_at is None:
            opened_at = token_start

        buffered.append(token)

        if e - opened_at >= max_duration or token.endswith(('.', '!', '?')):
            emit(next_number, opened_at, e, buffered)
            next_number += 1
            buffered = []
            opened_at = None

    # Handle leftover words
    if buffered:
        emit(next_number, opened_at, e, buffered)

    return "\n".join(output_lines)

# Convert the word list to SRT text, then write it out unchanged.
srt_text = words_to_srt(result.words)
with open("output_word_level.srt", mode="w", encoding="utf-8") as srt_file:
    print(srt_text, end="", file=srt_file)

print("✅ Word-aligned SRT saved as 'output_word_level.srt'")


In [None]:
def format_time(seconds: float) -> str:
    """
    Converts seconds to SRT time format (``HH:MM:SS,mmm``).

    The offset is rounded to the nearest millisecond on the whole value, so a
    fraction such as 1.001 keeps its millisecond instead of being truncated
    to ``,000`` by floating-point error. Negative offsets are clamped to zero.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1_000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

In [None]:
import os, math , openai , torch
from abc import ABC, abstractmethod
from typing import Optional, Any
from datetime import timedelta
from faster_whisper import WhisperModel
if torch.cuda.is_available():
    torch.cuda.empty_cache()


model_size = "large-v3"

# Run on GPU with FP16
model = WhisperModel(model_size, device="cuda", compute_type="float16")
# Adjacent string literals are used instead of a backslash continuation so the
# indentation of the second line does not end up inside the prompt text.
prompt = (
    "This is a professionally recorded Hindi movie. The speakers include men, women, and children. "
    "There may be emotional or poetic expressions. Accurately capture all speech including honorifics and slang."
)

# The prompt was previously built but never handed to the model; it is now
# passed as ``initial_prompt``.
segments, info = model.transcribe(
    audio="/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/Ahista/audiofiles/ahista_ahista_part4_audio.mp3",
    language='hi',
    beam_size=5,
    initial_prompt=prompt,
)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

# Write the transcription to an SRT file
with open("test.srt", "w", encoding="utf-8") as f:
    for i, segment in enumerate(segments, start=1):
        start_time = format_time(segment.start)
        end_time = format_time(segment.end)
        text = segment.text.strip()
        f.write(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")




In [None]:
import os, math , openai , torch
from abc import ABC, abstractmethod
from typing import Optional, Any
from datetime import timedelta
from faster_whisper import WhisperModel
if torch.cuda.is_available():
    torch.cuda.empty_cache()


model_size = "large-v3"

# Run on GPU with FP16
model = WhisperModel(model_size, device="cuda", compute_type="float16")
prompt = "This is a professionally recorded Hindi movie with emotional and poetic expressions."

# The same prompt text is passed to the model as its initial prompt.
segments, _ = model.transcribe(
    audio="/home/csc/Documents/Multilingual-Transcriber/shared_data/Ahista/audiofiles/ahista_ahista_part1_audio.mp3",
    language='hi',
    beam_size=5,
    initial_prompt=prompt,
)
# Collect the returned segments into a list before writing them out.
segments_L = list(segments)

with open("test4.srt", "w", encoding="utf-8") as f:
    for i, segment in enumerate(segments_L, start=1):
        start_time = format_time(segment.start)
        end_time = format_time(segment.end)
        text = segment.text.strip()
        cue = f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
        f.write(cue)


In [None]:
import pysrt

subs = pysrt.open('/home/csc/Documents/Multilingual-Transcriber/plugins/experiments/test4.srt')

# Print every cue field by field, followed by a separator line.
for sub in subs:
    fields = (
        ("Index:", sub.index),
        ("Start:", sub.start),
        ("End:", sub.end),
        ("Text:", sub.text),
    )
    for label, value in fields:
        print(label, value)
    print("---")


In [None]:
from openai import OpenAI


# The key is read from the OPENAI_API_KEY environment variable. An explicit
# empty string would override the environment and fail authentication.
client = OpenAI()

def get_similarity_score(text1, text2):
    """Ask the chat model how contextually similar two texts are.

    The model is instructed to answer with a JSON object holding ``score``
    and ``explanation``; the reply is parsed as JSON as-is.

    Returns:
        Tuple ``(score, explanation)`` taken from the parsed reply.

    Raises:
        ValueError: The reply is not valid JSON or lacks one of the two keys.
    """
    import json

    prompt = f"""
You are a function that compares two texts for contextual similarity.

Instructions:
- Return your output in JSON format.
- Only include two fields: "score" (float from 0 to 1), and "explanation" (a short sentence).

Example output:
{{"score": 0.85, "explanation": "The texts describe similar concepts using different words."}}

Text A: {text1}

Text B: {text2}
"""

    conversation = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        temperature=0,
    )
    message = response.choices[0].message.content

    try:
        parsed = json.loads(message)
        outcome = (parsed["score"], parsed["explanation"])
    except Exception as e:
        raise ValueError(f"Could not parse LLM output: {message}") from e
    return outcome

# Example: the two sentences differ only by the word "नहीं".
text1 = "ऐसे नहीं आयेगा यह देखो अब आपमाच शुरू होना आ जाएगा वो."
text2 = "ऐसे आयेगा यह देखो अब आपमाच शुरू होना आ जाएगा वो."

score, explanation = get_similarity_score(text1, text2)
for label, value in (("Score:", score), ("Explanation:", explanation)):
    print(label, value)





In [None]:
import openai
import json

from openai import OpenAI


# The key is read from the OPENAI_API_KEY environment variable. An explicit
# empty string would override the environment and fail authentication.
client = OpenAI()


def get_similarity_score(text1, text2):
    """Ask the chat model how contextually similar two texts are.

    The model is asked for a JSON object with ``score`` and ``explanation``.
    When the reply starts with a Markdown code fence, the backticks are
    stripped and every line beginning with ``json`` is dropped before parsing.

    Returns:
        Tuple ``(score, explanation)`` taken from the parsed reply.

    Raises:
        ValueError: The cleaned reply is not valid JSON or lacks a key.
    """
    prompt = f"""
You are a function that compares two texts for contextual similarity.

Instructions:
- Return your output in JSON format.
- Include exactly two fields: 
  "score" (float between 0 and 1), and 
  "explanation" (a short sentence explaining the similarity).

Example output:
{{"score": 0.85, "explanation": "The texts describe similar concepts using different words."}}

Text A: {text1}

Text B: {text2}
"""

    conversation = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0,
    )

    message = response.choices[0].message.content.strip()

    # Remove Markdown formatting if present
    if message.startswith("```"):
        kept_rows = []
        for row in message.strip("`").splitlines():
            if row.strip().startswith("json"):
                continue  # language tag line of the fence
            kept_rows.append(row)
        message = "\n".join(kept_rows)

    try:
        parsed = json.loads(message)
        outcome = (parsed["score"], parsed["explanation"])
    except Exception as e:
        raise ValueError(f"Could not parse LLM output: {message}") from e
    return outcome

# Example usage: the two sentences differ only by the word "नहीं".
text1 = "ऐसे नहीं आयेगा यह देखो अब आपमाच शुरू होना आ जाएगा वो."
text2 = "ऐसे आयेगा यह देखो अब आपमाच शुरू होना आ जाएगा वो."

score, explanation = get_similarity_score(text1, text2)
for label, value in (("Score:", score), ("Explanation:", explanation)):
    print(label, value)


In [None]:
from openai import OpenAI


# The key is read from the OPENAI_API_KEY environment variable; an explicit
# empty string would override the environment and fail authentication.
client = OpenAI()

# The context manager closes the audio handle even when the request raises.
# NOTE: despite its name, ``translation`` holds the result of the
# transcription endpoint; the name is kept so later cells still find it.
with open("/home/csc/Documents/Multilingual-Transcriber/shared_data/Ahista/audiofiles/ahista_ahista_part1_audio.mp3", "rb") as audio_file:
    translation = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=audio_file,
        response_format="text",
        prompt= "This is a professionally recorded Hindi movie with emotional and poetic expressions."
    )

print(translation)

In [None]:
from googletrans import Translator

# Create a translator object
translator = Translator()

# Hindi sentence to translate
text_to_translate = "अरे मैंने कहने का वो मतलब नहीं था, वो जो पास में नवजीवन बुड़ा आश्रम है न, वहाँ पे बुड़ों की सेवा करने के लिए लोगों की जरूरत पड़ती है, खास कर छोरियों की"

# Translate the text to Spanish (dest='es'); no source language is passed here
translated_text = translator.translate(text_to_translate, dest='es').text

# Print the translated text
print(translated_text)  # prints the Spanish translation of the sentence above

In [None]:
from openai import OpenAI
# The key is read from the OPENAI_API_KEY environment variable. An explicit
# empty string would override the environment and fail authentication.
client = OpenAI()
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with the chat model.

    Uses the module-level ``client`` and the v1 ``chat.completions`` API.
    The previous body called ``openai.ChatCompletion.create``, which was
    removed from the v1 ``openai`` package, referenced a module this cell
    never imports, and subscripted the response like a dict.

    Args:
        text: Text to translate.
        target_language: Language name or code inserted into the system prompt.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": f"You are a helpful translator. Translate everything to {target_language}."},
            {"role": "user", "content": text}
        ],
        temperature=0.7
    )
    return response.choices[0].message.content.strip()

In [None]:
# Hindi sample sentence, translated with target language code "en".
text = "अरे मैंने कहने का वो मतलब नहीं था, वो जो पास में नवजीवन बुड़ा आश्रम है न, वहाँ पे बुड़ों की सेवा करने के लिए लोगों की जरूरत पड़ती है, खास कर छोरियों की"
target_language = "en"
translation = translate_text(text=text, target_language=target_language)
print(translation)

In [None]:
pip install SpeechRecognition gtts

In [None]:
# Importing necessary modules required
import speech_recognition as spr
from googletrans import Translator

get_sentence = "अरे मैंने कहने का वो मतलब नहीं था, वो जो पास में नवजीवन बुड़ा आश्रम है न, वहाँ पे बुड़ों की सेवा करने के लिए लोगों की जरूरत पड़ती है, खास कर छोरियों की" 
# Translator instance used for the request below
translator = Translator()

# Source and target languages (codes passed as src / dest)
from_lang = 'hi'
to_lang = 'gu'



# Translate the text; note that `text_to_translate` holds the object returned
# by translate(), and its `.text` attribute is what gets printed.
text_to_translate = translator.translate(get_sentence, src=from_lang, dest=to_lang)
translated_text = text_to_translate.text
print(translated_text)
                
      

In [None]:
pip install google-cloud-translate pysrt


In [None]:
import requests

def translate_text(text, target_language="bhasa", source_language="hi", api_key=" "):
    """Translate ``text`` through the Google Translation v2 REST endpoint.

    Args:
        text: Text to translate.
        target_language: Target language code sent as ``target``.
            NOTE(review): the default "bhasa" does not look like a language
            code (the example call in this notebook passes "id") — confirm
            before relying on the default.
        source_language: Source language code sent as ``source``.
        api_key: API key sent as ``key``; the default is a blank placeholder.

    Returns:
        The ``translatedText`` of the first translation in the response.

    Raises:
        Exception: The service answered with a status other than 200.
        requests.exceptions.RequestException: The request could not be
            completed, including exceeding the 30-second timeout.
    """
    url = "https://translation.googleapis.com/language/translate/v2"
    params = {
        'q': text,
        'target': target_language,
        'source': source_language,
        # Ask for plain text so the result is not HTML-escaped (e.g. &#39;).
        'format': 'text',
        'key': api_key
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Translation failed: {response.text}")

    result = response.json()
    return result['data']['translations'][0]['translatedText']

# Example usage: Hindi to Bahasa Indonesia ("id").
translated = translate_text("आप कैसे हैं?", target_language="id")
print(translated)

In [None]:
from google.cloud import translate_v2 as translate
import os

# Set your service account credentials
# NOTE(review): the value below is a blank placeholder; presumably it has to be
# replaced with the path of a service-account key file before the client
# can authenticate — confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = " "

# Initialize Google Translate client (created after the variable above is set)
translate_client = translate.Client()

def translate_text(text, target_language="id"):
    """Translate Hindi ``text`` with the module-level Google Translate client.

    Args:
        text: Text to translate; the source language is fixed to "hi".
        target_language: Target language code (default "id", Bahasa Indonesia).

    Returns:
        The ``translatedText`` entry of the client's response.
    """
    response = translate_client.translate(
        text,
        source_language="hi",
        target_language=target_language,
    )
    return response['translatedText']

In [None]:
from openai import OpenAI
# The key is read from the OPENAI_API_KEY environment variable. An explicit
# empty string would override the environment and fail authentication.
client = OpenAI()


def translate_text_gpt(text, target_language):
    """Translate Hindi ``text`` into ``target_language`` with the chat model.

    Args:
        text: Hindi text to translate.
        target_language: Name of the target language, inserted into the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    conversation = [
        {"role": "system", "content": "You are a professional translator."},
        {
            "role": "user",
            "content": f"Translate the following Hindi text to {target_language}:\n\n{text}",
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.3,
    )
    reply = completion.choices[0].message.content
    return reply.strip()

# Example usage: translate a short Hindi sentence to Gujarati.
text = "अरे मैंने कहने का वो मतलब नहीं था..."
translated = translate_text_gpt(text=text, target_language="Gujarati")
print(translated)



In [None]:
# Language codes to print, one per line.
l = ['gu', 'mr', 'es']
# Iterate over the items directly instead of indexing through range(len(...)).
for code in l:
    print(code)

In [None]:

# Print googletrans.LANGUAGES, the table of languages
# that the googletrans package lists.
import googletrans


print(googletrans.LANGUAGES)

In [None]:
import os
import subprocess
import json

def get_video_info(path):
    """Probe the first video stream of ``path`` with ffprobe.

    Returns:
        Tuple ``(codec_name, width, height)`` of the first video stream.

    Raises:
        RuntimeError: ffprobe exited with a non-zero status, or the file has
            no video stream (previously a bare IndexError/KeyError).
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=codec_name,width,height",
        "-of", "json", path
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed on {path}: {result.stderr}")

    # ffprobe can succeed and still report no matching stream (e.g. audio-only input).
    streams = json.loads(result.stdout).get("streams") or []
    if not streams:
        raise RuntimeError(f"ffprobe found no video stream in {path}")

    stream = streams[0]
    return (stream["codec_name"], stream["width"], stream["height"])


In [None]:
def validate_video_formats(video_paths, base_dir):
    """Check that every listed video matches the format of the first one.

    Each name in ``video_paths`` is resolved against ``base_dir`` and probed
    with ``get_video_info``; the first result is the reference.

    Raises:
        FileNotFoundError: A resolved path does not exist.
        ValueError: A video's probed info differs from the reference.
    """
    reference = None
    for name in video_paths:
        candidate = os.path.join(base_dir, name)
        if not os.path.exists(candidate):
            raise FileNotFoundError(f"File not found: {candidate}")

        probed = get_video_info(candidate)
        if reference is None:
            reference = probed
            continue
        if probed != reference:
            raise ValueError(f"Incompatible video format: {name} has {probed}, expected {reference}")


In [None]:
def merge_videos_ffmpeg_fast(video_paths, base_dir, output_path, overwrite=False):
    """Concatenate videos without re-encoding, using ffmpeg's concat demuxer.

    The videos are validated with ``validate_video_formats`` and then listed,
    in the given order, in ``concat_list.txt`` inside ``base_dir``. That list
    file is left in place after the merge.

    Args:
        video_paths: File names relative to ``base_dir``, in merge order.
        base_dir: Directory holding the videos.
        output_path: Path of the merged file.
        overwrite: When True ffmpeg gets ``-y`` and replaces an existing
            output. When False (default) it gets ``-n`` and exits with an
            error if the output exists, instead of waiting for an
            interactive answer.

    Raises:
        ValueError: ``video_paths`` is empty, or the formats differ.
        FileNotFoundError: A listed video does not exist.
        RuntimeError: ffmpeg exited with a non-zero status.
    """
    if not video_paths:
        raise ValueError("No video files to merge")

    validate_video_formats(video_paths, base_dir)

    concat_file = os.path.join(base_dir, "concat_list.txt")
    with open(concat_file, "w") as f:
        for video in video_paths:
            full_path = os.path.join(base_dir, video)
            # Inside a single-quoted concat entry a literal quote is written as '\''.
            escaped_path = full_path.replace("'", "'\\''")
            f.write(f"file '{escaped_path}'\n")

    cmd = [
        "ffmpeg", "-y" if overwrite else "-n",
        "-f", "concat", "-safe", "0", "-i", concat_file,
        "-c", "copy", output_path
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg merge failed: {result.stderr}")

    # os.remove(concat_file)
    print(f"Merged video saved to: {output_path}")


In [None]:
from natsort import natsorted
import os

video_dir = "/path/to/videos"

# Only include .mp4 files
video_files = [name for name in os.listdir(video_dir) if name.endswith(".mp4")]

# Apply natural sort
sorted_files = natsorted(video_files)

# Write to input.txt in correct format: one "file '<path>'" line per video
with open("input.txt", "w") as f:
    for filename in sorted_files:
        full_path = os.path.join(video_dir, filename)
        entry = f"file '{full_path}'\n"
        f.write(entry)


In [None]:
# Imported here so this cell also runs on its own.
from natsort import natsorted

video_dir = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/Rishtey/subtitled/Bhasa/"
# Natural sort keeps e.g. part2 before part10; plain sorted() compares the
# names character by character and would merge part10 ahead of part2.
video_files = natsorted(f for f in os.listdir(video_dir) if f.endswith(".mp4"))
output_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/Rishtey/final_merged1.mp4"

merge_videos_ffmpeg_fast(video_files, video_dir, output_path)


In [None]:
import os
import subprocess
import json
from natsort import natsorted  # <-- for natural sorting

def get_video_info(path):
    """Probe the first video stream of ``path`` with ffprobe.

    Returns:
        Tuple ``(codec_name, width, height)`` of the first video stream.

    Raises:
        RuntimeError: ffprobe exited with a non-zero status, or the file has
            no video stream (previously a bare IndexError/KeyError).
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=codec_name,width,height",
        "-of", "json", path
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed on {path}: {result.stderr}")

    # ffprobe can succeed and still report no matching stream (e.g. audio-only input).
    streams = json.loads(result.stdout).get("streams") or []
    if not streams:
        raise RuntimeError(f"ffprobe found no video stream in {path}")

    stream = streams[0]
    return (stream["codec_name"], stream["width"], stream["height"])

def validate_video_formats(video_paths, base_dir):
    """Ensure all listed videos share the first video's probed format.

    Every entry of ``video_paths`` is joined onto ``base_dir`` and probed via
    ``get_video_info``. The first probe result becomes the expected format.

    Raises:
        FileNotFoundError: A joined path does not exist.
        ValueError: A later video's info differs from the expected format.
    """
    expected = None
    for relative_name in video_paths:
        location = os.path.join(base_dir, relative_name)
        if os.path.exists(location):
            found = get_video_info(location)
        else:
            raise FileNotFoundError(f"File not found: {location}")

        if expected is not None and found != expected:
            raise ValueError(f"Incompatible video format: {relative_name} has {found}, expected {expected}")
        if expected is None:
            expected = found

def merge_videos_ffmpeg_fast(video_paths, base_dir, output_path, overwrite=False):
    """Concatenate videos without re-encoding, using ffmpeg's concat demuxer.

    The names are naturally sorted, validated with
    ``validate_video_formats`` and listed in ``concat_list.txt`` inside
    ``base_dir``. That list file is left in place after the merge.

    Args:
        video_paths: File names relative to ``base_dir``.
        base_dir: Directory holding the videos.
        output_path: Path of the merged file.
        overwrite: When True ffmpeg gets ``-y`` and replaces an existing
            output. When False (default) it gets ``-n`` and exits with an
            error if the output exists, instead of waiting for an
            interactive answer.

    Raises:
        ValueError: ``video_paths`` is empty, or the formats differ.
        FileNotFoundError: A listed video does not exist.
        RuntimeError: ffmpeg exited with a non-zero status.
    """
    if not video_paths:
        raise ValueError("No video files to merge")

    # Natural sort to ensure correct order like part1, part2, ..., part10
    video_paths = natsorted(video_paths)

    # Validate formats before merging
    validate_video_formats(video_paths, base_dir)

    concat_file = os.path.join(base_dir, "concat_list.txt")
    with open(concat_file, "w") as f:
        for video in video_paths:
            full_path = os.path.join(base_dir, video)
            # Inside a single-quoted concat entry a literal quote is written as '\''.
            escaped_path = full_path.replace("'", "'\\''")
            f.write(f"file '{escaped_path}'\n")

    cmd = [
        "ffmpeg", "-y" if overwrite else "-n",
        "-f", "concat", "-safe", "0", "-i", concat_file,
        "-c", "copy", output_path
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg merge failed: {result.stderr}")

    # Optionally delete concat list
    # os.remove(concat_file)
    print(f"Merged video saved to: {output_path}")


In [None]:
video_dir = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/Rishtey/subtitled/Bhasa/"
output_path = "/home/csc/Documents/Multilingual-Transcriber/shared_data/movieslist/Rishtey/final_merged2.mp4"

# Collect the .mp4 names in the directory, sorted by name.
video_files = sorted(
    name for name in os.listdir(video_dir) if name.endswith(".mp4")
)

merge_videos_ffmpeg_fast(video_files, video_dir, output_path)

In [None]:
import os
import subprocess
import ffmpeg

def synchronize_and_embed_subtitles(video_path, subtitle_path, output_path):
    """
    Synchronize and embed subtitles with the video.

    ``ffsubsync`` first writes a re-timed copy of ``subtitle_path`` named
    ``synced_<basename>`` in the current working directory. ``ffmpeg`` then
    renders that copy into the picture with the ``subtitles`` video filter,
    so the video stream is re-encoded; only the audio stream is copied.

    The temporary synced subtitle is removed when the function exits, also
    when one of the two commands fails.

    Args:
        video_path: Source video.
        subtitle_path: Subtitle file to align with the video.
        output_path: Destination video; overwritten if it exists (``-y``).

    Raises:
        subprocess.CalledProcessError: ffsubsync or ffmpeg exited non-zero.
    """
    synced_subtitle_path = "synced_" + os.path.basename(subtitle_path)

    try:
        # Step 1: Sync using ffsubsync
        subprocess.run([
            "ffsubsync", video_path,
            "-i", subtitle_path,
            "-o", synced_subtitle_path
        ], check=True)

        # Step 2: Burn the subtitles in with ffmpeg (video re-encoded, audio copied).
        # NOTE(review): the path is inserted into the filter string unescaped —
        # confirm subtitle file names never contain filter metacharacters.
        subprocess.run([
            "ffmpeg", "-y", "-i", video_path,
            "-vf", f"subtitles={synced_subtitle_path}",
            "-c:a", "copy",  # avoid re-encoding audio
            output_path
        ], check=True)
    finally:
        # The file may be missing if ffsubsync failed before writing it.
        if os.path.exists(synced_subtitle_path):
            os.remove(synced_subtitle_path)

    print(f"Subtitled video saved to {output_path}")


In [None]:
import json
import time
import pathlib
import csv
import itertools
from typing import Iterator, Tuple
import pandas as pd
from openai import OpenAI

# Module-level OpenAI client used by llm_similarity. No key is passed here,
# so credentials have to be supplied from outside this cell.
client = OpenAI()  # auth handled via env or config

# ──────────────────────────────
def parse_srt_stream(path: str | pathlib.Path) -> Iterator[Tuple[int, str]]:
    with open(path, encoding="utf-8", errors="ignore") as f:
        idx, lines = None, []
        for line in f:
            line = line.strip()
            if line == "":
                if idx is not None:
                    yield idx, " ".join(lines)
                idx, lines = None, []
                continue

            if idx is None and line.isdigit():
                idx = int(line)
            elif "-->" in line:
                continue  # ignore timecodes
            else:
                lines.append(line)
        if idx is not None:
            yield idx, " ".join(lines)

# ──────────────────────────────
def llm_similarity(text1: str, text2: str, retries=3) -> float:
    """Score the contextual similarity of two texts with the chat model.

    If either text is blank the result is 0.0 and no request is made.
    Otherwise the model is asked for JSON containing ``score``; a reply that
    starts with a Markdown fence has its backticks stripped and its lines
    beginning with ``json`` removed before parsing. Any exception during an
    attempt is printed, followed by a one-second pause and the next attempt.

    Args:
        text1: First text.
        text2: Second text.
        retries: Number of attempts before giving up.

    Returns:
        The reply's ``score`` as a float (0.0 when the key is absent), or
        0.0 after all attempts failed.
    """
    if not (text1.strip() and text2.strip()):
        return 0.0  # empty line = no similarity

    prompt = f"""
You are a function that compares two texts for contextual similarity.
Return only JSON with:
  "score" (0-1 float) and "explanation" (1 sentence).
Text A: {text1}
Text B: {text2}
"""

    for attempt in range(retries):
        try:
            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
            )
            reply = completion.choices[0].message.content.strip()

            if reply.startswith("```"):
                kept_rows = []
                for row in reply.strip("`").splitlines():
                    if row.lstrip().startswith("json"):
                        continue  # language tag line of the fence
                    kept_rows.append(row)
                reply = "\n".join(kept_rows)

            parsed = json.loads(reply)
            return float(parsed.get("score", 0.0))
        except Exception as e:
            print(f"[retry {attempt+1}] LLM error: {e}")
            time.sleep(1)
    return 0.0

# ──────────────────────────────
def validate_pair_streamed(src_file: str, tgt_file: str, out_csv: str):
    """Compare two SRT files cue by cue and write the scores to a CSV file.

    Cues are paired by position, not by index. When one file has fewer cues,
    the missing side is filled with ``(None, "")``. Each row holds both file
    names, the cue index (taken from the source side, or from the target side
    when the source cue is missing), both texts, and the similarity rounded
    to four decimals.
    """
    src_name = pathlib.Path(src_file).name
    tgt_name = pathlib.Path(tgt_file).name
    header = ["file_src", "file_tgt", "index", "src_text", "tgt_text", "similarity"]

    with open(out_csv, "w", encoding="utf-8", newline="") as f_out:
        writer = csv.writer(f_out)
        writer.writerow(header)

        paired_cues = itertools.zip_longest(
            parse_srt_stream(src_file),
            parse_srt_stream(tgt_file),
            fillvalue=(None, ""),
        )
        for (src_idx, src_text), (tgt_idx, tgt_text) in paired_cues:
            similarity = llm_similarity(src_text, tgt_text)
            row_index = tgt_idx if src_idx is None else src_idx
            writer.writerow(
                [src_name, tgt_name, row_index, src_text, tgt_text, round(similarity, 4)]
            )

# ──────────────────────────────
def validate_batch_streamed(pairs: list[Tuple[str, str]], output_dir="output_csvs"):
    """Run ``validate_pair_streamed`` for every (source, target) SRT pair.

    One CSV per pair is written to ``output_dir`` as
    ``<source stem>__vs__<target stem>.csv``.

    Args:
        pairs: ``(source_path, target_path)`` tuples.
        output_dir: Directory for the CSV files. It is created together with
            any missing parent directories; an existing directory is reused.
    """
    out_root = pathlib.Path(output_dir)
    # parents=True so a nested output directory does not fail with FileNotFoundError.
    out_root.mkdir(parents=True, exist_ok=True)

    for src, tgt in pairs:
        out_file = out_root / f"{pathlib.Path(src).stem}__vs__{pathlib.Path(tgt).stem}.csv"
        print(f"▶ Processing: {src} vs {tgt} → {out_file.name}")
        validate_pair_streamed(src, tgt, out_file)



In [None]:
import time
import torch
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
from core.logging import SingletonLogger, log_exceptions
from config.settings import get_settings
from models import format_time, translate_text_openai, translate_text_google, wrap_text
from utils.language_const import LANGUAGES


class Transcribe(ABC):
    """Abstract interface for transcribers that write their result to files."""

    @abstractmethod
    def AudioTranscriptiontoFile(
        self,
        model,
        inputpath: str,
        languagestoconvert: list,
        outputfolder: str,
        outputpath: str,
        *args,
        **kwargs,
    ):
        """Transcribe ``inputpath`` with ``model``; concrete subclasses must implement this."""
        ...


class AudioTranscriptor(Transcribe):
    """Transcribes Hindi audio to a base SRT file and translated SRT files."""

    def __init__(self):
        self.logger = SingletonLogger.getInstance().logger
        self.settings = get_settings()

    @staticmethod
    def _write_wrapped_cues(fp, segment, text, index):
        """Write ``text`` as consecutive cues covering ``segment``; return the next cue index.

        The text is wrapped to at most 15 words per line and the segment's
        duration is divided evenly between the resulting lines.
        """
        wrapped_lines = wrap_text(text, max_words=15)
        line_count = len(wrapped_lines) or 1  # guard against dividing by zero
        slice_len = (segment.end - segment.start) / line_count

        for i, line in enumerate(wrapped_lines):
            line_start = segment.start + i * slice_len
            line_end = line_start + slice_len
            fp.write(f"{index}\n{format_time(line_start)} --> {format_time(line_end)}\n{line}\n\n")
            index += 1
        return index

    @log_exceptions("Failed during audio transcription")
    def AudioTranscriptiontoFile(self, model, inputpath: str, languagestoconvert: list, outputfolder: str, outputpath: str, *args, **kwargs):
        """Transcribe ``inputpath`` and write one SRT per requested language.

        The base transcription is written to ``{outputfolder}Base/{outputpath}``.
        Each entry of ``languagestoconvert`` is then translated in its own
        worker thread and written to ``{outputfolder}<subfolder>/{outputpath}``.
        The paths are built by plain string concatenation, so ``outputfolder``
        is expected to end with a path separator, and the target folders are
        not created here.

        Args:
            model: Object whose ``transcribe`` returns ``(segments, info)``.
            inputpath: Audio file to transcribe.
            languagestoconvert: Keys of ``LANGUAGES``; may be empty, in which
                case only the base file is written.
            outputfolder: Folder prefix for all output files.
            outputpath: File name used inside every language folder.

        Raises:
            Exception: An error raised while translating any language is
                re-raised here instead of being silently dropped.
                NOTE(review): how ``log_exceptions`` treats it afterwards is
                not visible in this file.
        """
        self.logger.info("Starting base transcription")
        segments, info = model.transcribe(audio=inputpath, language='hi', beam_size=5)
        self.logger.info(f"Detected language '{info.language}' with probability {info.language_probability:.2f}")

        # Materialise the segments because they are iterated once per language.
        segments_L = list(segments)
        basepath = f"{outputfolder}Base/{outputpath}"

        with open(basepath, "w", encoding="utf-8") as f:
            index = 1
            for segment in segments_L:
                index = self._write_wrapped_cues(f, segment, segment.text.strip(), index)

        self.logger.info(f"Base SRT file saved at {basepath}")
        time.sleep(3)  # NOTE(review): purpose of this pause is not evident from this file

        def process_language(to_lang):
            lang_code = LANGUAGES[to_lang]
            subfolder = {
                'ms': "Malay", 'id': "Bhasa", 'bho': "Bhojpuri", 'gu': "Gujarati",
                'mr': "Marathi", 'kn': "Kannada", 'ml': "Malayalam",
                'ta': "Tamil", 'es': "Spanish"
            }.get(lang_code, "Translated")

            basepath_lan = f"{outputfolder}{subfolder}/{outputpath}"

            with open(basepath_lan, "w", encoding="utf-8") as fp:
                index = 1
                for segment in segments_L:
                    text = segment.text.strip()
                    # Bhojpuri and Bahasa go through Google, everything else through OpenAI.
                    translated = (
                        translate_text_google(text, lang_code)
                        if lang_code in ("bho", "id")
                        else translate_text_openai(text, lang_code)
                    )
                    index = self._write_wrapped_cues(fp, segment, translated, index)

            self.logger.info(f"SRT for {to_lang} saved at {basepath_lan}")
            time.sleep(2)  # NOTE(review): purpose of this pause is not evident from this file

        # Run translation in threads. ThreadPoolExecutor rejects max_workers=0,
        # so an empty language list skips the pool entirely.
        if languagestoconvert:
            with ThreadPoolExecutor(max_workers=len(languagestoconvert)) as executor:
                # Consuming the result iterator re-raises a worker's exception;
                # an unconsumed map() would discard it without a trace.
                list(executor.map(process_language, languagestoconvert))

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            self.logger.info("CUDA memory cache cleared")
