# In [None]:
import pandas as pd
from transformers import pipeline
import torch
import os
from tqdm import tqdm
from google.colab import files  # ✅ For downloading

# Choose the device index handed to the pipeline: 0 (first CUDA device)
# when CUDA is available, otherwise -1 (the pipeline's CPU setting).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Build the M2M100 (418M) translation pipeline once at module level so
# every call to the helpers below reuses the same loaded model.
translator = pipeline(
    "translation",
    model="facebook/m2m100_418M",
    device=device,
    batch_size=16,
)

# Set the model config's forced BOS token to the tokenizer's language id
# for "ur" (Urdu).
translator.model.config.forced_bos_token_id = translator.tokenizer.get_lang_id(
    "ur"
)

def batch_translate_texts(sentences):
    """Translate a batch of English sentences to Urdu.

    Args:
        sentences: Texts passed to the module-level ``translator`` pipeline
            with ``src_lang="en"`` and ``tgt_lang="ur"``.

    Returns:
        A list holding the ``'translation_text'`` value of each pipeline
        result, in the order the pipeline returned them.
    """
    results = translator(sentences, src_lang="en", tgt_lang="ur")
    urdu_texts = []
    for result in results:
        urdu_texts.append(result['translation_text'])
    return urdu_texts

def translate_file(file_path):
    """Translate the 'English Sentence' column of an Excel file into Urdu.

    The workbook is read with pandas, every non-blank sentence is translated
    in batches via ``batch_translate_texts``, and the results are stored in a
    new 'Urdu Sentence (facebook/m2m100_418M)' column. The augmented sheet is
    written to ``<input basename>-translated.xlsx`` in the current working
    directory and then handed to ``files.download``.

    Blank or missing cells are not sent to the model; their translation is
    left as an empty string. Non-text cells (e.g. numbers) are converted with
    ``str`` before translation.

    Args:
        file_path: Path of the Excel workbook to translate.

    Returns:
        None. If the 'English Sentence' column is missing, a message is
        printed and nothing is written.
    """
    df = pd.read_excel(file_path)

    if 'English Sentence' not in df.columns:
        print(f"❌ Skipping {file_path}: 'English Sentence' column not found.")
        return

    # Normalise cells: missing values become "", everything else becomes text
    # so the translator only ever receives strings.
    english_sentences = [
        "" if pd.isna(value) else str(value)
        for value in df['English Sentence']
    ]

    # Only non-blank rows go to the model; remember each row position so the
    # output column stays aligned with the input rows.
    pending = [
        (row, text)
        for row, text in enumerate(english_sentences)
        if text.strip()
    ]
    urdu_transcripts = [""] * len(english_sentences)

    # Break into batches (also drives the progress bar).
    batch_size = 16
    for start in tqdm(range(0, len(pending), batch_size), desc=f"Translating {file_path}"):
        chunk = pending[start:start + batch_size]
        outputs = batch_translate_texts([text for _, text in chunk])
        for (row, _), urdu in zip(chunk, outputs):
            urdu_transcripts[row] = urdu

    # Save results
    df['Urdu Sentence (facebook/m2m100_418M)'] = urdu_transcripts

    local_output_path = os.path.splitext(os.path.basename(file_path))[0] + '-translated.xlsx'
    df.to_excel(local_output_path, index=False)
    print(f"✅ Saved translated file locally: {local_output_path}")

    # ✅ Download from Colab
    files.download(local_output_path)

# Script entry point. The guard must use the dunder names ``__name__`` and
# "__main__"; the single-underscore spelling raises NameError.
if __name__ == "__main__":
    # Upload the workbook(s) to the Colab session manually before running,
    # then list their file names here.
    files_to_translate = ['umait_dataset.xlsx']

    for file in files_to_translate:
        # Missing files are reported and skipped rather than aborting the run.
        if os.path.exists(file):
            translate_file(file)
        else:
            print(f"⚠️ File not found: {file}")

    print("🎉 Translation and download completed!")