In [None]:
# Mount Google Drive at /content/drive. The model checkpoint and the Excel/text
# paths used later in this notebook are all located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()
from transformers import MBartForConditionalGeneration, AutoTokenizer,MBart50Tokenizer,MBart50TokenizerFast,AutoModelForSeq2SeqLM

In [None]:
# ✅ Custom summary function
def generate_summary(text, model, tokenizer, device,
                     max_input_length=512, max_length=200, min_length=100, num_beams=4):
    """Summarize ``text`` with a seq2seq model using deterministic beam search.

    Args:
        text: Source text to summarize. ``None`` or blank text returns ``""``
            without calling the model.
        model: Object exposing ``.to(device)`` and ``.generate(...)``.
        tokenizer: Callable tokenizer exposing ``pad_token_id`` and ``decode``.
        device: Device the model and the encoded inputs are moved to
            (e.g. ``"cuda"`` or ``"cpu"``).
        max_input_length: Inputs are truncated to this many tokens.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        str: The first generated sequence, decoded with special tokens removed,
        or ``""`` when there is nothing to summarize.
    """
    # Nothing to summarize: with min_length enforced, the model would otherwise
    # be forced to produce a long "summary" of an empty input.
    if text is None or not str(text).strip():
        return ""

    model.to(device)

    # A single sequence needs no padding. Padding to the maximum length only
    # appended masked pad tokens that the encoder still had to process.
    inputs = tokenizer(
        text,
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # Beam search without sampling, so repeated calls give the same output.
    summary_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        min_length=min_length,
        length_penalty=1.0,
        num_beams=num_beams,
        no_repeat_ngram_size=4,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=False
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
# Summarize every transcribed chunk with the fine-tuned model, then save the
# results both as an Excel sheet and as one merged text file.
model_path = "/content/drive/My Drive/Thesis_Dataset/fine_tuned_bangla_t5"
# NOTE(review): this path uses "My Drive" while the data paths below use
# "MyDrive" — confirm both resolve in this runtime.

# Load the model and its tokenizer from the same checkpoint directory
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load transcribed text from Excel
input_excel = "/content/drive/MyDrive/Thesis_Transcription/Text_Output/chunk_transcripts.xlsx"
df = pd.read_excel(input_excel)

# Fail early with a readable message instead of partway into a long run.
if 'Text' not in df.columns:
    raise KeyError(f"Expected a 'Text' column in {input_excel}; found columns: {list(df.columns)}")


def _summarize_cell(cell):
    """Summarize one 'Text' cell; missing or blank cells yield None."""
    # str(NaN) is the literal "nan", which would otherwise be summarized as
    # if it were real transcript text.
    if pd.isna(cell):
        return None
    text = str(cell)
    if not text.strip():
        return None
    return generate_summary(text, model, tokenizer, device)


# Apply the summary function to each row
print("📄 Generating summaries for all transcribed chunks...")
df['Summary'] = df['Text'].progress_apply(_summarize_cell)

# Save to new Excel
output_excel_path = "/content/drive/MyDrive/Thesis_Transcription/Text_Output/text_with_summaries_banglat5.xlsx"
df.to_excel(output_excel_path, index=False)

print(f"✅ Excel with summaries saved: {output_excel_path}")

# Save all summaries as one merged text; rows without a summary are dropped
merged_summary_text = "\n".join(df['Summary'].dropna().astype(str).tolist())
summary_txt_path = "/content/drive/MyDrive/Thesis_Transcription/Text_Output/merged_summary_banglat5_without_regex.txt"
with open(summary_txt_path, "w", encoding="utf-8") as f:
    f.write(merged_summary_text)

