# Cell 1: Import Required Libraries

In [2]:
from transformers import pipeline, AutoTokenizer
import torch

# Cell 2 :  Load Whisper ASR Model

In [None]:
# Initialize the ASR pipeline with the Whisper model.
# Use the first CUDA device when one is available and fall back to CPU
# otherwise -- the same device rule the summarization pipeline in this
# notebook applies.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=0 if torch.cuda.is_available() else -1,
)

# Cell 3 : Transcribe Audio to Text (test.wav → transcription.txt)

In [None]:
# Process the audio file and get the transcription.
# NOTE(review): return_timestamps=True is presumably what lets Whisper handle
# recordings longer than a single 30-second window -- confirm before removing.
result = pipe("test.wav", return_timestamps=True)

# Extract the text from the result
text = result['text']

# Save the text to a file. The encoding is pinned to UTF-8 because the
# summarization step reads this file back as UTF-8; relying on the platform
# default can fail (or corrupt characters) on non-UTF-8 locales.
with open("transcription.txt", "w", encoding="utf-8") as file:
    file.write(text)

print("Transcription saved as transcription.txt")

# Cell 4 : Define Chunked Summarization Function

In [None]:
def _chunk_by_tokens(tokenizer, text, max_tokens=768, overlap=64):
    """Split *text* into overlapping windows of at most *max_tokens* tokens.

    Args:
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        text: Text to split.
        max_tokens: Maximum number of tokens per window.
        overlap: Number of tokens shared between consecutive windows.

    Returns:
        A list of decoded text chunks; empty when *text* yields no tokens.

    Raises:
        ValueError: If *overlap* is negative or not smaller than *max_tokens*
            (the window would never advance).
    """
    if not 0 <= overlap < max_tokens:
        raise ValueError("overlap must be >= 0 and smaller than max_tokens")

    # Special tokens are left out here; the decoded chunks drop them anyway
    # (skip_special_tokens=True), so they would only shift window boundaries.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    stride = max_tokens - overlap

    chunks = []
    for start in range(0, len(token_ids), stride):
        window = token_ids[start:start + max_tokens]
        chunks.append(tokenizer.decode(window, skip_special_tokens=True))
        # Once a window reaches the end of the text, any later window would
        # hold only overlap tokens that this window already covered.
        if start + max_tokens >= len(token_ids):
            break
    return chunks


def _summarize_chunks(summarizer, chunks):
    """Summarize each chunk on its own and return the summaries that succeeded.

    A chunk that raises or yields no ``summary_text`` is reported on stdout and
    skipped, so one bad chunk does not abort the whole run.
    """
    summaries = []
    total = len(chunks)
    for i, chunk in enumerate(chunks, 1):
        try:
            chunk_word_count = len(chunk.split())

            # Target roughly a third of the chunk's word count, clamped to
            # 30..128, with a lower bound of half that (never below 15).
            max_len = min(128, max(30, chunk_word_count // 3))
            min_len = max(15, max_len // 2)

            summary = summarizer(
                chunk,
                max_length=max_len,
                min_length=min_len,
                do_sample=False,
                truncation=True,
                no_repeat_ngram_size=2
            )

            if summary and isinstance(summary, list) and 'summary_text' in summary[0]:
                summary_text = summary[0]['summary_text']
                summaries.append(summary_text)
                print(f"✓ Chunk {i}/{total} processed ({len(summary_text.split())} words)")
            else:
                print(f"⚠️ Chunk {i} produced empty summary")

        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"✕ Chunk {i} failed: {str(e)}")
            continue
    return summaries


def _condense_to_single_chunk(tokenizer, summarizer, text, max_rounds=5):
    """Re-summarize *text* until it fits in one chunk.

    The final summarization pass truncates its input, so text spanning several
    chunks would silently lose its tail. Each round splits the text, summarizes
    the pieces and joins them again. The loop stops when the text fits in one
    chunk, when a round makes no progress, or after *max_rounds* rounds; in the
    latter two cases the text is returned as far as it could be condensed.
    """
    for _ in range(max_rounds):
        pieces = _chunk_by_tokens(tokenizer, text)
        if len(pieces) <= 1:
            break
        print(f"↻ Combined summaries span {len(pieces)} chunks; condensing again")
        shorter = ' '.join(_summarize_chunks(summarizer, pieces))
        if not shorter or len(shorter) >= len(text):
            break
        text = shorter
    return text


def summarize_large_transcript(input_file, output_file="call_summary.txt"):
    """Summarize a transcript file in chunks and write the result to disk.

    The transcript is split into overlapping token windows, each window is
    summarized, the partial summaries are joined (and condensed again if they
    are still too long for a single pass), and a final summary is generated
    from the joined text.

    Args:
        input_file: Path of the UTF-8 transcript to summarize.
        output_file: Path the final summary is written to (UTF-8).

    Returns:
        The final summary text, or ``None`` if any step failed. Failures are
        reported on stdout rather than raised.
    """
    try:
        # 1. INPUT HANDLING
        # Done before the model is loaded so a missing or empty file fails fast.
        with open(input_file, 'r', encoding='utf-8') as f:
            transcript = f.read().strip()

        if not transcript:
            raise ValueError("Empty transcript file")

        # 2. MODEL SETUP
        model_name = "facebook/bart-large-cnn"
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        summarizer = pipeline(
            task="summarization",
            model=model_name,
            tokenizer=tokenizer,
            device=0 if torch.cuda.is_available() else -1,
            framework="pt"
        )

        # 3. CHUNKING
        chunks = _chunk_by_tokens(tokenizer, transcript)
        if not chunks:
            raise ValueError("Failed to create text chunks")

        # 4. PER-CHUNK SUMMARIES
        successful_summaries = _summarize_chunks(summarizer, chunks)
        if not successful_summaries:
            raise RuntimeError("No chunks processed successfully")

        # 5. FINAL SUMMARY GENERATION
        combined = _condense_to_single_chunk(
            tokenizer, summarizer, ' '.join(successful_summaries)
        )
        final_word_count = len(combined.split())

        # Adjust final summary length based on content
        final_max = min(150, max(50, final_word_count // 2))
        final_min = min(30, final_max // 2)

        final_summary = summarizer(
            combined,
            max_length=final_max,
            min_length=final_min,
            truncation=True
        )[0]['summary_text']

        # 6. OUTPUT VALIDATION
        # A very short final summary is replaced by the first chunk summary.
        if len(final_summary.split()) < 20:
            final_summary = successful_summaries[0]

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(final_summary)

        print(f"\n✔ Success! Summary ({len(final_summary.split())} words) saved to {output_file}")
        return final_summary

    except Exception as e:
        # Top-level boundary: callers test the return value for None.
        print(f"🔥 Critical failure: {str(e)}")
        return None



# Cell 5 : Run Summarizer (transcription.txt → call_summary.txt)

In [None]:
# Entry point: summarize transcription.txt and print either the resulting
# summary or a failure notice (the summarizer returns None on failure).
if __name__ == "__main__":
    print("🚀 Starting advanced summarization...")
    summary = summarize_large_transcript("transcription.txt")
    if not summary:
        print("❌ Summarization failed. Please check your input file.")
    else:
        # One print call; sep="\n" reproduces the line break between the
        # banner and the summary text.
        print("\n🌈 FINAL SUMMARY:\n", summary, sep="\n")