# Intelligent Document Summarizer
This notebook demonstrates extractive and abstractive summarization on business documents while preserving diverse viewpoints.

## Setup
Install the dependencies listed in `requirements.txt`. Packages are imported in the cells that use them.

In [None]:
!pip install -r requirements.txt

## Load Sample Data

In [None]:
import glob
from pathlib import Path
# Read every sample document into memory, keyed by file name without extension.
# Path.read_text opens and closes each file, so no handles are left open, and
# the explicit encoding keeps the result independent of the platform default.
files = sorted(glob.glob('sample_data/*.txt'))
docs = {Path(f).stem: Path(f).read_text(encoding='utf-8') for f in files}
docs

## Extractive Summarization
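We use the Sumy LexRank algorithm with basic viewpoint preservation: if no line containing a contrast or concern cue (`however`, `but`, `concern`, `question`) appears in the extractive summary, the first such line from the document is appended.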

In [None]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

def extractive_summary(text, sentences=2, preserve_viewpoints=True):
    """Return a LexRank extractive summary of *text*.

    Args:
        text: The document to summarize.
        sentences: Number of sentences requested from the LexRank summarizer.
        preserve_viewpoints: When true, look for lines of *text* that contain
            one of the cue words 'however', 'but', 'concern' or 'question'
            (case-insensitive substring match). If none of those lines is
            already present in the summary, the first one is appended.

    Returns:
        The selected sentences joined with single spaces.
    """
    parser = PlaintextParser.from_string(text, Tokenizer('english'))
    summarizer = LexRankSummarizer()
    sent_text = [str(s) for s in summarizer(parser.document, sentences)]
    if preserve_viewpoints:
        cues = ('however', 'but', 'concern', 'question')
        # Split on the newline escape sequence (a literal line break inside a
        # quoted string is a syntax error). Lines are stripped so surrounding
        # whitespace cannot defeat the containment check below.
        viewpoint_sents = [
            line.strip()
            for line in text.split('\n')
            if any(cue in line.lower() for cue in cues)
        ]
        # Substring containment against the joined summary also catches a
        # viewpoint line that is already there, avoiding a duplicate append.
        joined = ' '.join(sent_text)
        if viewpoint_sents and not any(v in joined for v in viewpoint_sents):
            sent_text.append(viewpoint_sents[0])
    return ' '.join(sent_text)

# Show the extractive summary of every loaded document as "<name> : <summary>".
for name, text in docs.items():
    print(f'{name} : {extractive_summary(text)}')

## Abstractive Summarization
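We use a pretrained Transformer model (`sshleifer/distilbart-cnn-12-6`) and control the summary length through the `max_length` and `min_length` generation parameters.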

In [None]:
from transformers import pipeline
# Model identifier handed to the summarization pipeline below.
model_name = 'sshleifer/distilbart-cnn-12-6'
# Build the pipeline once at cell level; abstractive_summary calls this
# module-level object instead of creating a new pipeline per document.
summarizer = pipeline('summarization', model=model_name)

def abstractive_summary(text, max_length=60, min_length=20):
    """Return an abstractive summary of *text* using the module-level pipeline.

    Args:
        text: The document to summarize.
        max_length: Upper bound on the generated summary length.
        min_length: Lower bound on the generated summary length. It is
            clamped to *max_length* so a short ``max_length`` never produces
            contradictory generation limits.

    Returns:
        The generated summary text, or an empty string when *text* is empty
        or contains only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ''
    min_length = min(min_length, max_length)
    # truncation=True cuts inputs that exceed the model's maximum input
    # length instead of letting the model fail on over-long documents.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']

for name, text in docs.items():
    print(name, ':', abstractive_summary(text))

## Evaluation Metrics
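We calculate ROUGE-1 and ROUGE-L between each summary and its original document, plus a simple viewpoint coverage metric: the fraction of viewpoint lines (lines containing `however`, `but`, `concern`, or `question`) that appear verbatim in the summary.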

In [None]:
from rouge_score import rouge_scorer

def viewpoint_coverage(original, summary):
    """Return the fraction of viewpoint lines of *original* found in *summary*.

    A viewpoint line is any line of *original* containing one of the cue
    words 'however', 'but', 'concern' or 'question' (case-insensitive
    substring match). A line counts as covered when its stripped text occurs
    verbatim in *summary*.

    Args:
        original: The source document text.
        summary: The summary to check.

    Returns:
        A float between 0.0 and 1.0; 1.0 when *original* has no viewpoint
        lines.
    """
    cues = ('however', 'but', 'concern', 'question')
    # Split on the newline escape sequence; a literal line break inside a
    # quoted string is a syntax error.
    viewpoints = [
        line.strip()
        for line in original.split('\n')
        if any(cue in line.lower() for cue in cues)
    ]
    if not viewpoints:
        return 1.0
    covered = sum(1 for v in viewpoints if v in summary)
    return covered / len(viewpoints)

def evaluate(original, summary):
    """Score *summary* against *original* with ROUGE and viewpoint coverage.

    Returns a dict holding the scorer's 'rouge1' and 'rougeL' entries plus a
    'viewpoint_coverage' entry computed by viewpoint_coverage.
    """
    metrics = ['rouge1', 'rougeL']
    rouge = rouge_scorer.RougeScorer(metrics, use_stemmer=True)
    report = dict(rouge.score(original, summary))
    report['viewpoint_coverage'] = viewpoint_coverage(original, summary)
    return report

# Summarize each loaded document abstractively and print its evaluation scores.
for name, text in docs.items():
    summ = abstractive_summary(text)
    print(f'{name} {evaluate(text, summ)}')

## Length-Controlled Summaries for Different Audiences

In [None]:
# Each audience maps to the max_length passed to abstractive_summary.
audiences = {'executive': 30, 'detailed': 60}
for audience in audiences:
    length = audiences[audience]
    print(f'\nAudience: {audience}')
    for name, text in docs.items():
        print(f'{name} : {abstractive_summary(text, max_length=length)}')

## Conclusion
This notebook demonstrated extractive and abstractive summarization with viewpoint preservation.