# Summarizing Key Issues

### Loading responses and grouping comments by issue

In [16]:
import pandas as pd

# Load the combined responses; the code below relies on "Issue" and "Comment" columns.
df = pd.read_csv("combined_responses.csv")
# Strip whitespace from header names so the column lookups below match exactly.
df.columns = df.columns.str.strip()

# One list of comment strings per issue code.
# Missing comments are dropped *before* the str cast: astype(str) would otherwise
# turn NaN into the literal text "nan", which would then be fed to the summarizer.
# Dropping inside the lambda keeps every issue key present (possibly with an empty list).
grouped_comments = df.groupby("Issue")["Comment"].apply(
    lambda comments: comments.dropna().astype(str).tolist()
)
# Row count per issue code; counts every row, including rows with a missing comment.
counts = df["Issue"].value_counts().to_dict()
counts

{'WAT': 1001,
 'GEN': 846,
 'SO': 325,
 'FIN': 263,
 'PER': 231,
 'NEPA': 209,
 'ALT': 165,
 'WET': 149,
 'PD': 148,
 'MERC': 132,
 'O': 132,
 'MEPA': 105,
 'HU': 94,
 'LAN': 92,
 'CUM': 83,
 'GT': 78,
 'AIR': 72,
 'WI': 61,
 'CR': 59,
 'ROD': 39,
 'WILD': 24,
 'VEG': 22,
 'AQ': 21,
 'COE': 18,
 'HAZ': 16,
 'LU': 15,
 'EDIT': 3,
 'N': 3}

### Summarizing topics

In [17]:
import time
from pathlib import Path

import pandas as pd
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer

def summarize_text(text, sentence_count=5):
    """Produce an extractive LSA summary of ``text``.

    Args:
        text: Plain text to summarize.
        sentence_count: Number of sentences requested from the summarizer.

    Returns:
        The sentences selected by the summarizer, each converted with ``str``
        and joined by single spaces into one string.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lsa = LsaSummarizer()
    picked_sentences = lsa(document, sentence_count)
    return ' '.join(map(str, picked_sentences))

def split_comments_into_chunks(comments, max_chars=4096):
    """Pack comments into space-joined chunks of at most ``max_chars`` characters.

    Each comment is converted with ``str`` and stripped; comments that are empty
    after stripping are skipped. Comments are never split, so a single comment
    longer than ``max_chars`` becomes a chunk of its own that exceeds the limit.

    Args:
        comments: Iterable of comments (any objects convertible with ``str``).
        max_chars: Maximum length of a joined chunk, separators included.

    Returns:
        List of non-empty chunk strings, in the original comment order.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # always equals len(" ".join(current_chunk))
    for comment in comments:
        comment = str(comment).strip()
        if not comment:
            continue
        # A separator is only paid when appending to a chunk that already has content.
        added_length = len(comment) + (1 if current_chunk else 0)
        # Flush only a non-empty chunk; otherwise an oversized first comment
        # would emit a spurious "" chunk ahead of itself.
        if current_chunk and current_length + added_length > max_chars:
            chunks.append(" ".join(current_chunk))
            current_chunk = [comment]
            current_length = len(comment)
        else:
            current_chunk.append(comment)
            current_length += added_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# Settings shared by the per-chunk pass and the final "summary of summaries" pass.
MAX_CHUNK_CHARS = 4096
SUMMARY_SENTENCES = 5
OUTPUT_PATH = Path("results/issues_summarized1.csv")

# One (issue, count, summary) tuple per issue; turned into a DataFrame in one go below.
summaries = []

for issue_id, comment_list in grouped_comments.items():
    print(f"Summarizing Issue ID: {issue_id}")
    # perf_counter is monotonic, so the elapsed time is immune to system clock adjustments.
    start_time = time.perf_counter()
    try:
        chunks = split_comments_into_chunks(comment_list, max_chars=MAX_CHUNK_CHARS)
        chunk_summaries = [
            summarize_text(chunk, sentence_count=SUMMARY_SENTENCES) for chunk in chunks
        ]
        if len(chunk_summaries) == 1:
            # A single chunk is already the final summary; no second pass needed.
            final_summary = chunk_summaries[0]
        else:
            # Condense the per-chunk summaries into one summary for the issue.
            joined_summary = " ".join(chunk_summaries)
            final_summary = summarize_text(joined_summary, sentence_count=SUMMARY_SENTENCES)
    except Exception as e:
        # Best effort: report the failure and keep going so one bad issue
        # does not lose the summaries of all the others.
        print(f"  Error summarizing {issue_id}: {e}")
        final_summary = "[Summary Failed]"
    elapsed = time.perf_counter() - start_time
    print(f"Finished in {elapsed:.2f} seconds\n")
    summaries.append((issue_id, counts.get(issue_id, 0), final_summary))

summary_df = pd.DataFrame(summaries, columns=["Issue", "Count", "Summary"])
summary_df = summary_df.sort_values(by="Count", ascending=False)
# to_csv does not create missing directories; make sure the target folder exists
# so a fresh checkout does not fail after all the summarization work is done.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
summary_df.to_csv(OUTPUT_PATH, index=False)

Summarizing Issue ID: AIR
Finished in 0.40 seconds

Summarizing Issue ID: ALT
Finished in 0.96 seconds

Summarizing Issue ID: AQ
Finished in 0.07 seconds

Summarizing Issue ID: COE
Finished in 0.07 seconds

Summarizing Issue ID: CR
Finished in 0.25 seconds

Summarizing Issue ID: CUM
Finished in 0.34 seconds

Summarizing Issue ID: EDIT
Finished in 0.01 seconds

Summarizing Issue ID: FIN
Finished in 0.92 seconds

Summarizing Issue ID: GEN
Finished in 2.63 seconds

Summarizing Issue ID: GT
Finished in 0.49 seconds

Summarizing Issue ID: HAZ
Finished in 0.06 seconds

Summarizing Issue ID: HU
Finished in 0.26 seconds

Summarizing Issue ID: LAN
Finished in 0.32 seconds

Summarizing Issue ID: LU
Finished in 0.05 seconds

Summarizing Issue ID: MEPA
Finished in 0.26 seconds

Summarizing Issue ID: MERC
Finished in 0.74 seconds

Summarizing Issue ID: N
Finished in 0.01 seconds

Summarizing Issue ID: NEPA
Finished in 1.13 seconds

Summarizing Issue ID: O
Finished in 0.65 seconds

Summarizing Issue