In [1]:
# --------------------------------------------
# Frequency-Based Extractive Summarization
# --------------------------------------------

import spacy
import string
import heapq

# Step 2: Load SpaCy English model
nlp = spacy.load('en_core_web_sm')

# Step 3: Input Text
text = """
Artificial Intelligence (AI) is revolutionizing industries across the globe. 
Healthcare has benefited from AI-powered diagnosis tools, robotic surgeries, and patient monitoring systems.
Financial institutions utilize AI for fraud detection, algorithmic trading, and customer service chatbots. 
The transportation sector is undergoing changes with autonomous vehicles and intelligent traffic management. 
Despite its advantages, AI also presents ethical dilemmas like data privacy issues, algorithmic bias, and job automation concerns.
Research organizations and governments are working together to create guidelines for the responsible development of AI technologies.
Future innovations are expected to make AI an even more integral part of daily life, impacting education, entertainment, and personalized services.
"""

# Step 4: Preprocessing
doc = nlp(text)

# Extract sentences
sentences = [sent.text.strip() for sent in doc.sents]

print("\n🔵 Sentences:")
for idx, sent in enumerate(sentences):
    print(f"{idx+1}: {sent}")

# Step 5: Word Frequency Calculation

word_freq = {}

# Clean and count non-stopword tokens
for token in doc:
    if token.text.lower() not in nlp.Defaults.stop_words and token.text.lower() not in string.punctuation:
        if token.text.lower() not in word_freq.keys():
            word_freq[token.text.lower()] = 1
        else:
            word_freq[token.text.lower()] += 1

print("\n🔵 Word Frequencies:")
for word, freq in word_freq.items():
    print(f"{word}: {freq}")

# Normalize Frequencies
max_freq = max(word_freq.values())
for word in word_freq.keys():
    word_freq[word] = word_freq[word] / max_freq

print("\n🔵 Normalized Word Frequencies:")
for word, freq in word_freq.items():
    print(f"{word}: {freq:.4f}")

# Step 6: Sentence Scoring
sentence_scores = {}

for sent in sentences:
    sent_doc = nlp(sent.lower())
    for token in sent_doc:
        if token.text in word_freq.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_freq[token.text]
            else:
                sentence_scores[sent] += word_freq[token.text]

print("\n🔵 Sentence Scores:")
for sent, score in sentence_scores.items():
    print(f"\nSentence: {sent}\nScore: {score:.4f}")

# Step 7: Select Top Sentences
summary_sentences = heapq.nlargest(3, sentence_scores, key=sentence_scores.get)

print("\n🔵 Selected Top Sentences for Summary:")
for idx, sent in enumerate(summary_sentences):
    print(f"{idx+1}: {sent}")

# Step 8: Final Summary
summary = ' '.join(summary_sentences)

print("\n🔵 Final Extracted Summary:")
print(summary)



🔵 Sentences:
1: Artificial Intelligence (AI) is revolutionizing industries across the globe.
2: Healthcare has benefited from AI-powered diagnosis tools, robotic surgeries, and patient monitoring systems.
3: Financial institutions utilize AI for fraud detection, algorithmic trading, and customer service chatbots.
4: The transportation sector is undergoing changes with autonomous vehicles and intelligent traffic management.
5: Despite its advantages, AI also presents ethical dilemmas like data privacy issues, algorithmic bias, and job automation concerns.
6: Research organizations and governments are working together to create guidelines for the responsible development of AI technologies.
7: Future innovations are expected to make AI an even more integral part of daily life, impacting education, entertainment, and personalized services.

🔵 Word Frequencies:

: 8
artificial: 1
intelligence: 1
ai: 6
revolutionizing: 1
industries: 1
globe: 1
healthcare: 1
benefited: 1
powered: 1
diagnosis