<a href="https://colab.research.google.com/github/basavakruti/NLP-exps/blob/main/bkm_text_summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from collections import Counter
import string

# Fetch the NLTK data packages this notebook depends on. Re-running is harmless:
# packages that are already installed are reported as "already up-to-date".
nltk.download('punkt')      # tokenizer models backing sent_tokenize / word_tokenize
nltk.download('punkt_tab')  # presumably the Punkt data format newer NLTK releases load — confirm against the installed version
nltk.download('stopwords')  # word lists read through stopwords.words("english")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
def summarize_text(text, summary_ratio=0.3):
    """
    Summarizes the input text using frequency-based extractive summarization.

    Each sentence is scored by summing the normalized frequencies of the
    non-stopword tokens it contains. The highest-scoring sentences are kept
    and returned in the order in which they appear in the text, so the
    summary reads like the original.

    Parameters:
        text (str): Input text to summarize
        summary_ratio (float): Ratio of sentences to include in summary
            (default=0.3). The count is truncated toward zero, but at least
            one sentence is always returned.

    Returns:
        str: The selected sentences joined by single spaces, or the message
            "No sentences found in the input text." when the tokenizer finds
            no sentences.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        return "No sentences found in the input text."

    # Depends only on the sentence count, so compute it once up front rather
    # than inside the scoring loop.
    num_sentences = max(1, int(len(sentences) * summary_ratio))

    stop_words = set(stopwords.words("english"))
    # Keep tokens that contain at least one letter or digit. A substring test
    # against string.punctuation lets multi-character punctuation tokens such
    # as "--", "..." or "``" through, and they then skew the frequency table.
    words = [
        word
        for word in word_tokenize(text.lower())
        if word not in stop_words and any(ch.isalnum() for ch in word)
    ]

    word_frequencies = Counter(words)
    # default=1 avoids max() failing when every token was filtered out.
    max_frequency = max(word_frequencies.values(), default=1)
    normalized = {
        word: count / max_frequency for word, count in word_frequencies.items()
    }

    # Score by position instead of by sentence text: repeated sentences stay
    # separate entries, and sentences without any scored token get 0 so they
    # remain eligible (ranked last) when the ratio asks for them.
    scores = [
        sum(normalized.get(token, 0) for token in word_tokenize(sentence.lower()))
        for sentence in sentences
    ]

    # sorted() is stable even with reverse=True, so ties favor earlier sentences.
    ranked = sorted(range(len(sentences)), key=scores.__getitem__, reverse=True)
    # Restore document order for the sentences that made the cut.
    chosen = sorted(ranked[:num_sentences])

    return " ".join(sentences[index] for index in chosen)

In [None]:
if __name__ == "__main__":
    # Small demo: a three-sentence sample text, summarized with half of its
    # sentences requested.
    text_example = """
    Natural Language Processing (NLP) helps computers understand human language.
    Basavakruti uses NLP to summarize text automatically.
    With NLP, tasks like translation and sentiment analysis become easier.
    """
    summary = summarize_text(text_example, summary_ratio=0.5)

    # A single print with a newline separator emits the same characters as
    # printing each heading and body on its own.
    print(
        "Original Text:\n",
        text_example,
        "\nSummarized Text:\n",
        summary,
        sep="\n",
    )

Original Text:


    Natural Language Processing (NLP) helps computers understand human language. 
    Basavakruti uses NLP to summarize text automatically. 
    With NLP, tasks like translation and sentiment analysis become easier. 
    

Summarized Text:


    Natural Language Processing (NLP) helps computers understand human language.
