In [2]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from collections import defaultdict
import string

# Fetch the NLTK resources used below: the tokenizer data
# ('punkt' and 'punkt_tab') and the stopword lists ('stopwords').
for resource in ('punkt', 'stopwords', 'punkt_tab'):
    nltk.download(resource)

def summarize_text(text, num_sentences=3):
    """Return an extractive summary built from the top-scoring sentences.

    Every content word (not an English stopword and containing at least
    one alphanumeric character) is weighted by its frequency in *text*,
    normalized by the most frequent word's count. A sentence's score is
    the sum of the weights of its tokens.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to return. ``None``
            places no limit on the number of sentences.

    Returns:
        The selected sentences joined by single spaces, ordered from
        highest to lowest score (ties keep document order). An empty
        string is returned when *text* is empty or whitespace-only, or
        when *num_sentences* is zero or negative.
    """
    # Guard first: degenerate input never needs to reach the tokenizers,
    # and a negative count would otherwise slice off the tail instead of
    # limiting the summary.
    if not text or not text.strip():
        return ""
    if num_sentences is not None and num_sentences <= 0:
        return ""

    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words('english'))
    # Requiring an alphanumeric character drops multi-character
    # punctuation tokens such as "``", "''" or "..." that a substring
    # test against string.punctuation lets through.
    content_words = [
        token
        for token in word_tokenize(text.lower())
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]

    counts = FreqDist(content_words)
    # default=0 keeps max() from raising when the text holds nothing but
    # stopwords and punctuation; the comprehension below is then empty,
    # so no division by zero can happen.
    max_count = max(counts.values(), default=0)
    weights = {word: count / max_count for word, count in counts.items()}

    # dict.fromkeys de-duplicates while preserving document order, so a
    # repeated sentence is scored once instead of accumulating a score
    # for every occurrence.
    sentence_scores = {
        sentence: sum(
            weights.get(token, 0) for token in word_tokenize(sentence.lower())
        )
        for sentence in dict.fromkeys(sentences)
    }

    # sorted() is stable, so equally scored sentences stay in document order.
    ranked_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )

    return " ".join(ranked_sentences[:num_sentences])

# Demo: print a short sample passage, then its two-sentence summary.
text = """
Natural Language Processing (NLP) is a subfield of artificial intelligence that deals with the interaction between computers and humans using natural language. The ultimate objective of NLP is to read, decipher, understand, and make sense of human language in a valuable way.
By utilizing NLP, computers can process large amounts of natural language data. Applications of NLP are wide-ranging, including machine translation, chatbots, and text summarization.
"""
print("Original Text:", text, sep="\n")

# The heading is printed before the summary is computed, as in a plain
# sequence of print calls.
print("\nSummarized Text:")
summary = summarize_text(text, num_sentences=2)
print(summary)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Original Text:

Natural Language Processing (NLP) is a subfield of artificial intelligence that deals with the interaction between computers and humans using natural language. The ultimate objective of NLP is to read, decipher, understand, and make sense of human language in a valuable way.
By utilizing NLP, computers can process large amounts of natural language data. Applications of NLP are wide-ranging, including machine translation, chatbots, and text summarization.


Summarized Text:

Natural Language Processing (NLP) is a subfield of artificial intelligence that deals with the interaction between computers and humans using natural language. The ultimate objective of NLP is to read, decipher, understand, and make sense of human language in a valuable way.
