In [11]:
pip install nltk scikit-learn textstat requests beautifulsoup4 numpy

Collecting textstat
  Downloading textstat-0.7.7-py3-none-any.whl.metadata (15 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Collecting cmudict (from textstat)
  Downloading cmudict-1.0.33-py3-none-any.whl.metadata (3.6 kB)
Downloading textstat-0.7.7-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hDownloading cmudict-1.0.33-py3-none-any.whl (939 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m939.4/939.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pyphen, cmudict, textstat
Successfully installed cmudict-1.0.33 pyphen-0.17.2 textstat-0.7.7
Note: you 

In [20]:
import re
import nltk
from collections import Counter
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import heapq

# Download required resources
nltk.download('punkt')
nltk.download('stopwords')

def clean_text(text):
    """Clean and preprocess the text"""
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^a-zA-Z0-9\s\.]', '', text)
    return text.strip()

def calculate_word_frequency(sentences, stop_words):
    """Calculate normalized word frequency"""
    word_freq = Counter()
    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        for word in words:
            if word.isalpha() and word not in stop_words:
                word_freq[word] += 1
    max_freq = max(word_freq.values(), default=1)
    for word in word_freq:
        word_freq[word] /= max_freq
    return word_freq

def score_sentences(sentences, word_freq):
    """Assign a score to each sentence"""
    scores = {}
    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        score = sum(word_freq.get(word, 0) for word in words if word.isalpha())
        if len(words) > 0:
            scores[sentence] = score / len(words)
    return scores

def summarize(text, num_sentences=3):
    """Return summary with top N scored sentences"""
    text = clean_text(text)
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text
    stop_words = set(stopwords.words('english'))
    word_freq = calculate_word_frequency(sentences, stop_words)
    sentence_scores = score_sentences(sentences, word_freq)
    summary_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return ' '.join([s for s in sentences if s in summary_sentences])

# === Example usage ===
if __name__ == "__main__":
    print("=== TEXT SUMMARIZER ===")
    print("Paste your article below. Press Enter twice to summarize.\n")

    lines = []
    while True:
        line = input()
        if line.strip() == "" and lines:
            break
        lines.append(line)
    input_text = "\n".join(lines)

    result = summarize(input_text, num_sentences=3)
    print("\n=== SUMMARY ===")
    print(result)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/sanjayjangid/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sanjayjangid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


=== TEXT SUMMARIZER ===
Paste your article below. Press Enter twice to summarize.


=== SUMMARY ===
At each grid point the model predicts five Earthsurface variables  including temperature wind speed and direction and mean sealevel pressure  and six atmospheric variables at each of 37 levels of altitude including specific humidity wind speed and direction and temperature. While GraphCasts training was computationally intensive the resulting forecasting model is highly efficient. In a comprehensive performance evaluation against the goldstandard deterministic system HRES GraphCast provided more accurate predictions on more than 90 of 1380 test variables and forecast lead times see our Science paper for details.
