In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK data packages this script relies on: the tokenizer data
# ('punkt') and the stop-word lists ('stopwords').
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

def preprocess_text(text):
    """Return *text* lower-cased with English stop words removed.

    The text is split with ``word_tokenize``, every token is lower-cased,
    tokens found in NLTK's English stop-word list are dropped, and the
    survivors are joined back together with single spaces. The stop-word
    check is the only filter, so any other token the tokenizer emits is kept.

    Args:
        text: The raw string to normalize.

    Returns:
        A single space-separated string of the remaining lower-cased tokens.
    """
    words = word_tokenize(text)
    english_stop_words = set(stopwords.words('english'))
    # Lower-case lazily and filter in one pass; membership is tested on the
    # lower-cased form, exactly as the stop-word list expects.
    return ' '.join(
        word for word in map(str.lower, words)
        if word not in english_stop_words
    )

def generate_summary(text, num_sentences=2):
    """Build an extractive summary made of the most central sentences.

    The text is split into sentences, each sentence is normalized with
    ``preprocess_text`` and turned into a TF-IDF vector, and every sentence is
    scored by the sum of its cosine similarities to all sentences. The
    ``num_sentences`` best-scoring sentences are returned in their original
    document order, joined by single spaces.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string. It is empty when ``text`` contains no sentences
        or ``num_sentences`` is not positive. When the text has no more than
        ``num_sentences`` sentences, all of them are returned.
    """
    # Tokenize once; the sentence list is reused for scoring and for output.
    sentences = nltk.sent_tokenize(text)
    if num_sentences <= 0 or not sentences:
        return ''
    if len(sentences) <= num_sentences:
        # Nothing to rank: every sentence fits in the requested budget.
        return ' '.join(sentences)

    # One document per sentence, so that rows of the TF-IDF matrix (and of
    # the similarity matrix) line up with indices into ``sentences``.
    preprocessed_sentences = [preprocess_text(sentence) for sentence in sentences]

    tfidf_vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(preprocessed_sentences)
    except ValueError:
        # Raised when no usable term survives preprocessing (empty
        # vocabulary). Without features there is nothing to rank on, so fall
        # back to the leading sentences.
        return ' '.join(sentences[:num_sentences])

    # Pairwise sentence similarities; a sentence's score is how similar it is
    # to the document's sentences overall.
    similarity_matrix = cosine_similarity(tfidf_matrix)
    scores = similarity_matrix.sum(axis=1)

    # sorted() is stable, so ties are resolved in favour of earlier sentences.
    ranked_sentences = sorted(
        range(len(sentences)), key=lambda index: scores[index], reverse=True
    )
    # Emit the selected sentences in document order so the summary reads
    # naturally.
    selected = sorted(ranked_sentences[:num_sentences])
    return ' '.join(sentences[index] for index in selected)


# Read one line of text from the user and print its extractive summary.
user_text = input("Enter the text to be summarized:\n")
summary = generate_summary(user_text)
print("Summary:", summary, sep="\n")


[nltk_data] Downloading package punkt to C:\Users\Chaitanya
[nltk_data]     Sivamani\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Chaitanya
[nltk_data]     Sivamani\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Enter the text to be summarized:
Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence  concerned with the interactions between computers and human language, in particular how to program computers  to process and analyze large amounts of natural language data. Challenges in natural language processing  frequently involve speech recognition, natural language understanding, and natural language generation.  Tokenization is the process of breaking text into words, phrases, symbols, or other meaningful elements called tokens.  The list of tokens becomes input for further processing, such as parsing or text mining. Tokenization is useful both  in linguistics (where it is a form of text segmentation) and in computer science, where it forms part of lexical analysis. 
Summary:
Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence  concerned with the interactions between computers and