In [1]:
!pip install nltk numpy networkx scikit-learn streamlit rouge


Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m362.9 kB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.9/9.9 MB 15.7 MB/s eta 0:00:00
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 48.5 MB/s eta 0:00:00
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)


In [2]:
import nltk
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

# Fetch every NLTK resource the notebook relies on in one place, so that a
# fresh "Restart & Run All" does not depend on downloads hidden in later cells.
# 'punkt_tab' is included because nltk.sent_tokenize needed it in addition to
# 'punkt' when this notebook was run.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)
from nltk.corpus import stopwords

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [11]:
# Input document to summarise: four paragraphs of English biography text about
# the cricketer Mahendra Singh Dhoni. It is passed unchanged to preprocess()
# and later reused as the reference text for the ROUGE comparison.
text = """
Mahendra Singh Dhoni ([məˈheːndrə ˈsɪŋɡʱ ˈdʱoːniː] ⓘ; born 7 July 1981) is an Indian professional cricketer who plays as a right-handed batter and a wicket-keeper. Widely regarded as one of the most prolific wicket-keeper batsmen and captains and one of the greatest ODI batsmen ever, he represented the Indian cricket team and was the captain of the side in limited overs formats from 2007 to 2017 and in test cricket from 2008 to 2014. Dhoni has captained the most international matches and is the most successful Indian captain. He has led India to victory in the 2007 ICC World Twenty20, the 2011 Cricket World Cup, and the 2013 ICC Champions Trophy, being the only captain to win three different limited overs ICC tournaments. He also led the teams that won the Asia Cup in 2010, 2016 and was a member of the title winning squad in 2018.

Born in Ranchi, Dhoni made his first class debut for Bihar in 1999. He made his debut for the Indian cricket team on 23 December 2004 in an ODI against Bangladesh and played his first test a year later against Sri Lanka. In 2007, he became the captain of the ODI side before taking over in all formats by 2008. Dhoni retired from test cricket in 2014 but continued playing in limited overs cricket till 2019. He has scored 17,266 runs in international cricket including 10,000 plus runs at an average of more than 50 in ODIs.

In the Indian Premier League (IPL), Dhoni plays for Chennai Super Kings (CSK), leading them to the final on ten occasions and winning it five times (2010, 2011, 2018, 2021 and 2023 ) jointly sharing this title with Rohit Sharma . He has also led CSK to two Champions League T20 titles in 2010 and 2014. Dhoni is among the few batsmen to have scored more than five thousand runs in the IPL, as well as being the first wicket-keeper to do so.

In 2008, Dhoni was awarded India's highest sport honour Major Dhyan Chand Khel Ratna Award by Government of India. He received the fourth highest civilian award Padma Shri in 2009 and third highest civilian award Padma Bhushan in 2018. Dhoni holds an honorary rank of Lieutenant colonel in the Parachute Regiment of the Indian Territorial Army which was presented to him by the Indian Army in 2011. In June 2025, he was inducted into ICC Cricket Hall of Fame
"""

In [12]:
def preprocess(text):
    """Split ``text`` into sentences and build a normalised copy of each one.

    Whitespace runs (including newlines) are collapsed to single spaces before
    the text is sentence-tokenised with NLTK. Each sentence is then normalised
    for vectorisation: every character outside ``a-z``/``A-Z`` is replaced by
    a space, the result is lower-cased, and English stop words are dropped.

    Args:
        text: The raw document as a single string.

    Returns:
        A ``(sentences, clean_sentences)`` tuple of two equally long lists.
        ``sentences[i]`` is the i-th tokenised sentence as it appears in the
        whitespace-collapsed text; ``clean_sentences[i]`` is its normalised
        form, which may be an empty string if nothing survives the filtering.
    """
    # Build the stop-word lookup once. The original evaluated
    # stopwords.words('english') for every single word, and tested membership
    # against a list; a set built up front gives the same result far cheaper.
    stop_words = set(stopwords.words('english'))

    text = re.sub(r'\s+', ' ', text)
    sentences = nltk.sent_tokenize(text)

    clean_sentences = []
    for sent in sentences:
        # Digits, punctuation and non-ASCII letters all become separators.
        letters_only = re.sub(r'[^a-zA-Z]', ' ', sent).lower()
        kept_words = [word for word in letters_only.split() if word not in stop_words]
        clean_sentences.append(' '.join(kept_words))
    return sentences, clean_sentences

# The tokenizer data must be present before preprocess() calls
# nltk.sent_tokenize, so the download has to stay ahead of the call.
nltk.download('punkt_tab') # resource that sent_tokenize reported as missing
# original_sentences: sentences as tokenised; clean_sentences: their normalised
# counterparts at the same indices.
original_sentences, clean_sentences = preprocess(text)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [13]:
# Represent every cleaned sentence as a TF-IDF vector (one row per sentence),
# then compare all sentence pairs: similarity_matrix[i][j] is the cosine
# similarity between sentence i and sentence j.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(clean_sentences)
similarity_matrix = cosine_similarity(vectors)


In [16]:

# Interpret the similarity matrix as an adjacency matrix: each sentence index
# becomes a node and the similarity values become the edges between them.
# NOTE(review): the diagonal holds each sentence's similarity with itself, so
# the graph presumably contains self-loops — confirm this is intended.
nx_graph = nx.from_numpy_array(similarity_matrix)
# PageRank yields a mapping of node (sentence index) -> importance score.
scores = nx.pagerank(nx_graph)


In [17]:
# Pair each PageRank score with the sentence it belongs to, then order the
# pairs from highest to lowest (ties fall back to comparing the sentence text).
ranked_sentences = [(rank, original_sentences[idx]) for idx, rank in scores.items()]
ranked_sentences.sort(reverse=True)


In [18]:
# How many of the top-ranked sentences make up the extractive summary.
NUM_SUMMARY_SENTENCES = 2

# Slice instead of indexing 0..N-1: a document with fewer sentences than
# requested then produces a shorter summary rather than an IndexError.
# Sentences are joined in rank order (best first), not document order.
summary = ' '.join(sentence for _, sentence in ranked_sentences[:NUM_SUMMARY_SENTENCES])
print("SUMMARY:\n")
print(summary)

SUMMARY:

Widely regarded as one of the most prolific wicket-keeper batsmen and captains and one of the greatest ODI batsmen ever, he represented the Indian cricket team and was the captain of the side in limited overs formats from 2007 to 2017 and in test cricket from 2008 to 2014. Dhoni is among the few batsmen to have scored more than five thousand runs in the IPL, as well as being the first wicket-keeper to do so.


In [19]:
from rouge import Rouge

# Score the summary (hypothesis) against the full source text (reference).
# Because the summary consists of sentences copied from that same text, the
# precision values come out as 1.0; only recall and F-score are informative
# here.
rouge = Rouge()
# Stored under its own name: `scores` already holds the PageRank results, and
# overwriting it would break the ranking cell if it were run again afterwards.
rouge_scores = rouge.get_scores(summary, text)
print("\nROUGE Scores:\n", rouge_scores)


ROUGE Scores:
 [{'rouge-1': {'r': 0.23394495412844038, 'p': 1.0, 'f': 0.379182153060903}, 'rouge-2': {'r': 0.19618528610354224, 'p': 1.0, 'f': 0.32801822049242174}, 'rouge-l': {'r': 0.23394495412844038, 'p': 1.0, 'f': 0.379182153060903}}]


In [3]:
# NOTE(review): this cell repeats the nltk import and the 'punkt'/'stopwords'
# downloads already done in the setup cell near the top of the notebook. Its
# execution count and output path differ from the other cells, which suggests
# it was run in a different session — confirm, then consider removing it.
import nltk
nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to C:\Users\Arunava
[nltk_data]     Chakraborty\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Arunava
[nltk_data]     Chakraborty\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True