In [26]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
import string
import matplotlib.pyplot as plt
import sklearn.feature_extraction
from sklearn.feature_extraction.text import TfidfVectorizer
import rouge
from rouge import Rouge

In [11]:
text='''Edge on systems reveal the properties of disk galaxies as a function of height, z, above the plane. Four local edge-on galaxies, that are close enough to have been resolved into stars by the Hubble Space Telescope, show thick disks, composed of a red stellar population, which is old and relatively metal rich. Color gradients, ∆(V-I)/∆z, are zero or slightly positive. Favored models may have an explicit thick disk formation phase.
'''
# Load the stop-word list once; calling stopwords.words('english') inside the
# comprehension would re-evaluate it for every word of the text.
english_stopwords = set(stopwords.words('english'))

# Lower-case first so the comparison against the stop-word list can match,
# then drop every whitespace-separated token that is a stop word.
# Punctuation stays attached to its token (e.g. "z,").
text = text.lower()
text = ' '.join(word for word in text.split() if word not in english_stopwords)

In [12]:
# `text` was already lower-cased and stop-word-filtered in the cell that defines
# it; that preprocessing is idempotent, so it is not repeated here.
# Punctuation is intentionally left in place.
sents = sent_tokenize(text)
sents

['edge systems reveal properties disk galaxies function height, z, plane.',
 'four local edge-on galaxies, close enough resolved stars hubble space telescope, show thick disks, composed red stellar population, old relatively metal rich.',
 'color gradients, ∆(v-i)/∆z, zero slightly positive.',
 'favored models may explicit thick disk formation phase.']

In [13]:
def calculate_tfidf_scores(texts):
    """Fit a fresh TfidfVectorizer on ``texts`` and return its terms and weights.

    Parameters
    ----------
    texts : iterable of str
        The documents to fit the vectorizer on.

    Returns
    -------
    tuple
        ``(feature_names, tfidf_scores)`` where ``feature_names`` is the result
        of ``get_feature_names_out()`` and ``tfidf_scores`` is the fitted
        document-term matrix converted to a dense array (one row per document).
    """
    tfidf = TfidfVectorizer()
    dense_weights = tfidf.fit_transform(texts).toarray()
    return tfidf.get_feature_names_out(), dense_weights

def sentence_scoring(text):
    """Score each sentence of ``text`` by the mean TF-IDF weight of its tokens.

    The vectorizer is fitted on ``text`` as a single document, so every term
    occurs in every document and the IDF factor cannot tell terms apart; the
    weights therefore only reflect how often a term occurs in ``text``.

    Parameters
    ----------
    text : str
        The text to score. It is split into sentences with ``sent_tokenize``.

    Returns
    -------
    dict
        Maps each sentence string to its score, in the order the sentences
        appear in ``text``. Identical sentences share one entry. Returns an
        empty dict when ``text`` is empty or whitespace-only. Sorting by score
        is left to the caller.
    """
    # Nothing to score. Returning early also keeps an empty document away from
    # the vectorizer, which cannot build a vocabulary from it.
    if not text or not text.strip():
        return {}

    sentences = sent_tokenize(text)

    # Per-word weights taken from the single fitted document (row 0).
    feature_names, tfidf_scores = calculate_tfidf_scores([text])
    word_scores = dict(zip(feature_names, tfidf_scores[0]))

    sentence_scores = {}
    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        if not words:
            # Defensive: avoid dividing by zero for a sentence with no tokens.
            sentence_scores[sentence] = 0.0
            continue
        # Tokens missing from the vectorizer's vocabulary (punctuation, for
        # instance) add 0 to the sum but still count towards the length.
        total = sum(word_scores.get(word, 0) for word in words)
        sentence_scores[sentence] = total / len(words)

    return sentence_scores

# Helper: return the first key in a dict whose value equals the given value
def findkey(dictionary, value):
    """Return the first key of ``dictionary`` whose value equals ``value``.

    Keys are checked in the dictionary's iteration order. When several keys
    map to an equal value only the first one is returned. Returns ``None``
    when no value matches.
    """
    matching_keys = (
        key for key, candidate in dictionary.items() if candidate == value
    )
    return next(matching_keys, None)

In [14]:
# Preview the sentence -> score mapping computed for the preprocessed text.
sentence_scoring(text)

{'edge systems reveal properties disk galaxies function height, z, plane.': 0.12679436672186678,
 'four local edge-on galaxies, close enough resolved stars hubble space telescope, show thick disks, composed red stellar population, old relatively metal rich.': 0.11701085077110553,
 'color gradients, ∆(v-i)/∆z, zero slightly positive.': 0.05283098613411116,
 'favored models may explicit thick disk formation phase.': 0.1526228488318767}

In [15]:
# Keep the sentence -> score mapping; the ranking cells below index into it.
freqdict=sentence_scoring(text)

In [16]:
# Look up the score of every sentence in `sents` and list them highest first.
scoreset = sorted((freqdict[sent] for sent in sents), reverse=True)
print(scoreset)

[0.1526228488318767, 0.12679436672186678, 0.11701085077110553, 0.05283098613411116]


In [17]:
# Rank the sentences themselves instead of mapping each score back to a
# sentence with findkey(): a lookup by value returns the first matching key,
# so two sentences with equal scores would repeat one and drop the other.
# sorted() is stable, so tied sentences keep their original order.
summaryset = sorted(freqdict, key=freqdict.get, reverse=True)

# Every sentence is kept, so this is the text re-ordered by score rather than
# a shortened version of it.
summaryfinal = ' '.join(summaryset)
print(summaryfinal)

favored models may explicit thick disk formation phase. edge systems reveal properties disk galaxies function height, z, plane. four local edge-on galaxies, close enough resolved stars hubble space telescope, show thick disks, composed red stellar population, old relatively metal rich. color gradients, ∆(v-i)/∆z, zero slightly positive.


In [21]:
# Inspect the type of the preprocessed text.
type(text)

str

In [27]:
# NOTE(review): this rebinds `rouge` (bound to the module by `import rouge`
# in the import cell) to a Rouge scorer instance.
rouge=Rouge()

In [28]:
# Rouge.get_scores takes (hypothesis, reference): the generated summary is the
# hypothesis being evaluated and the preprocessed source text is the reference.
# Passing them the other way round swaps the reported recall and precision.
scores = rouge.get_scores(summaryfinal, text)

In [29]:
# Display the ROUGE-1 / ROUGE-2 / ROUGE-L recall, precision and F-scores.
print(scores)

[{'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}, 'rouge-2': {'r': 0.9777777777777777, 'p': 0.9777777777777777, 'f': 0.9777777727777778}, 'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}}]
