# **TextRank NER Implementation**

Reference Source: https://spacy.io/usage/linguistic-features#named-entities

In [None]:
#Textrank Algorithm
import networkx as nx
import numpy as np
from nltk.tokenize.punkt import PunktSentenceTokenizer
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer

def textrank(document):
    """Return an extractive summary made of the top-ranked sentences of ``document``.

    The document is split into sentences with an untrained
    ``PunktSentenceTokenizer``; each sentence becomes a TF-IDF vector, the
    product of the TF-IDF matrix with its transpose is used as a weighted
    sentence graph, and PageRank over that graph scores every sentence.

    How many sentences are kept depends on how many were found:
      * exactly 1     -> that sentence
      * 2 to 5        -> the top half (rounded down)
      * more than 5   -> the top 5

    Args:
        document: The text to summarise.

    Returns:
        str: The selected sentences, highest score first, joined by single
        spaces. An empty string is returned for empty or whitespace-only
        input.
    """
    # Nothing to rank: bail out before the vectorizer is asked to build a
    # vocabulary from no text at all.
    if not document or not document.strip():
        return ''

    sentences = PunktSentenceTokenizer().tokenize(document)
    if not sentences:
        return ''

    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)

    # Explicit matrix product (sentence x sentence weights).
    similarity_graph = normalized @ normalized.T

    # networkx 3.0 removed from_scipy_sparse_matrix in favour of
    # from_scipy_sparse_array; fall back to the old name on older installs.
    build_graph = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
    nx_graph = build_graph(similarity_graph)
    scores = nx.pagerank(nx_graph)

    # Keep (score, sentence) pairs together. Looking sentences up through a
    # {score: sentence} dict loses sentences whenever two scores are equal.
    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    if len(ranked) == 1:
        keep = 1
    elif len(ranked) <= 5:
        keep = len(ranked) // 2
    else:
        keep = 5

    return ' '.join(sentence for _, sentence in ranked[:keep])

In [None]:
# Sample news passage used as the input document for textrank() below.
# NOTE: in a few places the text has no space after a sentence-ending period
# (e.g. "Houston.George", "mother’s.The"); the string is kept verbatim.
txt = """
After the sound and the fury, weeks of demonstrations and anguished calls for racial justice, the man whose death gave rise to an international movement, and whose last words — “I can’t breathe” — have been a rallying cry, will be laid to rest on Tuesday at a private funeral in Houston.George Floyd, who was 46, will then be buried in a grave next to his mother’s.The service, scheduled to begin at 11 a.m. at the Fountain of Praise church, comes after five days of public memorials in Minneapolis, North Carolina and Houston and two weeks after a Minneapolis police officer was caught on video pressing his knee into Mr. Floyd’s neck for nearly nine minutes before Mr. Floyd died. That officer, Derek Chauvin, has been charged with second-degree murder and second-degree manslaughter. His bail was set at $1.25 million in a court appearance on Monday. The outpouring of anger and outrage after Mr. Floyd’s death — and the speed at which protests spread from tense, chaotic demonstrations in the city where he died to an international movement from Rome to Rio de Janeiro — has reflected the depth of frustration borne of years of watching black people die at the hands of the police or vigilantes while calls for change went unmet.
"""

In [None]:
# Summarise the sample passage; as the cell's last expression, the returned
# summary string is what the notebook displays.
textrank(txt)

'Floyd’s death — and the speed at which protests spread from tense, chaotic demonstrations in the city where he died to an international movement from Rome to Rio de Janeiro — has reflected the depth of frustration borne of years of watching black people die at the hands of the police or vigilantes while calls for change went unmet. \nAfter the sound and the fury, weeks of demonstrations and anguished calls for racial justice, the man whose death gave rise to an international movement, and whose last words — “I can’t breathe” — have been a rallying cry, will be laid to rest on Tuesday at a private funeral in Houston.George Floyd, who was 46, will then be buried in a grave next to his mother’s.The service, scheduled to begin at 11 a.m. at the Fountain of Praise church, comes after five days of public memorials in Minneapolis, North Carolina and Houston and two weeks after a Minneapolis police officer was caught on video pressing his knee into Mr. The outpouring of anger and outrage afte

# **Applying spaCy to the English summary**

In [None]:
import spacy

# Load the small English pipeline by its full package name: the bare "en"
# shortcut is not accepted by spaCy v3 and later.
nlp = spacy.load("en_core_web_sm")
# Part-of-speech tagging helper
def pos(sent):
    """Print every token of ``sent`` followed by its ``pos_`` tag, one per line.

    The text is parsed with the module-level ``nlp`` pipeline, so the tags
    reflect whatever pipeline ``nlp`` is bound to when this is called.

    Args:
        sent: The text to analyse.
    """
    for token in nlp(sent):
        print(token, token.pos_)

In [None]:
from spacy.lang.en import English

# Build a blank English pipeline and rebind the global `nlp` to it.
# NOTE(review): per the spaCy docs, English() provides tokenization only; no
# tagger, parser, NER or word vectors are loaded here. Confirm for your version.
nlp = English()

# Summarise the sample passage
text = textrank(txt)

# Tokenize the summary
my_doc = nlp(text)

In [None]:
# Gather the text of each token in the tokenized summary and show the list
token_list = [token.text for token in my_doc]
print(token_list)

['Floyd', '’s', 'death', '—', 'and', 'the', 'speed', 'at', 'which', 'protests', 'spread', 'from', 'tense', ',', 'chaotic', 'demonstrations', 'in', 'the', 'city', 'where', 'he', 'died', 'to', 'an', 'international', 'movement', 'from', 'Rome', 'to', 'Rio', 'de', 'Janeiro', '—', 'has', 'reflected', 'the', 'depth', 'of', 'frustration', 'borne', 'of', 'years', 'of', 'watching', 'black', 'people', 'die', 'at', 'the', 'hands', 'of', 'the', 'police', 'or', 'vigilantes', 'while', 'calls', 'for', 'change', 'went', 'unmet', '.', '\n', 'After', 'the', 'sound', 'and', 'the', 'fury', ',', 'weeks', 'of', 'demonstrations', 'and', 'anguished', 'calls', 'for', 'racial', 'justice', ',', 'the', 'man', 'whose', 'death', 'gave', 'rise', 'to', 'an', 'international', 'movement', ',', 'and', 'whose', 'last', 'words', '—', '“', 'I', 'ca', 'n’t', 'breathe', '”', '—', 'have', 'been', 'a', 'rallying', 'cry', ',', 'will', 'be', 'laid', 'to', 'rest', 'on', 'Tuesday', 'at', 'a', 'private', 'funeral', 'in', 'Houston',

In [None]:
import en_core_web_sm

# Load the en_core_web_sm English model (vocabulary, syntax and entities);
# this rebinds the global `nlp` again.
nlp = en_core_web_sm.load()

# The "nlp" object is used to create documents with linguistic annotations;
# here it annotates the summary text.
docs = nlp(text)

In [None]:
# Print each token of the annotated summary next to its `pos_` tag, one per line
for word in docs:
    print(word.text,word.pos_)

Floyd PROPN
’s PART
death NOUN
— PUNCT
and CCONJ
the DET
speed NOUN
at ADP
which DET
protests NOUN
spread VERB
from ADP
tense ADJ
, PUNCT
chaotic ADJ
demonstrations NOUN
in ADP
the DET
city NOUN
where ADV
he PRON
died VERB
to ADP
an DET
international ADJ
movement NOUN
from ADP
Rome PROPN
to ADP
Rio PROPN
de PROPN
Janeiro PROPN
— PUNCT
has AUX
reflected VERB
the DET
depth NOUN
of ADP
frustration NOUN
borne NOUN
of ADP
years NOUN
of ADP
watching VERB
black ADJ
people NOUN
die VERB
at ADP
the DET
hands NOUN
of ADP
the DET
police NOUN
or CCONJ
vigilantes VERB
while SCONJ
calls NOUN
for ADP
change NOUN
went VERB
unmet ADJ
. PUNCT

 SPACE
After ADP
the DET
sound NOUN
and CCONJ
the DET
fury NOUN
, PUNCT
weeks NOUN
of ADP
demonstrations NOUN
and CCONJ
anguished VERB
calls NOUN
for ADP
racial ADJ
justice NOUN
, PUNCT
the DET
man NOUN
whose DET
death NOUN
gave VERB
rise NOUN
to ADP
an DET
international ADJ
movement NOUN
, PUNCT
and CCONJ
whose DET
last ADJ
words NOUN
— PUNCT
“ PUNCT
I PRON
ca VE

In [None]:
from spacy import displacy
# Annotate the summary and list each named entity with its text label
# (label_) and its integer label ID (label)
nytimes = nlp(text)

entities = [(ent, ent.label_, ent.label) for ent in nytimes.ents]
entities

[(Floyd, 'PERSON', 380),
 (Rome, 'GPE', 384),
 (Rio de Janeiro, 'GPE', 384),
 (weeks, 'DATE', 391),
 (Tuesday, 'DATE', 391),
 (Houston, 'GPE', 384),
 (George Floyd, 'PERSON', 380),
 (46, 'DATE', 391),
 (11 a.m., 'TIME', 392),
 (the Fountain of Praise, 'FAC', 9191306739292312949),
 (five days, 'DATE', 391),
 (Minneapolis, 'GPE', 384),
 (North Carolina, 'GPE', 384),
 (Houston, 'GPE', 384),
 (two weeks, 'DATE', 391),
 (Minneapolis, 'GPE', 384),
 (The, 'PERSON', 380),
 (Floyd, 'PERSON', 380),
 (nearly nine minutes, 'TIME', 392)]

In [None]:
# Visualise the entities found in `nytimes` with displaCy's "ent" style.
# Since this is an interactive Jupyter environment, jupyter=True is passed to displacy.render.
displacy.render(nytimes, style = "ent",jupyter = True)