# Using TextRank

In [None]:
import numpy as np
import pandas as pd
import nltk
import re
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from scipy import spatial
import networkx as nx

In [None]:
# Sample news article used as the input document for both summarisation approaches in this notebook.
text='''Several rounds of talks between the government and protesting farmers have failed to resolve the impasse over the three farm laws. The kisan bodies, which have been protesting in the national capital for almost two months, demanding the repeal of three contentious farm laws have remained firm on their decision to hold a tractor rally on the occasion of Republic Day. The rally will begin from three locations and will pass through pre-approved routes in the national capital.

The farmer bodies have issued strict instructions to ensure that no untoward incident takes place during the protests. While 3000 volunteers will be assisting Delhi Police in ensuring law and order during the rallies, a war room has been established to coordinate the peaceful progress of the march.

Each rally will have about 40 volunteers as medical staff, emergency personnel besides others. Farmers have been asked to display national flag and play patriotic songs. '''

In [None]:
# pprint is imported in this cell and reused by the later display cells.
import pprint
# Pretty-print the raw article so the long string is wrapped across lines.
pprint.pprint(text)

('Several rounds of talks between the government and protesting farmers have '
 'failed to resolve the impasse over the three farm laws. The kisan bodies, '
 'which have been protesting in the national capital for almost two months, '
 'demanding the repeal of three contentious farm laws have remained firm on '
 'their decision to hold a tractor rally on the occasion of Republic Day. The '
 'rally will begin from three locations and will pass through pre-approved '
 'routes in the national capital.\n'
 '\n'
 'The farmer bodies have issued strict instructions to ensure that no untoward '
 'incident takes place during the protests. While 3000 volunteers will be '
 'assisting Delhi Police in ensuring law and order during the rallies, a war '
 'room has been established to coordinate the peaceful progress of the march.\n'
 '\n'
 'Each rally will have about 40 volunteers as medical staff, emergency '
 'personnel besides others. Farmers have been asked to display national flag '
 'and play p

In [None]:
import nltk

# Resources needed further down:
#   * 'punkt' / 'punkt_tab' - sentence tokenizer models used by sent_tokenize.
#     Newer NLTK releases load 'punkt_tab' instead of 'punkt'; requesting both
#     keeps the notebook runnable on old and new installs. On an NLTK index
#     that does not know a package, download() is expected to report the
#     problem and return False rather than raise - TODO confirm on the
#     oldest NLTK version you need to support.
#   * 'stopwords' - the English stop-word list.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# --- Split the article into sentences and normalise them -------------------
sentences = sent_tokenize(text)
# Lower-case each sentence and drop every character that is neither a word
# character nor whitespace (i.e. punctuation).
sentences_clean = [re.sub(r'[^\w\s]', '', sentence.lower()) for sentence in sentences]
stop_words = stopwords.words('english')
# Set copy for fast membership tests; `stop_words` itself stays a list.
stop_word_set = set(stop_words)
# split() (no argument) splits on any whitespace run, so repeated spaces or
# embedded newlines cannot produce empty or newline-glued tokens.
sentence_tokens = [
    [word for word in sentence.split() if word not in stop_word_set]
    for sentence in sentences_clean
]

# --- Train a 1-dimensional Word2Vec model on the article itself ------------
try:
    # gensim >= 4.0 parameter names.
    w2v = Word2Vec(sentence_tokens, vector_size=1, min_count=1, epochs=1000)
except TypeError:
    # gensim < 4.0 spells the same parameters `size` and `iter`.
    w2v = Word2Vec(sentence_tokens, size=1, min_count=1, iter=1000)

# Each word contributes the single component of its vector. Vectors are read
# through `w2v.wv`, which works on gensim 3.x and 4.x (indexing the model
# object directly was removed in 4.0).
word_values = [[w2v.wv[word][0] for word in words] for words in sentence_tokens]

# Right-pad every sentence with zeros to the length of the longest sentence so
# that all sentence vectors have the same dimension.
max_len = max(len(tokens) for tokens in sentence_tokens)
sentence_embeddings = [
    np.pad(values, (0, max_len - len(values)), 'constant')
    for values in word_values
]

In [None]:
# Square matrix of pairwise cosine similarities between the padded sentence
# vectors; entry (i, j) compares sentence i with sentence j.
sentence_count = len(sentence_tokens)
similarity_matrix = np.zeros((sentence_count, sentence_count))
for i, row_embedding in enumerate(sentence_embeddings):
    # Fill one full row at a time: similarity = 1 - cosine distance.
    similarity_matrix[i, :] = [
        1 - spatial.distance.cosine(row_embedding, column_embedding)
        for column_embedding in sentence_embeddings
    ]

In [None]:
# Interpret the similarity matrix as a weighted adjacency matrix: one node per
# sentence, edge weights taken from the pairwise similarities.
nx_graph = nx.from_numpy_array(similarity_matrix)
# Rank the nodes with PageRank; `scores` is looked up by sentence position below.
# NOTE(review): similarities may be NaN (all-zero vector) or negative, which
# PageRank may not handle well - confirm on real inputs.
scores = nx.pagerank(nx_graph)

In [None]:
# Pair every sentence with the PageRank score stored under its position.
sentence_scores = (scores[position] for position in range(len(sentences)))
top_sentence = dict(zip(sentences, sentence_scores))
# Keep the four highest-scoring sentences (highest first).
ranked = sorted(top_sentence.items(), key=lambda pair: pair[1], reverse=True)
top = dict(ranked[:4])

In [None]:
# Show the source article, then the extractive (TextRank) summary.
divider = '*' * 100

print("Original Article")
print(divider)
pprint.pprint(text)
print(divider)
print("\n")
print("Summary")
print(divider)
# Emit the selected sentences in their original document order.
summary_sentences = [candidate for candidate in sentences if candidate in top]
for candidate in summary_sentences:
    pprint.pprint(candidate)
print(divider)

Original Article
****************************************************************************************************
('Several rounds of talks between the government and protesting farmers have '
 'failed to resolve the impasse over the three farm laws. The kisan bodies, '
 'which have been protesting in the national capital for almost two months, '
 'demanding the repeal of three contentious farm laws have remained firm on '
 'their decision to hold a tractor rally on the occasion of Republic Day. The '
 'rally will begin from three locations and will pass through pre-approved '
 'routes in the national capital.\n'
 '\n'
 'The farmer bodies have issued strict instructions to ensure that no untoward '
 'incident takes place during the protests. While 3000 volunteers will be '
 'assisting Delhi Police in ensuring law and order during the rallies, a war '
 'room has been established to coordinate the peaceful progress of the march.\n'
 '\n'
 'Each rally will have about 40 volunteers as 

# Using a Pretrained Transformer Model

In [None]:
! pip install transformers

In [None]:
from transformers import pipeline

# Create the summarization pipeline first (this downloads the default model on
# first use), then inspect the underlying model. The inspection used to run in
# an earlier cell than the one creating `summarizer`, which fails on a fresh kernel.
summarizer = pipeline("summarization")
summarizer.model

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1621.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1222317369.0, style=ProgressStyle(descr…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898822.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=26.0, style=ProgressStyle(description_w…




In [None]:
# Generation settings: summary length bounds, with sampling switched off.
generation_kwargs = dict(max_length=130, min_length=30, do_sample=False)
result = summarizer(text, **generation_kwargs)

In [None]:
# Show the source article, then the summary produced by the transformer pipeline.
divider = '*' * 100

print("Original Article")
print(divider)
pprint.pprint(text)
print(divider)
print("\n")
print("Summary")
print(divider)
# The pipeline returns a list with one dict per input; take the first entry's text.
generated_summary = result[0]['summary_text']
pprint.pprint(generated_summary)
print(divider)