In [0]:
!pip install -U pandas
!pip install -U numpy
!pip install -U nltk
!pip install -U sumy

In [0]:
import nltk
nltk.download('punkt')

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

from sumy.summarizers.lex_rank import LexRankSummarizer 
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Read files and Pre-processing


In [0]:
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen

# topics
resp = urlopen('https://github.com/kavgan/opinosis/raw/master/OpinosisDataset1.0_0.zip')
zipfile = ZipFile(BytesIO(resp.read()))

list_file_name = []
for file_name in zipfile.namelist():
    if 'topics/' in file_name:
        list_file_name.append(file_name)

print(len(list_file_name))

51


In [0]:
lexrank_summarizer = LexRankSummarizer(Stemmer('english')) # Lex rank
lexrank_summarizer.stop_words = get_stop_words('english')

textrank_summarizer = TextRankSummarizer(Stemmer('english')) # text rank
textrank_summarizer.stop_words = get_stop_words('english')

lsa_summarizer = LsaSummarizer(Stemmer('english')) # lsa
lsa_summarizer.stop_words = get_stop_words('english')

## Get list sentences

In [0]:
#name of the plain-text file ~ bbc news dataset
sentences = []
for line in zipfile.open(list_file_name[0]).readlines():
    sentences.append(str(line).replace('b\'', '').replace('b\"', '').replace('\\r\\n', ''))

In [0]:
parser = PlaintextParser(sentences, Tokenizer('english'))

## Lex Rank


In [28]:
for sentence in lexrank_summarizer(parser.document, 10):
    print(sentence)

"', " DESTINATION TIME, , This is pretty accurate too .
'", " The directions are highly accurate down to a  T  .
'", " but after that it is very easy and quite accurate to use .
'", " The accuracy at this point is very good .
'", "I updated to the latest 2010 map soon after I received the unit, so the map is accurate to me .
'", "The estimated time to arrival does not seem to calculate the travelling time accurately .
'", " Less traveled rural roads will not be accurate on any unit .
'", "What the 255w does best is find a street address, business, point of interest, hospital or airport and give you turn, by, turn directions with amazing accuracy .
'", 'I\'m really glad I bought it though, and like the easy to read graphics, the voice used to tell you the name of the street you are to turn on, the uncannily accurate estimates of mileage and time of arrival at your destination .
'", ' I can\'t believe how accurate and detailed the information estimated time of arrival,speed limits along 

## Text Rank


In [29]:
for sentence in textrank_summarizer(parser.document, 10):
    print(sentence)

[", and is very, very accurate .
'", ' But, it\'s always very accurate .
'", " The directions are highly accurate down to a  T  .
'", " but after that it is very easy and quite accurate to use .
"', " If your looking for a nice, accurate GPS for not so much money, got with this one .
'", "0 out of 5 stars Inexpensive, accurate, plenty of features, August 6, 2009'", " The only glitch I have found so far is that the speed limits are not 100% accurate, although the GPS, amazingly, is able to very accurately tell you how fast your vehicle is moving .
'", " The directions provided have all been quite accurate thus far .
'", ',  Very Accurate but with one small glitch I found ,  I\'ll explain in the CONS"', "This is a great GPS, it is so easy to use and it is always accurate .
'", 'To date it\'s been a very easy to use and accurate .
'", " seems to be rather accurate .


## LSA

In [30]:
for sentence in lsa_summarizer(parser.document, 10):
    print(sentence)

'", " but for the most part, we find that the Garmin software provides accurate directions, whereever we intend to go .
'", ' This function is not accurate if you don\'t leave it in battery mode say, when you stop at the Cracker Barrell for lunch and to play one of those trangle games with the tees .
"', " It provides immediate alternatives if the route from the online map program was inaccurate or blocked by an obstacle .
'", " The most accurate review stated that these machines are adjunct to a good map and signs on the interstate .
'", " Because the accuracy is good to the street address level, it may not be able to guide you to the exact location if your destination is inside a shopping mall .
'", ' I used it the day I bought it,   and then this morning, and as soon as it comes on it is  ready to navigate  The only downfall of this product, and the only reason I did not give it 5 stars is the fact that the speed limit it displays for the road you are on isn\'t 100% accurate .
'", "