In [1]:
# Filtering out the warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

from nltk import bigrams, word_tokenize
from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords

In [3]:
df = pd.read_csv('1429_1.csv')
df.columns

Index(['id', 'name', 'asins', 'brand', 'categories', 'keys', 'manufacturer',
       'reviews.date', 'reviews.dateAdded', 'reviews.dateSeen',
       'reviews.didPurchase', 'reviews.doRecommend', 'reviews.id',
       'reviews.numHelpful', 'reviews.rating', 'reviews.sourceURLs',
       'reviews.text', 'reviews.title', 'reviews.userCity',
       'reviews.userProvince', 'reviews.username'],
      dtype='object')

In [4]:
df = df[['asins','reviews.rating','reviews.text']]
df.dropna(inplace = True)
df.head()

Unnamed: 0,asins,reviews.rating,reviews.text
0,B01AHB9CN2,5.0,This product so far has not disappointed. My c...
1,B01AHB9CN2,5.0,great for beginner or experienced person. Boug...
2,B01AHB9CN2,5.0,Inexpensive tablet for him to use and learn on...
3,B01AHB9CN2,4.0,I've had my Fire HD 8 two weeks now and I love...
4,B01AHB9CN2,5.0,I bought this for my grand daughter when she c...


In [5]:
df = df.reset_index(drop=True)

In [6]:
product1 = df.where(df['asins'] == 'B01AHB9CN2')
product1 = product1.dropna()
product1 = product1.reset_index(drop=True)

In [7]:
product1

Unnamed: 0,asins,reviews.rating,reviews.text
0,B01AHB9CN2,5.0,This product so far has not disappointed. My c...
1,B01AHB9CN2,5.0,great for beginner or experienced person. Boug...
2,B01AHB9CN2,5.0,Inexpensive tablet for him to use and learn on...
3,B01AHB9CN2,4.0,I've had my Fire HD 8 two weeks now and I love...
4,B01AHB9CN2,5.0,I bought this for my grand daughter when she c...
...,...,...,...
2809,B01AHB9CN2,5.0,I had the original Kindle and just decided to ...
2810,B01AHB9CN2,5.0,My 10 year old daughter loves it. Battery grea...
2811,B01AHB9CN2,5.0,Several friends suggested I buy the Kindle Fir...
2812,B01AHB9CN2,5.0,I had a new one but it stopped working....So I...


In [8]:
pos_rev = []
neg_rev = []
for i in range(len(product1)):
    if df['reviews.rating'][i] > 3.0:
        pos_rev.append(str(df['reviews.text'][i]))
    elif df['reviews.rating'][i] <= 3.0:
        neg_rev.append(str(df['reviews.text'][i]))

In [9]:
len(neg_rev)

147

In [10]:
def bigram_extractive_summarization_all_reviews(reviews):
    all_reviews_text = ' '.join(reviews)

    # Tokenize sentences
    sentences = sent_tokenize(all_reviews_text)

    # Tokenize words
    words = word_tokenize(all_reviews_text.lower())

    # bigrams
    bigrams_list = list(bigrams(words))
    bigram_frequencies = FreqDist(bigrams_list)

    # sentence scores
    scores = {sentence: sum(bigram_frequencies[bigram] for bigram in bigrams(sentence.lower().split())) for sentence in sentences}

    # the sentence for the summary
    summary_sentence = max(scores, key=scores.get)

    return summary_sentence

summary = bigram_extractive_summarization_all_reviews(pos_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
I was using this for a while it was 10X faster then my kindle 5th generation yes 1gb of ram but the App Store made from Amazon is not the best so I downloaded the google play store to get the apps I mostly used I only use my kindle fire had 8 for books and reading or watching YouTube or the web I'm giving the rating 4 stars If you are using Amazon Prime, you definitely like this table as it can stream the video, audio, reading book... easy to carry with you I've had kindles since they first came out love it Perfect for my elderly mom to play games and use for reading It is a good tablet, but is a little slow downloading apps Great for what it is.



In [11]:
summary = bigram_extractive_summarization_all_reviews(neg_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
May be good for others i Bought this around black friday for $60 hoping it would be awesome... it failed so hard i tried multiple different micro SD cards none of which were recognized and YES i formated them with every format i could think of ... Fat32, NTFS, Fat, Xfat... i even tried to have the tablet do it... didnt work... to make matters worse half the apps i wanted to use werent in the app store and i came to find out that it isnt linked to the normal google play store this tablet has its own app store which is missing many common apps... the main reason i bought this was to play clash of clans and i cant because it wasnt on the app store... i tried to also use aftermarket play stores to play COC but it didnt work... launched and played 1 time but didnt work or update after that... needless to say i returned it and bought a $250 samsung galaxy tab A 10.1 (2016 version) with S-pen and its WAYYYYY better... bottom line you get what you pay for... also hint

## Make it shorter

In [12]:
def bigram_extractive_summarization_all_reviews(reviews):
    all_reviews_text = ' '.join(reviews)

    # Tokenize sentences
    sentences = sent_tokenize(all_reviews_text)

    # Tokenize words, remove stop words and numbers
    stop_words = set(stopwords.words('english'))
    words = [word.lower() for word in word_tokenize(all_reviews_text) if word.isalpha() and word.lower() not in stop_words]

    # bigrams
    bigrams_list = list(bigrams(words))
    bigram_frequencies = FreqDist(bigrams_list)

    # sentence scores
    scores = {sentence: sum(bigram_frequencies[bigram] for bigram in bigrams(sentence.lower().split())) for sentence in sentences}

    # the top sentence for the summary
    summary_sentence = max(scores, key=scores.get)

    return summary_sentence

summary = bigram_extractive_summarization_all_reviews(pos_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
great tablet well worth the price which was a deal on black friday Original Kindle Fire would finally no longer charge so bought this to replace.



In [13]:
summary = bigram_extractive_summarization_all_reviews(neg_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
May be good for others i Bought this around black friday for $60 hoping it would be awesome... it failed so hard i tried multiple different micro SD cards none of which were recognized and YES i formated them with every format i could think of ... Fat32, NTFS, Fat, Xfat... i even tried to have the tablet do it... didnt work... to make matters worse half the apps i wanted to use werent in the app store and i came to find out that it isnt linked to the normal google play store this tablet has its own app store which is missing many common apps... the main reason i bought this was to play clash of clans and i cant because it wasnt on the app store... i tried to also use aftermarket play stores to play COC but it didnt work... launched and played 1 time but didnt work or update after that... needless to say i returned it and bought a $250 samsung galaxy tab A 10.1 (2016 version) with S-pen and its WAYYYYY better... bottom line you get what you pay for... also hint

## & shorter

In [14]:
def bigram_extractive_summarization_all_reviews(reviews):
    all_reviews_text = ' '.join(reviews)

    # Tokenize sentences
    sentences = sent_tokenize(all_reviews_text)

    # Tokenize words
    words = word_tokenize(all_reviews_text.lower())

    # bigrams
    bigrams_list = list(bigrams(words))
    bigram_frequencies = FreqDist(bigrams_list)

    # sentence scores 
    scores = {sentence: sum(bigram_frequencies[bigram] for bigram in bigrams(sentence.lower().split())) for sentence in sentences}

    # the top sentence for the summary
    summary_sentence = max(scores)

    return summary_sentence

summary = bigram_extractive_summarization_all_reviews(pos_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
İt is cheap tablet option coming from amazon.



In [15]:
summary = bigram_extractive_summarization_all_reviews(neg_rev)
print(f"\nSummary for All Reviews:\n{summary}\n")


Summary for All Reviews:
works great for the price.but it shoves amazon stuff down your hroat like the fire tv.and is linked to your amazon 1 click buy if you haveit set up that way.



In [17]:
# pos_rev