# Excerpts Extraction 

In [104]:
# For monitoring duration of pandas processes
from tqdm import tqdm, tqdm_pandas

# To avoid RuntimeError: Set changed size during iteration
tqdm.monitor_interval = 0

# Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm`
# (can use `tqdm_gui`, `tqdm_notebook`, optional kwargs, etc.)
tqdm.pandas(desc="Progress:")

# Now you can use `progress_apply` instead of `apply`
# and `progress_map` instead of `map`
# can also groupby:
# df.groupby(0).progress_apply(lambda x: x**2)

In [2]:
import pandas as pd
import numpy as np
import nltk

In [3]:
import plotly 
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
print(cf.__version__)
# Configure cufflings 
cf.set_config_file(offline=False, world_readable=True, theme='pearl')

0.12.1


In [4]:
reviews_and_ratings_df = pd.read_pickle('../data/interim/001_pre_processed_reviews+and_ratings.p')
reviews_and_ratings_df.head()

Unnamed: 0,reviewerID,asin,reviewText,overall
0,A2XQ5LZHTD4AFT,000100039X,A timeless classic. It is a very demanding an...,5.0
1,AF7CSSGV93RXN,000100039X,I first read The Prophet by Kahlil Gibran over...,5.0
2,A1NPNGWBVD9AK3,000100039X,This is one of the first (literary) books I re...,5.0
3,A3IS4WGMFR4X65,000100039X,The Prophet is Kahlil Gibran's best known work...,5.0
4,AWLFVCT9128JV,000100039X,Gibran Khalil Gibran was born in 1883 in what ...,5.0


In [5]:
reviews_vs_feature_opinion_pairs = pd.read_pickle("../data/interim/006_pairs_per_review.p")

In [6]:
reviews_vs_feature_opinion_pairs.head()

Unnamed: 0,userId,asin,reviewText,imp_nns,num_of_imp_nouns,pairs,num_of_pairs
0,A2XQ5LZHTD4AFT,000100039X,"[(timeless, NN), ( classic, JJ), ( demanding, ...","[kneads, profits, preachers, territory, exile,...",26,"[(birth, prophets), (book, flows)]",2
2,A1NPNGWBVD9AK3,000100039X,"[(one, CD), ( first, NNP), ( literary, JJ), ( ...","[kneads, profits, preachers, territory, exile,...",26,"[(relevant, catechism), (within, prophets), (t...",4
4,AWLFVCT9128JV,000100039X,"[(gibran, NN), ( khalil, NNP), ( gibran, NNP),...","[kneads, profits, preachers, territory, exile,...",26,"[(forty-eight, almustafa)]",1
5,AFY0BT42DDYZV,000100039X,"[(days, NNS), ( kahlil, VBP), ( gibrans, NNS),...","[kneads, profits, preachers, territory, exile,...",26,"[(souls, profits), (wordofmouth, twentysix), (...",3
13,A2ZZHMT58ZMVCZ,000100039X,"[(prophet, NN), ( waited, VBD), ( twelve, CD),...","[kneads, profits, preachers, territory, exile,...",26,"[(bear, departs), (others, pillars), (similar,...",4


In [7]:
df00 = reviews_vs_feature_opinion_pairs[['userId','asin','pairs']]
df00.columns = ['reviewerID','asin','pairs']
df00.head()

Unnamed: 0,reviewerID,asin,pairs
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]"
2,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t..."
4,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]"
5,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (..."
13,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,..."


In [8]:
df01 = df00.merge(reviews_and_ratings_df, left_on=['reviewerID','asin'], right_on=['reviewerID','asin'], how='inner')
df01[0:31]

Unnamed: 0,reviewerID,asin,pairs,reviewText,overall
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]",A timeless classic. It is a very demanding an...,5.0
1,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t...",This is one of the first (literary) books I re...,5.0
2,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]",Gibran Khalil Gibran was born in 1883 in what ...,5.0
3,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (...","These days, Kahlil Gibran's ""The Prophet"" ofte...",5.0
4,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,...",A prophet has waited twelve years in a coastal...,5.0
5,ADIDQRLLR4KBQ,000100039X,"[(beautiful, metaphors), (live, prophets)]","Being an Atheist, it may seem strange to some ...",5.0
6,A281NPSIMI1C2R,000100039X,"[(pain, waves), (separate, almustafa)]","I am alive like you, and I am standing beside ...",5.0
7,A2R64CR74I98K3,000100039X,"[(religious, texts)]",This is a very usefull book that can be used a...,5.0
8,AF4QKY2R2TD3U,000100039X,"[(rich, metaphors)]","""Say not, 'I have found the truth,' but rather...",5.0
9,A3SMT15X2QVUR8,000100039X,"[(orphalese, metaphor)]",The Prophet Almustafa waits in the city of Orp...,5.0


### Break reviews to their composing sentences

In [9]:
from nltk.tokenize import sent_tokenize
df01['reviewText'] = df01['reviewText'].progress_apply(lambda review: sent_tokenize(review))
df01.head()

Progress:: 100%|██████████| 249871/249871 [02:08<00:00, 1939.69it/s]


Unnamed: 0,reviewerID,asin,pairs,reviewText,overall
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]","[A timeless classic., It is a very demanding a...",5.0
1,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t...",[This is one of the first (literary) books I r...,5.0
2,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]",[Gibran Khalil Gibran was born in 1883 in what...,5.0
3,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (...","[These days, Kahlil Gibran's ""The Prophet"" oft...",5.0
4,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,...",[A prophet has waited twelve years in a coasta...,5.0


After identifying the distinct sentences, next we need to apply the same normalisation process we employed at the beggining of this project, but this time on each sentence rather than on reviews.

In [65]:
# Word Tokenize
import re
import string
import inflect
from nltk.corpus import wordnet
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import regexp_tokenize
tokenizer=RegexpTokenizer('[\'\w\-]+',gaps=False)

# Convert to Lowercase
def convert_to_lowercase(sentence):

    for i in range(len(sentence)):
        sentence[i] = sentence[i].lower()
    return sentence

# Eliminate Punctuation
def eliminate_punctuation(sentence, regex):
    new_sentence = []
    for token in sentence:
        new_token = regex.sub(u'', token)
        if not new_token == u'':
            new_sentence.append(new_token)
    return new_sentence

r1 = re.compile("([a-zA-Z]+)([0-9]+)")
r2 = re.compile("([0-9]+)([a-zA-Z]+)")
r3 = re.compile("([a-zA-Z]+)([0-9]+)([a-zA-Z]+)")
r4 = re.compile("([0-9]+)([a-zA-Z]+)([0-9]+)")

# Split words into numbers
def split_words_and_nums(sentence):
    new_sentence = []
    for token in sentence:
        firstRegexIsTrue = r1.match(token)
        secondRegexIsTrue = r2.match(token)
        thirdRegexIsTrue = r3.match(token)
        fourthRegexIsTrue = r4.match(token)
    
        if(firstRegexIsTrue):
            new_sentence.append(firstRegexIsTrue.group(0))
            new_sentence.append(firstRegexIsTrue.group(1))
        elif(firstRegexIsTrue):
            new_sentence.append(secondRegexIsTrue.group(0))
            new_sentence.append(secondRegexIsTrue.group(1))
        elif(thirdRegexIsTrue):
            new_sentence.append(thirdRegexIsTrue.group(0))
            new_sentence.append(thirdRegexIsTrue.group(1))
            new_sentence.append(thirdRegexIsTrue.group(2))
        elif(fourthRegexIsTrue):
            new_sentence.append(fourthRegexIsTrue.group(0))
            new_sentence.append(fourthRegexIsTrue.group(1))
            new_sentence.append(fourthRegexIsTrue.group(2))
        else:
            new_sentence.append(token)
    return new_sentence

## Convert Numbers to Words
def numStringToWord(sentence, p):        
    for i in range(len(sentence)):
        if(sentence[i].isdigit()):
            if(len(sentence[i])<10):
                sentence[i] = p.number_to_words(sentence[i])
    return sentence

# Replace negatives with antonyms 
class AntonymReplacer(object):
    def replace(self, token, pos=None):
        antonyms = set()
        for syn in wordnet.synsets(token, pos=pos):
            for lemma in syn.lemmas():
                for antonym in lemma.antonyms():
                    antonyms.add(antonym.name())
        if len(antonyms) == 1:
            return antonyms.pop()
        else:
            return None
        
    def replace_negations(self, sentence):
        i, l = 0, len(sentence)
        tokens = []
        while i<l:
            token = sentence[i]
            if token == 'not' and i+1 <l:
                ant = self.replace(sentence[i+1])
                if ant:
                    tokens.append(ant)
                    i += 2
                    continue
            tokens.append(token)
            i += 1

        return tokens

In [66]:
replacer = AntonymReplacer()
regex=re.compile('[%s]' % re.escape(string.punctuation))
p = inflect.engine()
def normalise_and_tokenize_sentences(review):
    new_review = []
    for sentence in review:
        step_0 = tokenizer.tokenize(sentence)
        step_1 = convert_to_lowercase(step_0)
        step_2 = eliminate_punctuation(step_1, regex)
        step_3 = split_words_and_nums(step_2)
        step_4 = numStringToWord(step_3, p)
        step_5 = replacer.replace_negations(step_4)
        new_review.append(step_5)
    
    return new_review

In [67]:
df2 = df01.assign(norm_sentences = df01['reviewText'].progress_apply(lambda reviewText:normalise_and_tokenize_sentences(reviewText)))
df2.head()

Progress:: 100%|██████████| 249871/249871 [04:15<00:00, 976.54it/s] 


Unnamed: 0,reviewerID,asin,pairs,reviewText,overall,norm_sentences
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]","[A timeless classic., It is a very demanding a...",5.0,"[[a, timeless, classic], [it, is, a, very, dem..."
1,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t...",[This is one of the first (literary) books I r...,5.0,"[[this, is, one, of, the, first, literary, boo..."
2,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]",[Gibran Khalil Gibran was born in 1883 in what...,5.0,"[[gibran, khalil, gibran, was, born, in, one t..."
3,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (...","[These days, Kahlil Gibran's ""The Prophet"" oft...",5.0,"[[these, days, kahlil, gibrans, the, prophet, ..."
4,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,...",[A prophet has waited twelve years in a coasta...,5.0,"[[a, prophet, has, waited, twelve, years, in, ..."


In [68]:
df2.to_pickle('../data/interim/007_pre_processed_dataset_for_excerpts_extraction.p')

## Begin Excerpt Extraction

In [69]:
matrix_m01 = df2.as_matrix()

In [70]:
matrix_m02 = np.append(matrix_m01,np.zeros([len(matrix_m01),1]),1)
sample = pd.DataFrame(matrix_m02[0:10])
sample

Unnamed: 0,0,1,2,3,4,5,6
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]","[A timeless classic., It is a very demanding a...",5,"[[a, timeless, classic], [it, is, a, very, dem...",0
1,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t...",[This is one of the first (literary) books I r...,5,"[[this, is, one, of, the, first, literary, boo...",0
2,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]",[Gibran Khalil Gibran was born in 1883 in what...,5,"[[gibran, khalil, gibran, was, born, in, one t...",0
3,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (...","[These days, Kahlil Gibran's ""The Prophet"" oft...",5,"[[these, days, kahlil, gibrans, the, prophet, ...",0
4,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,...",[A prophet has waited twelve years in a coasta...,5,"[[a, prophet, has, waited, twelve, years, in, ...",0
5,ADIDQRLLR4KBQ,000100039X,"[(beautiful, metaphors), (live, prophets)]","[Being an Atheist, it may seem strange to some...",5,"[[being, an, atheist, it, may, seem, strange, ...",0
6,A281NPSIMI1C2R,000100039X,"[(pain, waves), (separate, almustafa)]","[I am alive like you, and I am standing beside...",5,"[[i, am, alive, like, you, and, i, am, standin...",0
7,A2R64CR74I98K3,000100039X,"[(religious, texts)]",[This is a very usefull book that can be used ...,5,"[[this, is, a, very, usefull, book, that, can,...",0
8,AF4QKY2R2TD3U,000100039X,"[(rich, metaphors)]","[""Say not, 'I have found the truth,' but rathe...",5,"[[say, not, i, have, found, the, truth, but, r...",0
9,A3SMT15X2QVUR8,000100039X,"[(orphalese, metaphor)]",[The Prophet Almustafa waits in the city of Or...,5,"[[the, prophet, almustafa, waits, in, the, cit...",0


In [81]:
def identify_excerpt_index_for(review_sentences, pair):
    index = None
    for i in range(len(review_sentences)):
        sentence = review_sentences[i]
        if pair[0] in sentence:
            if pair[1] in sentence:
                index = i
                break
    return index

In [82]:
from tqdm import tqdm

with tqdm(total=len(matrix_m02)) as pbar:
    for i in range(len(matrix_m02)):
        excerpt_indices = []
        actual_sentences = matrix_m02[i][3]
        review_sentences = matrix_m02[i][5]
        pairs = matrix_m02[i][2]
        
        for pair in pairs:
            index_of_sentence_with_pair = identify_excerpt_index_for(review_sentences,pair)
            
            if index_of_sentence_with_pair is not None and index_of_sentence_with_pair not in excerpt_indices:
                excerpt_indices.append(index_of_sentence_with_pair)
    
        excerpts = []
        for index in excerpt_indices:
            excerpts.append(actual_sentences[index])
    
        matrix_m02[i][6] = excerpts
    
        pbar.update(1)

100%|██████████| 249871/249871 [00:06<00:00, 38568.67it/s]


In [83]:
df20 = pd.DataFrame(matrix_m02)
df20.columns = ['reviewerID','asin','pairs','reviewText','overall','norm_sentences','excerpts']
df20.head()

Unnamed: 0,reviewerID,asin,pairs,reviewText,overall,norm_sentences,excerpts
0,A2XQ5LZHTD4AFT,000100039X,"[(birth, prophets), (book, flows)]","[A timeless classic., It is a very demanding a...",5,"[[a, timeless, classic], [it, is, a, very, dem...","[There is much that hints at his birth place, ..."
1,A1NPNGWBVD9AK3,000100039X,"[(relevant, catechism), (within, prophets), (t...",[This is one of the first (literary) books I r...,5,"[[this, is, one, of, the, first, literary, boo...",[I believe that was my first taste of spiritua...
2,AWLFVCT9128JV,000100039X,"[(forty-eight, almustafa)]",[Gibran Khalil Gibran was born in 1883 in what...,5,"[[gibran, khalil, gibran, was, born, in, one t...",[He died of cancer in a New York hospital at t...
3,AFY0BT42DDYZV,000100039X,"[(souls, profits), (wordofmouth, twentysix), (...","[These days, Kahlil Gibran's ""The Prophet"" oft...",5,"[[these, days, kahlil, gibrans, the, prophet, ...","[There is no political, religious, or commerci..."
4,A2ZZHMT58ZMVCZ,000100039X,"[(bear, departs), (others, pillars), (similar,...",[A prophet has waited twelve years in a coasta...,5,"[[a, prophet, has, waited, twelve, years, in, ...",[A local seeress who knows him best asks him t...


In [88]:
df30 = df20[['reviewerID','asin','overall','excerpts']]
df30.head()

Unnamed: 0,reviewerID,asin,overall,excerpts
0,A2XQ5LZHTD4AFT,000100039X,5,"[There is much that hints at his birth place, ..."
1,A1NPNGWBVD9AK3,000100039X,5,[I believe that was my first taste of spiritua...
2,AWLFVCT9128JV,000100039X,5,[He died of cancer in a New York hospital at t...
3,AFY0BT42DDYZV,000100039X,5,"[There is no political, religious, or commerci..."
4,A2ZZHMT58ZMVCZ,000100039X,5,[A local seeress who knows him best asks him t...


In [95]:
len(df30)

249871

In [96]:
df31 = df30[df30['excerpts'].map(lambda excerpts: len(excerpts)) > 0]
len(df31)

231936

## Get Polarity of Excerpts

In [101]:
import numpy as np
from textblob import TextBlob

def get_overal_polarity(excerpts):
    text = ''.join(excerpts)
    blob = TextBlob(text)
    
    polarity = []
    for sentence in blob.sentences:
        polarity.append(sentence.sentiment.polarity)

    return np.mean(polarity)

In [105]:
df40 = df31.assign(polarity = df31['excerpts'].progress_apply(lambda excerpts:get_overal_polarity(excerpts)))
df40.head()




Progress::   0%|          | 0/231936 [00:00<?, ?it/s][A[A[A


Progress::   0%|          | 104/231936 [00:00<03:44, 1034.93it/s][A[A[A


Progress::   0%|          | 196/231936 [00:00<03:52, 997.13it/s] [A[A[A


Progress::   0%|          | 305/231936 [00:00<03:47, 1017.66it/s][A[A[A


Progress::   0%|          | 420/231936 [00:00<03:39, 1052.71it/s][A[A[A


Progress::   0%|          | 518/231936 [00:00<03:44, 1029.55it/s][A[A[A


Progress::   0%|          | 636/231936 [00:00<03:36, 1069.24it/s][A[A[A


Progress::   0%|          | 737/231936 [00:00<03:40, 1050.53it/s][A[A[A


Progress::   0%|          | 845/231936 [00:00<03:38, 1057.18it/s][A[A[A


Progress::   0%|          | 946/231936 [00:00<03:45, 1025.33it/s][A[A[A


Progress::   0%|          | 1061/231936 [00:01<03:38, 1057.26it/s][A[A[A


Progress::   1%|          | 1167/231936 [00:01<03:38, 1057.09it/s][A[A[A


Progress::   1%|          | 1295/231936 [00:01<03:27, 1113.77it/s][A[A[A


Progr

Progress::   5%|▍         | 11186/231936 [00:10<03:36, 1020.55it/s][A[A[A


Progress::   5%|▍         | 11290/231936 [00:11<03:35, 1025.63it/s][A[A[A


Progress::   5%|▍         | 11394/231936 [00:11<03:39, 1003.99it/s][A[A[A


Progress::   5%|▍         | 11507/231936 [00:11<03:32, 1037.45it/s][A[A[A


Progress::   5%|▌         | 11612/231936 [00:11<03:32, 1035.58it/s][A[A[A


Progress::   5%|▌         | 11718/231936 [00:11<03:31, 1042.01it/s][A[A[A


Progress::   5%|▌         | 11834/231936 [00:11<03:25, 1072.00it/s][A[A[A


Progress::   5%|▌         | 11948/231936 [00:11<03:21, 1091.19it/s][A[A[A


Progress::   5%|▌         | 12058/231936 [00:11<03:45, 974.95it/s] [A[A[A


Progress::   5%|▌         | 12176/231936 [00:11<03:33, 1027.41it/s][A[A[A


Progress::   5%|▌         | 12282/231936 [00:11<03:36, 1012.60it/s][A[A[A


Progress::   5%|▌         | 12386/231936 [00:12<03:42, 984.87it/s] [A[A[A


Progress::   5%|▌         | 12486/231936 [00:12<03:4

Progress::  10%|▉         | 22380/231936 [00:21<03:39, 953.35it/s][A[A[A


Progress::  10%|▉         | 22485/231936 [00:22<03:33, 978.75it/s][A[A[A


Progress::  10%|▉         | 22600/231936 [00:22<03:24, 1023.60it/s][A[A[A


Progress::  10%|▉         | 22718/231936 [00:22<03:16, 1065.34it/s][A[A[A


Progress::  10%|▉         | 22826/231936 [00:22<03:15, 1067.52it/s][A[A[A


Progress::  10%|▉         | 22935/231936 [00:22<03:14, 1072.86it/s][A[A[A


Progress::  10%|▉         | 23045/231936 [00:22<03:13, 1079.77it/s][A[A[A


Progress::  10%|▉         | 23154/231936 [00:22<03:18, 1052.40it/s][A[A[A


Progress::  10%|█         | 23260/231936 [00:22<03:25, 1017.66it/s][A[A[A


Progress::  10%|█         | 23363/231936 [00:22<03:24, 1018.63it/s][A[A[A


Progress::  10%|█         | 23472/231936 [00:22<03:20, 1037.86it/s][A[A[A


Progress::  10%|█         | 23580/231936 [00:23<03:19, 1044.89it/s][A[A[A


Progress::  10%|█         | 23691/231936 [00:23<03:16,

Progress::  14%|█▍        | 33431/231936 [00:32<03:21, 983.35it/s][A[A[A


Progress::  14%|█▍        | 33532/231936 [00:32<03:20, 987.86it/s][A[A[A


Progress::  15%|█▍        | 33640/231936 [00:33<03:16, 1011.33it/s][A[A[A


Progress::  15%|█▍        | 33747/231936 [00:33<03:12, 1026.93it/s][A[A[A


Progress::  15%|█▍        | 33851/231936 [00:33<03:15, 1014.35it/s][A[A[A


Progress::  15%|█▍        | 33954/231936 [00:33<03:17, 1002.20it/s][A[A[A


Progress::  15%|█▍        | 34055/231936 [00:33<03:19, 992.11it/s] [A[A[A


Progress::  15%|█▍        | 34159/231936 [00:33<03:17, 1003.86it/s][A[A[A


Progress::  15%|█▍        | 34260/231936 [00:33<03:17, 1000.46it/s][A[A[A


Progress::  15%|█▍        | 34362/231936 [00:33<03:16, 1005.39it/s][A[A[A


Progress::  15%|█▍        | 34481/231936 [00:33<03:08, 1049.38it/s][A[A[A


Progress::  15%|█▍        | 34595/231936 [00:33<03:04, 1072.22it/s][A[A[A


Progress::  15%|█▍        | 34703/231936 [00:34<03:05,

Progress::  19%|█▉        | 43861/231936 [00:43<03:07, 1003.43it/s][A[A[A


Progress::  19%|█▉        | 43970/231936 [00:43<03:03, 1026.65it/s][A[A[A


Progress::  19%|█▉        | 44074/231936 [00:44<03:10, 987.10it/s] [A[A[A


Progress::  19%|█▉        | 44174/231936 [00:44<03:17, 953.07it/s][A[A[A


Progress::  19%|█▉        | 44279/231936 [00:44<03:12, 976.74it/s][A[A[A


Progress::  19%|█▉        | 44382/231936 [00:44<03:09, 991.87it/s][A[A[A


Progress::  19%|█▉        | 44482/231936 [00:44<03:09, 989.87it/s][A[A[A


Progress::  19%|█▉        | 44582/231936 [00:44<03:11, 980.17it/s][A[A[A


Progress::  19%|█▉        | 44682/231936 [00:44<03:10, 985.04it/s][A[A[A


Progress::  19%|█▉        | 44796/231936 [00:44<03:02, 1023.73it/s][A[A[A


Progress::  19%|█▉        | 44899/231936 [00:44<03:11, 979.16it/s] [A[A[A


Progress::  19%|█▉        | 45001/231936 [00:44<03:09, 989.03it/s][A[A[A


Progress::  19%|█▉        | 45104/231936 [00:45<03:07, 997.

Progress::  24%|██▍       | 55157/231936 [00:54<03:00, 979.26it/s] [A[A[A


Progress::  24%|██▍       | 55256/231936 [00:54<03:00, 977.49it/s][A[A[A


Progress::  24%|██▍       | 55355/231936 [00:54<03:11, 923.70it/s][A[A[A


Progress::  24%|██▍       | 55449/231936 [00:55<03:10, 926.96it/s][A[A[A


Progress::  24%|██▍       | 55553/231936 [00:55<03:04, 957.60it/s][A[A[A


Progress::  24%|██▍       | 55673/231936 [00:55<02:53, 1018.61it/s][A[A[A


Progress::  24%|██▍       | 55777/231936 [00:55<02:54, 1010.76it/s][A[A[A


Progress::  24%|██▍       | 55880/231936 [00:55<02:53, 1015.24it/s][A[A[A


Progress::  24%|██▍       | 55983/231936 [00:55<02:59, 982.84it/s] [A[A[A


Progress::  24%|██▍       | 56093/231936 [00:55<02:53, 1013.64it/s][A[A[A


Progress::  24%|██▍       | 56205/231936 [00:55<02:48, 1042.41it/s][A[A[A


Progress::  24%|██▍       | 56323/231936 [00:55<02:44, 1067.80it/s][A[A[A


Progress::  24%|██▍       | 56431/231936 [00:56<02:51, 1

Progress::  29%|██▉       | 66695/231936 [01:05<02:23, 1150.97it/s][A[A[A


Progress::  29%|██▉       | 66816/231936 [01:05<02:21, 1167.35it/s][A[A[A


Progress::  29%|██▉       | 66934/231936 [01:05<02:21, 1168.69it/s][A[A[A


Progress::  29%|██▉       | 67052/231936 [01:05<02:21, 1161.89it/s][A[A[A


Progress::  29%|██▉       | 67173/231936 [01:05<02:20, 1172.65it/s][A[A[A


Progress::  29%|██▉       | 67291/231936 [01:05<02:22, 1157.38it/s][A[A[A


Progress::  29%|██▉       | 67407/231936 [01:06<02:24, 1141.19it/s][A[A[A


Progress::  29%|██▉       | 67522/231936 [01:06<02:24, 1134.44it/s][A[A[A


Progress::  29%|██▉       | 67639/231936 [01:06<02:23, 1144.06it/s][A[A[A


Progress::  29%|██▉       | 67755/231936 [01:06<02:23, 1144.34it/s][A[A[A


Progress::  29%|██▉       | 67870/231936 [01:06<02:23, 1141.43it/s][A[A[A


Progress::  29%|██▉       | 67989/231936 [01:06<02:22, 1148.67it/s][A[A[A


Progress::  29%|██▉       | 68107/231936 [01:06<02:2

Progress::  34%|███▍      | 78405/231936 [01:16<02:16, 1124.31it/s][A[A[A


Progress::  34%|███▍      | 78535/231936 [01:16<02:10, 1171.64it/s][A[A[A


Progress::  34%|███▍      | 78653/231936 [01:16<02:12, 1161.11it/s][A[A[A


Progress::  34%|███▍      | 78773/231936 [01:16<02:10, 1170.40it/s][A[A[A


Progress::  34%|███▍      | 78900/231936 [01:16<02:07, 1197.66it/s][A[A[A


Progress::  34%|███▍      | 79039/231936 [01:16<02:02, 1248.59it/s][A[A[A


Progress::  34%|███▍      | 79167/231936 [01:16<02:01, 1256.98it/s][A[A[A


Progress::  34%|███▍      | 79303/231936 [01:16<01:58, 1284.72it/s][A[A[A


Progress::  34%|███▍      | 79433/231936 [01:17<02:06, 1208.41it/s][A[A[A


Progress::  34%|███▍      | 79556/231936 [01:17<02:05, 1214.40it/s][A[A[A


Progress::  34%|███▍      | 79679/231936 [01:17<02:11, 1161.95it/s][A[A[A


Progress::  34%|███▍      | 79797/231936 [01:17<02:15, 1122.26it/s][A[A[A


Progress::  34%|███▍      | 79911/231936 [01:17<02:1

Progress::  39%|███▊      | 89333/231936 [01:27<02:27, 963.74it/s][A[A[A


Progress::  39%|███▊      | 89453/231936 [01:27<02:19, 1023.03it/s][A[A[A


Progress::  39%|███▊      | 89557/231936 [01:27<02:22, 997.91it/s] [A[A[A


Progress::  39%|███▊      | 89665/231936 [01:27<02:19, 1020.42it/s][A[A[A


Progress::  39%|███▊      | 89778/231936 [01:27<02:15, 1050.61it/s][A[A[A


Progress::  39%|███▉      | 89884/231936 [01:27<02:19, 1015.88it/s][A[A[A


Progress::  39%|███▉      | 89998/231936 [01:27<02:15, 1048.60it/s][A[A[A


Progress::  39%|███▉      | 90104/231936 [01:27<02:16, 1039.92it/s][A[A[A


Progress::  39%|███▉      | 90228/231936 [01:27<02:10, 1088.73it/s][A[A[A


Progress::  39%|███▉      | 90354/231936 [01:28<02:04, 1134.19it/s][A[A[A


Progress::  39%|███▉      | 90491/231936 [01:28<01:58, 1193.12it/s][A[A[A


Progress::  39%|███▉      | 90616/231936 [01:28<01:57, 1207.59it/s][A[A[A


Progress::  39%|███▉      | 90743/231936 [01:28<01:55

Progress::  44%|████▍     | 101719/231936 [01:37<01:50, 1178.47it/s][A[A[A


Progress::  44%|████▍     | 101838/231936 [01:37<01:59, 1090.12it/s][A[A[A


Progress::  44%|████▍     | 101950/231936 [01:38<01:58, 1096.88it/s][A[A[A


Progress::  44%|████▍     | 102061/231936 [01:38<01:58, 1096.33it/s][A[A[A


Progress::  44%|████▍     | 102183/231936 [01:38<01:55, 1125.58it/s][A[A[A


Progress::  44%|████▍     | 102298/231936 [01:38<01:54, 1131.82it/s][A[A[A


Progress::  44%|████▍     | 102417/231936 [01:38<01:52, 1147.39it/s][A[A[A


Progress::  44%|████▍     | 102533/231936 [01:38<01:53, 1138.71it/s][A[A[A


Progress::  44%|████▍     | 102657/231936 [01:38<01:50, 1167.22it/s][A[A[A


Progress::  44%|████▍     | 102775/231936 [01:38<01:50, 1165.48it/s][A[A[A


Progress::  44%|████▍     | 102909/231936 [01:38<01:46, 1211.20it/s][A[A[A


Progress::  44%|████▍     | 103036/231936 [01:38<01:45, 1225.18it/s][A[A[A


Progress::  44%|████▍     | 103162/23193

Progress::  49%|████▊     | 112802/231936 [01:48<01:37, 1223.83it/s][A[A[A


Progress::  49%|████▊     | 112925/231936 [01:48<01:44, 1136.68it/s][A[A[A


Progress::  49%|████▊     | 113041/231936 [01:48<01:47, 1109.93it/s][A[A[A


Progress::  49%|████▉     | 113154/231936 [01:48<01:48, 1097.54it/s][A[A[A


Progress::  49%|████▉     | 113265/231936 [01:48<01:52, 1052.25it/s][A[A[A


Progress::  49%|████▉     | 113372/231936 [01:49<01:59, 989.87it/s] [A[A[A


Progress::  49%|████▉     | 113473/231936 [01:49<02:08, 920.31it/s][A[A[A


Progress::  49%|████▉     | 113574/231936 [01:49<02:05, 945.27it/s][A[A[A


Progress::  49%|████▉     | 113674/231936 [01:49<02:03, 960.44it/s][A[A[A


Progress::  49%|████▉     | 113772/231936 [01:49<02:05, 943.75it/s][A[A[A


Progress::  49%|████▉     | 113874/231936 [01:49<02:02, 962.26it/s][A[A[A


Progress::  49%|████▉     | 113972/231936 [01:49<02:02, 964.90it/s][A[A[A


Progress::  49%|████▉     | 114073/231936 [01:

Progress::  54%|█████▎    | 124238/231936 [01:59<01:48, 992.71it/s][A[A[A


Progress::  54%|█████▎    | 124338/231936 [01:59<01:48, 987.14it/s][A[A[A


Progress::  54%|█████▎    | 124438/231936 [01:59<01:49, 984.73it/s][A[A[A


Progress::  54%|█████▎    | 124554/231936 [01:59<01:44, 1030.53it/s][A[A[A


Progress::  54%|█████▍    | 124666/231936 [01:59<01:41, 1053.91it/s][A[A[A


Progress::  54%|█████▍    | 124773/231936 [01:59<01:46, 1006.75it/s][A[A[A


Progress::  54%|█████▍    | 124879/231936 [01:59<01:45, 1018.24it/s][A[A[A


Progress::  54%|█████▍    | 124997/231936 [02:00<01:40, 1059.84it/s][A[A[A


Progress::  54%|█████▍    | 125104/231936 [02:00<01:41, 1056.07it/s][A[A[A


Progress::  54%|█████▍    | 125211/231936 [02:00<01:43, 1031.69it/s][A[A[A


Progress::  54%|█████▍    | 125326/231936 [02:00<01:40, 1062.14it/s][A[A[A


Progress::  54%|█████▍    | 125433/231936 [02:00<01:45, 1007.80it/s][A[A[A


Progress::  54%|█████▍    | 125540/231936 [

Progress::  58%|█████▊    | 134986/231936 [02:10<01:29, 1082.57it/s][A[A[A


Progress::  58%|█████▊    | 135096/231936 [02:10<01:33, 1033.32it/s][A[A[A


Progress::  58%|█████▊    | 135201/231936 [02:10<01:36, 1002.04it/s][A[A[A


Progress::  58%|█████▊    | 135308/231936 [02:10<01:34, 1019.74it/s][A[A[A


Progress::  58%|█████▊    | 135424/231936 [02:10<01:31, 1055.90it/s][A[A[A


Progress::  58%|█████▊    | 135539/231936 [02:10<01:29, 1081.57it/s][A[A[A


Progress::  58%|█████▊    | 135648/231936 [02:10<01:29, 1074.61it/s][A[A[A


Progress::  59%|█████▊    | 135773/231936 [02:10<01:26, 1116.54it/s][A[A[A


Progress::  59%|█████▊    | 135886/231936 [02:11<01:26, 1106.44it/s][A[A[A


Progress::  59%|█████▊    | 135998/231936 [02:11<01:30, 1059.40it/s][A[A[A


Progress::  59%|█████▊    | 136105/231936 [02:11<01:31, 1053.06it/s][A[A[A


Progress::  59%|█████▊    | 136211/231936 [02:11<01:32, 1030.34it/s][A[A[A


Progress::  59%|█████▉    | 136318/23193

Progress::  63%|██████▎   | 146042/231936 [02:21<01:30, 949.83it/s][A[A[A


Progress::  63%|██████▎   | 146138/231936 [02:21<01:30, 952.13it/s][A[A[A


Progress::  63%|██████▎   | 146234/231936 [02:21<01:34, 910.33it/s][A[A[A


Progress::  63%|██████▎   | 146326/231936 [02:21<01:37, 879.96it/s][A[A[A


Progress::  63%|██████▎   | 146415/231936 [02:21<01:36, 881.85it/s][A[A[A


Progress::  63%|██████▎   | 146531/231936 [02:21<01:29, 949.38it/s][A[A[A


Progress::  63%|██████▎   | 146639/231936 [02:21<01:26, 982.08it/s][A[A[A


Progress::  63%|██████▎   | 146739/231936 [02:21<01:35, 892.03it/s][A[A[A


Progress::  63%|██████▎   | 146832/231936 [02:21<01:34, 902.44it/s][A[A[A


Progress::  63%|██████▎   | 146949/231936 [02:21<01:27, 968.03it/s][A[A[A


Progress::  63%|██████▎   | 147056/231936 [02:22<01:25, 995.05it/s][A[A[A


Progress::  63%|██████▎   | 147162/231936 [02:22<01:23, 1009.32it/s][A[A[A


Progress::  63%|██████▎   | 147265/231936 [02:22<01

Progress::  68%|██████▊   | 156589/231936 [02:31<01:19, 944.08it/s][A[A[A


Progress::  68%|██████▊   | 156685/231936 [02:32<01:22, 917.39it/s][A[A[A


Progress::  68%|██████▊   | 156780/231936 [02:32<01:21, 926.36it/s][A[A[A


Progress::  68%|██████▊   | 156885/231936 [02:32<01:18, 959.67it/s][A[A[A


Progress::  68%|██████▊   | 156989/231936 [02:32<01:16, 980.34it/s][A[A[A


Progress::  68%|██████▊   | 157088/231936 [02:32<01:17, 967.01it/s][A[A[A


Progress::  68%|██████▊   | 157193/231936 [02:32<01:15, 989.58it/s][A[A[A


Progress::  68%|██████▊   | 157293/231936 [02:32<01:15, 987.76it/s][A[A[A


Progress::  68%|██████▊   | 157393/231936 [02:32<01:20, 929.09it/s][A[A[A


Progress::  68%|██████▊   | 157487/231936 [02:32<01:23, 895.05it/s][A[A[A


Progress::  68%|██████▊   | 157589/231936 [02:32<01:20, 927.93it/s][A[A[A


Progress::  68%|██████▊   | 157693/231936 [02:33<01:17, 954.82it/s][A[A[A


Progress::  68%|██████▊   | 157803/231936 [02:33<01:

Progress::  72%|███████▏  | 167317/231936 [02:42<01:09, 931.14it/s][A[A[A


Progress::  72%|███████▏  | 167422/231936 [02:42<01:06, 963.03it/s][A[A[A


Progress::  72%|███████▏  | 167524/231936 [02:43<01:05, 978.29it/s][A[A[A


Progress::  72%|███████▏  | 167628/231936 [02:43<01:04, 992.68it/s][A[A[A


Progress::  72%|███████▏  | 167728/231936 [02:43<01:08, 935.73it/s][A[A[A


Progress::  72%|███████▏  | 167825/231936 [02:43<01:07, 944.75it/s][A[A[A


Progress::  72%|███████▏  | 167921/231936 [02:43<01:09, 915.84it/s][A[A[A


Progress::  72%|███████▏  | 168024/231936 [02:43<01:07, 946.77it/s][A[A[A


Progress::  72%|███████▏  | 168131/231936 [02:43<01:05, 976.50it/s][A[A[A


Progress::  73%|███████▎  | 168238/231936 [02:43<01:03, 1000.81it/s][A[A[A


Progress::  73%|███████▎  | 168339/231936 [02:43<01:04, 985.59it/s] [A[A[A


Progress::  73%|███████▎  | 168439/231936 [02:44<01:08, 931.12it/s][A[A[A


Progress::  73%|███████▎  | 168542/231936 [02:44<0

Progress::  77%|███████▋  | 178176/231936 [02:53<00:48, 1098.48it/s][A[A[A


Progress::  77%|███████▋  | 178290/231936 [02:53<00:51, 1042.27it/s][A[A[A


Progress::  77%|███████▋  | 178398/231936 [02:53<00:55, 957.27it/s] [A[A[A


Progress::  77%|███████▋  | 178498/231936 [02:54<00:56, 951.73it/s][A[A[A


Progress::  77%|███████▋  | 178603/231936 [02:54<00:54, 973.89it/s][A[A[A


Progress::  77%|███████▋  | 178703/231936 [02:54<00:54, 971.97it/s][A[A[A


Progress::  77%|███████▋  | 178811/231936 [02:54<00:53, 1001.62it/s][A[A[A


Progress::  77%|███████▋  | 178927/231936 [02:54<00:50, 1043.64it/s][A[A[A


Progress::  77%|███████▋  | 179033/231936 [02:54<00:50, 1044.10it/s][A[A[A


Progress::  77%|███████▋  | 179148/231936 [02:54<00:49, 1073.75it/s][A[A[A


Progress::  77%|███████▋  | 179257/231936 [02:54<00:49, 1070.89it/s][A[A[A


Progress::  77%|███████▋  | 179365/231936 [02:54<00:52, 993.16it/s] [A[A[A


Progress::  77%|███████▋  | 179466/231936 [

Progress::  81%|████████▏ | 188932/231936 [03:04<00:46, 926.31it/s][A[A[A


Progress::  82%|████████▏ | 189034/231936 [03:04<00:45, 949.24it/s][A[A[A


Progress::  82%|████████▏ | 189130/231936 [03:04<00:45, 945.96it/s][A[A[A


Progress::  82%|████████▏ | 189228/231936 [03:04<00:44, 953.90it/s][A[A[A


Progress::  82%|████████▏ | 189331/231936 [03:04<00:43, 974.20it/s][A[A[A


Progress::  82%|████████▏ | 189433/231936 [03:05<00:43, 984.62it/s][A[A[A


Progress::  82%|████████▏ | 189540/231936 [03:05<00:42, 1008.61it/s][A[A[A


Progress::  82%|████████▏ | 189642/231936 [03:05<00:42, 996.69it/s] [A[A[A


Progress::  82%|████████▏ | 189742/231936 [03:05<00:42, 983.27it/s][A[A[A


Progress::  82%|████████▏ | 189841/231936 [03:05<00:45, 922.86it/s][A[A[A


Progress::  82%|████████▏ | 189946/231936 [03:05<00:43, 956.37it/s][A[A[A


Progress::  82%|████████▏ | 190051/231936 [03:05<00:42, 982.49it/s][A[A[A


Progress::  82%|████████▏ | 190155/231936 [03:05<0

Progress::  87%|████████▋ | 200637/231936 [04:25<00:31, 1006.59it/s][A[A[A


Progress::  87%|████████▋ | 200743/231936 [04:26<00:30, 1011.28it/s][A[A[A


Progress::  87%|████████▋ | 200861/231936 [04:26<00:29, 1054.79it/s][A[A[A


Progress::  87%|████████▋ | 200970/231936 [04:26<00:29, 1045.12it/s][A[A[A


Progress::  87%|████████▋ | 201086/231936 [04:26<00:28, 1076.66it/s][A[A[A


Progress::  87%|████████▋ | 201198/231936 [04:26<00:28, 1082.64it/s][A[A[A


Progress::  87%|████████▋ | 201308/231936 [04:26<00:30, 1010.29it/s][A[A[A


Progress::  87%|████████▋ | 201420/231936 [04:26<00:29, 1039.62it/s][A[A[A


Progress::  87%|████████▋ | 201535/231936 [04:26<00:28, 1070.01it/s][A[A[A


Progress::  87%|████████▋ | 201644/231936 [04:26<00:30, 992.46it/s] [A[A[A


Progress::  87%|████████▋ | 201758/231936 [04:27<00:29, 1028.39it/s][A[A[A


Progress::  87%|████████▋ | 201863/231936 [04:27<00:34, 868.44it/s] [A[A[A


Progress::  87%|████████▋ | 201956/23193

Progress::  91%|█████████ | 211305/231936 [04:36<00:24, 857.59it/s][A[A[A


Progress::  91%|█████████ | 211401/231936 [04:36<00:23, 884.94it/s][A[A[A


Progress::  91%|█████████ | 211517/231936 [04:37<00:21, 951.71it/s][A[A[A


Progress::  91%|█████████ | 211623/231936 [04:37<00:20, 980.88it/s][A[A[A


Progress::  91%|█████████▏| 211726/231936 [04:37<00:20, 994.13it/s][A[A[A


Progress::  91%|█████████▏| 211827/231936 [04:37<00:20, 979.72it/s][A[A[A


Progress::  91%|█████████▏| 211933/231936 [04:37<00:19, 1002.10it/s][A[A[A


Progress::  91%|█████████▏| 212052/231936 [04:37<00:18, 1050.87it/s][A[A[A


Progress::  91%|█████████▏| 212159/231936 [04:37<00:18, 1049.64it/s][A[A[A


Progress::  92%|█████████▏| 212271/231936 [04:37<00:18, 1069.05it/s][A[A[A


Progress::  92%|█████████▏| 212379/231936 [04:37<00:18, 1061.91it/s][A[A[A


Progress::  92%|█████████▏| 212486/231936 [04:37<00:19, 1010.20it/s][A[A[A


Progress::  92%|█████████▏| 212588/231936 [04:

Progress::  96%|█████████▌| 222099/231936 [04:47<00:10, 966.07it/s][A[A[A


Progress::  96%|█████████▌| 222197/231936 [04:47<00:10, 969.86it/s][A[A[A


Progress::  96%|█████████▌| 222309/231936 [04:48<00:09, 1010.08it/s][A[A[A


Progress::  96%|█████████▌| 222415/231936 [04:48<00:09, 1021.79it/s][A[A[A


Progress::  96%|█████████▌| 222518/231936 [04:48<00:09, 1006.23it/s][A[A[A


Progress::  96%|█████████▌| 222637/231936 [04:48<00:08, 1052.80it/s][A[A[A


Progress::  96%|█████████▌| 222744/231936 [04:48<00:08, 1052.04it/s][A[A[A


Progress::  96%|█████████▌| 222850/231936 [04:48<00:08, 1035.02it/s][A[A[A


Progress::  96%|█████████▌| 222961/231936 [04:48<00:08, 1055.45it/s][A[A[A


Progress::  96%|█████████▌| 223071/231936 [04:48<00:08, 1064.72it/s][A[A[A


Progress::  96%|█████████▌| 223178/231936 [04:48<00:08, 1007.21it/s][A[A[A


Progress::  96%|█████████▋| 223286/231936 [04:48<00:08, 1025.69it/s][A[A[A


Progress::  96%|█████████▋| 223402/231936 

Unnamed: 0,reviewerID,asin,overall,excerpts,polarity
0,A2XQ5LZHTD4AFT,000100039X,5,"[There is much that hints at his birth place, ...",0.332292
1,A1NPNGWBVD9AK3,000100039X,5,[I believe that was my first taste of spiritua...,0.425
2,AWLFVCT9128JV,000100039X,5,[He died of cancer in a New York hospital at t...,0.133182
3,AFY0BT42DDYZV,000100039X,5,"[There is no political, religious, or commerci...",0.155729
4,A2ZZHMT58ZMVCZ,000100039X,5,[A local seeress who knows him best asks him t...,0.09658
