## 88 Project: Narrative Pace in "A Passage to India"

## Code Preparation

In [1]:
import numpy as np

In [2]:
import nltk

In [3]:
modules = ["averaged_perceptron_tagger", "maxent_ne_chunker", "punkt",\
           "words", "cmudict", "wordnet"]

for module in modules: 
    nltk.download(module)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Barbara Montano\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to C:\Users\Barbara
[nltk_data]     Montano\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Barbara
[nltk_data]     Montano\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package words to C:\Users\Barbara
[nltk_data]     Montano\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package cmudict to C:\Users\Barbara
[nltk_data]     Montano\AppData\Roaming\nltk_data...
[nltk_data]   Package cmudict is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Barbara
[nltk_data]     Montano\AppData\Roaming\n

In [4]:
from nltk.tokenize import word_tokenize, sent_tokenize
from string import punctuation 

In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from urllib.request import urlopen 
from collections import Counter
from datascience import *
import re

def read_url(url): 
    return re.sub('\\s+', ' ', urlopen(url).read().decode())

## Data Preparation

In [6]:
with open('APassageToIndiaText.txt', 'r') as file_in: #with open opens and closes the file #file_in = variable
    passage_string = file_in.read()

In [7]:
passage_tokens = passage_string.split()
passage_tokens

['A',
 'PASSAGE',
 'TO',
 'INDIA',
 'PART',
 'I:',
 'MOSQUE',
 'CHAPTER',
 'I',
 'Except',
 'for',
 'the',
 'Marabar',
 'Cavesâ€”and',
 'they',
 'are',
 'twenty',
 'miles',
 'offâ€”the',
 'city',
 'of',
 'Chandrapore',
 'presents',
 'nothing',
 'extraordinary.',
 'Edged',
 'rather',
 'than',
 'washed',
 'by',
 'the',
 'river',
 'Ganges,',
 'it',
 'trails',
 'for',
 'a',
 'couple',
 'of',
 'miles',
 'along',
 'the',
 'bank,',
 'scarcely',
 'distinguishable',
 'from',
 'the',
 'rubbish',
 'it',
 'deposits',
 'so',
 'freely.',
 'There',
 'are',
 'no',
 'bathing-steps',
 'on',
 'the',
 'river',
 'front,',
 'as',
 'the',
 'Ganges',
 'happens',
 'not',
 'to',
 'be',
 'holy',
 'here;',
 'indeed',
 'there',
 'is',
 'no',
 'river',
 'front,',
 'and',
 'bazaars',
 'shut',
 'out',
 'the',
 'wide',
 'and',
 'shifting',
 'panorama',
 'of',
 'the',
 'stream.',
 'The',
 'streets',
 'are',
 'mean,',
 'the',
 'temples',
 'ineffective,',
 'and',
 'though',
 'a',
 'few',
 'fine',
 'houses',
 'exist',
 't

In [8]:
import string
passage_string = passage_string.lower()
passage_tokens = "".join([char for char in passage_string if char not in string.punctuation]).split()
passage_tokens

['a',
 'passage',
 'to',
 'india',
 'part',
 'i',
 'mosque',
 'chapter',
 'i',
 'except',
 'for',
 'the',
 'marabar',
 'cavesâ€”and',
 'they',
 'are',
 'twenty',
 'miles',
 'offâ€”the',
 'city',
 'of',
 'chandrapore',
 'presents',
 'nothing',
 'extraordinary',
 'edged',
 'rather',
 'than',
 'washed',
 'by',
 'the',
 'river',
 'ganges',
 'it',
 'trails',
 'for',
 'a',
 'couple',
 'of',
 'miles',
 'along',
 'the',
 'bank',
 'scarcely',
 'distinguishable',
 'from',
 'the',
 'rubbish',
 'it',
 'deposits',
 'so',
 'freely',
 'there',
 'are',
 'no',
 'bathingsteps',
 'on',
 'the',
 'river',
 'front',
 'as',
 'the',
 'ganges',
 'happens',
 'not',
 'to',
 'be',
 'holy',
 'here',
 'indeed',
 'there',
 'is',
 'no',
 'river',
 'front',
 'and',
 'bazaars',
 'shut',
 'out',
 'the',
 'wide',
 'and',
 'shifting',
 'panorama',
 'of',
 'the',
 'stream',
 'the',
 'streets',
 'are',
 'mean',
 'the',
 'temples',
 'ineffective',
 'and',
 'though',
 'a',
 'few',
 'fine',
 'houses',
 'exist',
 'they',
 'are'

In [9]:
from collections import Counter
passage_dict = Counter(passage_tokens)
passage_dict

Counter({'flareup': 1,
         'quivers': 1,
         'allâ€”': 1,
         'are': 368,
         'absorbed': 1,
         'evil': 23,
         'presence': 9,
         'dimensions': 1,
         'ache': 1,
         'jats': 1,
         'counterpoises': 1,
         'rebuke': 1,
         'nearest': 3,
         'sentimentally': 1,
         'revenge': 5,
         'prove': 8,
         'heroes': 1,
         'sweetest': 1,
         'debauched': 1,
         'evils': 1,
         'husband': 13,
         'azizs': 1,
         'howled': 3,
         'mans': 7,
         'classic': 1,
         'inevitably': 4,
         'rains': 4,
         'wear': 5,
         'christmas': 1,
         'reappeared': 2,
         'cautious': 5,
         'co': 1,
         'bethought': 1,
         'nowhere': 5,
         'quarrels': 3,
         'blithering': 1,
         'yourselves': 3,
         'striped': 1,
         'thrown': 10,
         'nested': 1,
         'cliff': 2,
         'porridge': 2,
         'educate': 3,
       

In [10]:
passage_dict.keys()



In [11]:
passage_dict.values()

dict_values([1, 2, 2, 1, 1, 368, 43, 23, 1, 3, 1, 21, 1, 2, 5, 1, 1, 1, 14, 1, 2, 1, 1, 3, 1, 3, 3, 1, 5, 11, 8, 1, 1, 7, 1, 1, 36, 1, 1, 3, 1, 1, 13, 1, 2, 1, 3, 1, 7, 1, 1, 11, 1, 4, 2, 1, 4, 5, 11, 1, 2, 1, 2, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 9, 2, 3, 1, 59, 1, 2, 4, 10, 1, 2, 1, 21, 87, 1, 1, 2, 1, 1, 1, 7, 3, 1, 1, 1, 51, 1, 1, 1, 1, 56, 6, 2, 1, 23, 2, 2, 5, 18, 1, 1, 1, 1, 9, 1, 2, 3, 3, 1, 1, 4, 2, 3, 1, 7, 1, 8, 1, 3, 1, 1, 1, 2, 1, 5, 6, 72, 1, 1, 1, 1, 7, 7, 1, 2, 24, 1, 7, 1, 1, 1, 2, 5, 1, 4, 1, 52, 2, 1, 6, 25, 1, 3, 1, 1, 1, 1, 1, 2, 2, 1, 244, 22, 1, 2, 2, 3, 1, 2, 1, 1, 1, 1, 2, 3, 1, 2, 14, 1, 1, 1, 10, 1, 7, 1, 1, 1, 1, 1, 1, 6, 1, 1, 9, 1, 7, 1, 7, 1, 1, 1, 2, 4, 4, 1, 1, 1, 3, 1, 4, 2, 1, 1, 2, 5, 7, 5, 3, 101, 5, 1, 1, 1, 7, 1, 1, 3, 7, 1, 2, 8, 1, 1, 5, 28, 5, 2, 3, 1, 1, 1, 12, 12, 1, 3, 3, 1, 1, 2, 8, 4, 1, 1, 1, 2, 4, 3, 5, 1, 1, 25, 1, 3, 7, 1, 272, 18, 1, 1, 4, 7, 1, 13, 1, 37, 38, 1, 6, 19, 1, 1, 50, 2, 3, 1, 1, 1, 5, 3, 1, 1, 5, 10, 1, 3, 6, 2, 5, 7, 3, 2, 1,

In [12]:
passage_dict.most_common(2)

[('the', 5410), ('and', 3059)]

In [13]:
len(passage_tokens) / len(passage_dict.keys())
#Lexical diversity of the novel! 

10.253432319739652

## Counting Sentiment Words

In [14]:
with open('PositiveWordstext.txt') as file_in: 
    positive_sentiment_words = file_in.read()
positive_words = positive_sentiment_words.split()
positive_dict = Counter(positive_words)
positive_keys = positive_dict.keys()
positive_keys

dict_keys(['prudence', 'afordable', 'constructive', 'genuine', 'brave', 'amenable', 'peach', 'satisfying', 'impartiality', 'examplar', 'magic', 'resolute', 'amaze', 'easy', 'sincerely', 'steadfastness', 'fortuitous', 'eminence', 'remission', 'trusted', 'admirably', 'unwavering', 'eloquent', 'amusingly', 'tranquility', 'self-satisfaction', 'affordably', 'well-wishers', 'sensible', 'adoringly', 'heroize', 'celebration', 'inpressed', 'tingle', 'competitive', 'terrifically', 'pleasurably', 'ennoble', 'valor', 'comfortably', 'liberation', 'neat', 'classic', 'favored', 'polished', 'persevere', 'peacekeepers', 'enjoyably', 'mesmerizingly', 'outperform', 'skilled', 'delectable', 'spontaneous', 'avidly', 'astonishment', 'halcyon', 'geekier', 'earnestly', 'comfy', 'enviousness', 'attractive', 'astound', 'accommodative', 'respectfully', 'wholeheartedly', 'kindliness', 'reforms', 'progressive', 'long-lasting', 'prodigious', 'fastest-growing', 'fondness', 'outstanding', 'warm', 'calming', 'defeats'

In [15]:
with open('NegativeWordstext.txt') as file_in: 
    negative_sentiment_words = file_in.read()
negative_words = negative_sentiment_words.split()
negative_dict = Counter(negative_words)
negative_keys = negative_dict.keys()
negative_keys



In [None]:
#Need to break up text, compare each segment to list of positive or negative words, then count how many positive or negative words
#are in the list, then map that across the text

In [18]:
#Segment by chapter: 
passage_list = passage_string.split('chapter')
passage_array = np.array(passage_list)

In [19]:
column_names = ['Chapter ' + str(i+1) for i in range(len(passage_array))]
column_names
column_rows = [i for i in passage_list]
column_rows
Table().with_column('Chapter', column_names).with_column('Segment', column_rows)

Chapter,Segment
Chapter 1,a passage to india part i: mosque
Chapter 2,i except for the marabar cavesâ€”and they are twenty ...
Chapter 3,"ii abandoning his bicycle, which fell before a servan ..."
Chapter 4,iii the third act of cousin kate_ was well advanced b ...
Chapter 5,iv the collector kept his word. next day he issued in ...
Chapter 6,v the bridge party was not a successâ€”at least it wa ...
Chapter 7,vi ! aziz had not gone to the bridge party. immediate ...
Chapter 8,vii this mr. fielding had been caught by india late. ...
Chapter 9,viii although miss quested had known ronny well in en ...
Chapter 10,ix aziz fell ill as he foretoldâ€”slightly ill. thr ...


In [27]:
#Example of old code: 
#sentence_list = passage_string.split('.')
#len(sentence_list)
#sentence_array = np.array(sentence_list)
#sentence_array

In [20]:
#Segment by sentence: 
sentence_list = sent_tokenize(passage_string)
sentence_list

['a passage to india \npart i: mosque \nchapter i \n\nexcept for the marabar cavesâ€”and they are twenty miles offâ€”the city of chandrapore presents \nnothing extraordinary.',
 'edged rather than washed by the river ganges, it trails for a couple \nof miles along the bank, scarcely distinguishable from the rubbish it deposits so freely.',
 'there \nare no bathing-steps on the river front, as the ganges happens not to be holy here; indeed \nthere is no river front, and bazaars shut out the wide and shifting panorama of the stream.',
 'the \nstreets are mean, the temples ineffective, and though a few fine houses exist they are hidden \naway in gardens or down alleys whose filth deters all but the invited guest.',
 'chandrapore was \nnever large or beautiful, but two hundred years ago it lay on the road between upper india, \nthen imperial, and the sea, and the fine houses date from that period.',
 'the zest for decoration \nstopped in the eighteenth century, nor was it ever democratic.'

In [22]:
sentence_number = ['Sentence ' + str(i+1) for i in range(len(sentence_list))]
sentence_number
sentence_text = [i for i in sentence_list]
sentence_text
sentence_table = Table().with_column('Sentence Number', sentence_number).with_column('Sentence Text', sentence_text)
sentence_table

Sentence Number,Sentence Text
Sentence 1,a passage to india part i: mosque chapter i except f ...
Sentence 2,"edged rather than washed by the river ganges, it trails ..."
Sentence 3,"there are no bathing-steps on the river front, as the g ..."
Sentence 4,"the streets are mean, the temples ineffective, and thou ..."
Sentence 5,"chandrapore was never large or beautiful, but two hundr ..."
Sentence 6,the zest for decoration stopped in the eighteenth centu ...
Sentence 7,there is no painting and scarcely any carving in the ba ...
Sentence 8,"the very wood seems made of mud, the inhabitants of mud ..."
Sentence 9,"so abased, so monotonous is everything that meets the ey ..."
Sentence 10,"houses do fall, people are drowned and left rotting, bu ..."


In [23]:
#Need a function that takes in a string of words. If a word in that string is in the negative words list, return that word. 
#Then do this for positive words
#Add this to the table so that there is another column of positive and negative words in each string. 
#Then, count the number of words in that column and put it into a new column. 
def negative(string): 
    word_list = []
    for i in string.split(): 
        if i in negative_keys: 
            word_list.append(i)
    return word_list
negative('an ugly dirty ugly bad dog')

['ugly', 'dirty', 'ugly', 'bad']

In [24]:
def positive(string): 
    word_list = []
    for i in string.split():
        if i in positive_keys:
            word_list.append(i)
    return word_list
positive('a beautiful happy happy wonderful dog')

['beautiful', 'happy', 'happy', 'wonderful']

In [25]:
negative_words = sentence_table.apply(negative, 'Sentence Text')
positive_words = sentence_table.apply(positive, 'Sentence Text')
almost_full_table = sentence_table.with_column('Negative Words', negative_words).with_column('Positive Words', positive_words)
almost_full_table

Sentence Number,Sentence Text,Negative Words,Positive Words
Sentence 1,a passage to india part i: mosque chapter i except f ...,[],[]
Sentence 2,"edged rather than washed by the river ganges, it trails ...","['scarcely', 'rubbish']",[]
Sentence 3,"there are no bathing-steps on the river front, as the g ...",[],['holy']
Sentence 4,"the streets are mean, the temples ineffective, and thou ...",['filth'],['fine']
Sentence 5,"chandrapore was never large or beautiful, but two hundr ...",[],['fine']
Sentence 6,the zest for decoration stopped in the eighteenth centu ...,[],['zest']
Sentence 7,there is no painting and scarcely any carving in the ba ...,['scarcely'],[]
Sentence 8,"the very wood seems made of mud, the inhabitants of mud ...",[],[]
Sentence 9,"so abased, so monotonous is everything that meets the ey ...",['monotonous'],[]
Sentence 10,"houses do fall, people are drowned and left rotting, bu ...",['swelling'],['like']


In [26]:
negative_lengths = [len(i) for i in almost_full_table.column('Negative Words')]
negative_lengths
positive_lengths = [len(i) for i in almost_full_table.column('Positive Words')]
positive_lengths
total_count = np.array(positive_lengths) + np.array(negative_lengths)
total_count

array([0, 2, 1, ..., 0, 0, 0])

In [31]:
nearly_there = almost_full_table.with_column('Negative Count', 
    negative_lengths).with_column('Positive Count', positive_lengths).with_column('Total', total_count)
nearly_there

Sentence Number,Sentence Text,Negative Words,Positive Words,Negative Count,Positive Count,Total
Sentence 1,a passage to india part i: mosque chapter i except f ...,[],[],0,0,0
Sentence 2,"edged rather than washed by the river ganges, it trails ...","['scarcely', 'rubbish']",[],2,0,2
Sentence 3,"there are no bathing-steps on the river front, as the g ...",[],['holy'],0,1,1
Sentence 4,"the streets are mean, the temples ineffective, and thou ...",['filth'],['fine'],1,1,2
Sentence 5,"chandrapore was never large or beautiful, but two hundr ...",[],['fine'],0,1,1
Sentence 6,the zest for decoration stopped in the eighteenth centu ...,[],['zest'],0,1,1
Sentence 7,there is no painting and scarcely any carving in the ba ...,['scarcely'],[],1,0,1
Sentence 8,"the very wood seems made of mud, the inhabitants of mud ...",[],[],0,0,0
Sentence 9,"so abased, so monotonous is everything that meets the ey ...",['monotonous'],[],1,0,1
Sentence 10,"houses do fall, people are drowned and left rotting, bu ...",['swelling'],['like'],1,1,2


In [33]:
sentence_length = [len(i) for i in almost_full_table.column('Sentence Text')]
percentage = total_count / sentence_length
percentage

array([ 0.        ,  0.01257862,  0.00526316, ...,  0.        ,
        0.        ,  0.        ])

In [43]:
sentiment_table = nearly_there.with_column('Percentage', percentage)
sentiment_table

Sentence Number,Sentence Text,Negative Words,Positive Words,Negative Count,Positive Count,Total,Percentage
Sentence 1,a passage to india part i: mosque chapter i except f ...,[],[],0,0,0,0.0
Sentence 2,"edged rather than washed by the river ganges, it trails ...","['scarcely', 'rubbish']",[],2,0,2,0.0125786
Sentence 3,"there are no bathing-steps on the river front, as the g ...",[],['holy'],0,1,1,0.00526316
Sentence 4,"the streets are mean, the temples ineffective, and thou ...",['filth'],['fine'],1,1,2,0.0113636
Sentence 5,"chandrapore was never large or beautiful, but two hundr ...",[],['fine'],0,1,1,0.00555556
Sentence 6,the zest for decoration stopped in the eighteenth centu ...,[],['zest'],0,1,1,0.0114943
Sentence 7,there is no painting and scarcely any carving in the ba ...,['scarcely'],[],1,0,1,0.016129
Sentence 8,"the very wood seems made of mud, the inhabitants of mud ...",[],[],0,0,0,0.0
Sentence 9,"so abased, so monotonous is everything that meets the ey ...",['monotonous'],[],1,0,1,0.00641026
Sentence 10,"houses do fall, people are drowned and left rotting, bu ...",['swelling'],['like'],1,1,2,0.0111732


In [44]:
sentiment_table.show(500)

Sentence Number,Sentence Text,Negative Words,Positive Words,Negative Count,Positive Count,Total,Percentage
Sentence 1,a passage to india part i: mosque chapter i except f ...,[],[],0,0,0,0.0
Sentence 2,"edged rather than washed by the river ganges, it trails ...","['scarcely', 'rubbish']",[],2,0,2,0.0125786
Sentence 3,"there are no bathing-steps on the river front, as the g ...",[],['holy'],0,1,1,0.00526316
Sentence 4,"the streets are mean, the temples ineffective, and thou ...",['filth'],['fine'],1,1,2,0.0113636
Sentence 5,"chandrapore was never large or beautiful, but two hundr ...",[],['fine'],0,1,1,0.00555556
Sentence 6,the zest for decoration stopped in the eighteenth centu ...,[],['zest'],0,1,1,0.0114943
Sentence 7,there is no painting and scarcely any carving in the ba ...,['scarcely'],[],1,0,1,0.016129
Sentence 8,"the very wood seems made of mud, the inhabitants of mud ...",[],[],0,0,0,0.0
Sentence 9,"so abased, so monotonous is everything that meets the ey ...",['monotonous'],[],1,0,1,0.00641026
Sentence 10,"houses do fall, people are drowned and left rotting, bu ...",['swelling'],['like'],1,1,2,0.0111732


In [48]:
#At first glance, looking at a very small portion of the text, it seems that sentiment words decline around quotation. I'll be 
#interested to see how this relates to narrative and description! 

In [49]:
#Next Steps: 
#Plot out the points! 
#Do the same anaylsis and mapping for concrete and abstract works
#Study the maps, compare the maps, see what inferences can be drawn
#Look at outliers, those areas that deviate pretty strongly from the average 