In [9]:
from collections import Counter, defaultdict
import re

# Non-speaking lines don't start with spaces, all speaking lines have some spaces. (2 or 4 usually)
non_speaking_pattern = '^\S'

# Stage directions appear inside speaking lines in square braces.
stage_direction_pattern = '\[.*?\]'

# Stage directions also sometimes indicated by more than 4 spaces at the start
alt_stage_direction = '^ {5,}'

# The start of the line, then two spaces, then a shortened version of the name, then a period, then a space
new_speaker_pattern = '^  ([A-Z1-9][a-z]* ?[A-Za-z]*?)\. '
current_speaker = ''
lines_by_speaker = Counter()
word_counts_by_speaker = defaultdict(Counter)

with open('book-texts/romeo-and-juliet-no-header-footer.txt', 'r') as rj_reader:
    line = 'will be ignored...'

    while line != '':
        line = rj_reader.readline()
        # Don't bother counting non-speaking lines
        if re.search(non_speaking_pattern, line) or re.search(alt_stage_direction, line):
            print("skipped", line)
            continue

        # There are other non-speaking lines, if the first nonwhitespace is "Enter" or "Exit"
        # these are stage directions.
        stripped_line = line.strip()
        if stripped_line.startswith("Enter") or stripped_line.startswith("Exit"):
            print("skipped", line)
            continue

        # Now we want to check if the current speaker has changed.
        match = re.search(new_speaker_pattern, line)
        if match:
            current_speaker = match.group(1)
            
            # We also don't want to count the names as spoken words.
            line_without_speaker = re.sub(new_speaker_pattern, '', line)
            print("speaker change: ", current_speaker, '\n', line_without_speaker)
        else:
            line_without_speaker = line # bit of a hack honestly.
            
           
        # After all these checks, we believe we have a speaking line.
        # But we want to remove any stage directions from it.
        if re.search(stage_direction_pattern, line_without_speaker):
            minus_stage_directions = re.sub(stage_direction_pattern, '', line_without_speaker)
            print("removed stage dir", line_without_speaker, minus_stage_directions)
        else:
            minus_stage_directions = line_without_speaker # another similar hack
        
        # If we are here, we know it's a speaking line, so lets update the count and words
        lines_by_speaker[current_speaker] += 1
        
        words_in_this_line = minus_stage_directions.strip().split()
        for word in words_in_this_line:
            word_counts_by_speaker[current_speaker][word] += 1

OFFENDER ['This', 'is', 'not', 'Romeo,', "he's", 'some', 'other', 'where.']     This is not Romeo, he's some other where.
     This is not Romeo, he's some other where.
 Rom
OFFENDER ['Nay,', 'sit,', 'nay,', 'sit,', 'good', 'cousin', 'Capulet,']     Nay, sit, nay, sit, good cousin Capulet,
     Nay, sit, nay, sit, good cousin Capulet,
 Cap
OFFENDER ['Henceforth', 'I', 'never', 'will', 'be', 'Romeo.']     Henceforth I never will be Romeo.
     Henceforth I never will be Romeo.
 Rom
OFFENDER ['I', 'can', 'tell', 'you;', 'but', 'young', 'Romeo', 'will', 'be', 'older', 'when', 'you'] I can tell you; but young Romeo will be older when you
 I can tell you; but young Romeo will be older when you
 Rom
OFFENDER ['This', 'is', 'the', 'truth,', 'or', 'let', 'Benvolio', 'die.']     This is the truth, or let Benvolio die.
     This is the truth, or let Benvolio die.
 Ben
OFFENDER ['Is', 'father,', 'mother,', 'Tybalt,', 'Romeo,', 'Juliet,']     Is father, mother, Tybalt, Romeo, Juliet,
     Is fathe

In [11]:
for speaker, lines in lines_by_speaker.most_common():
    print(f'{speaker} had {lines} lines.')
    print('  In those lines, they said these words...')
    for word, count in word_counts_by_speaker[speaker].most_common():
        print('    ', word, count)
        

Rom had 809 lines.
  In those lines, they said these words...
     I 127
     the 98
     and 83
     to 80
     my 74
     of 72
     that 63
     in 63
     a 63
     is 61
     me 52
     thou 50
     with 42
     And 41
     not 40
     thy 38
     for 29
     be 29
     this 29
     it 27
     her 26
     thee 26
     but 24
     love 20
     so 19
     have 19
     more 18
     O, 18
     from 18
     O 17
     What 17
     will 17
     am 16
     do 16
     A 16
     what 15
     no 15
     as 15
     But 15
     she 14
     The 14
     I'll 14
     That 14
     his 13
     may 13
     For 12
     To 12
     upon 12
     shall 11
     This 11
     too 11
     Thou 11
     than 11
     on 11
     As 11
     dear 11
     was 10
     tell 10
     at 10
     hath 10
     all 10
     by 10
     our 10
     It 10
     their 10
     My 10
     How 10
     Is 9
     love, 9
     With 9
     In 9
     when 9
     an 9
     must 9
     such 8
     mine 8
     man 8
     She 8
     By 8
  

     name. 1
     silver-sweet 1
     sound 1
     tongues 1
     softest 1
     music 1
     attending 1
     ears! 1
     dear? 1
     nine. 1
     forget, 1
     Forgetting 1
     any 1
     home 1
     bird. 1
     Sleep 1
     dwell 1
     peace 1
     breast! 1
     rest! 1
     father's 1
     cell, 1
     His 1
     crave 1
     hap 1
     tell. 1
     morrow, 1
     true-the 1
     sweeter 1
     Rosaline, 1
     father? 1
     No. 1
     forgot 1
     ere 1
     enemy, 1
     wounded 1
     That's 1
     wounded. 1
     remedies 1
     physic 1
     lies. 1
     hatred, 1
     man, 1
     for, 1
     lo, 1
     intercession 1
     likewise 1
     steads 1
     foe. 1
     plainly 1
     daughter 1
     Capulet; 1
     combin'd, 1
     save 1
     combine 1
     marriage. 1
     When, 1
     where, 1
     We 1
     met, 1
     woo'd, 1
     vow, 1
     pass; 1
     pray, 1
     consent 1
     marry 1
     to-day. 1
     chid'st 1
     Rosaline. 1
     bad'st 1
     pray 1
    

     how 5
     upon 5
     them 5
     day 5
     bid 5
     I, 5
     Tybalt 5
     Tybalt's 5
     ever 5
     so, 5
     let 5
     Madam, 4
     pray 4
     mine 4
     hands 4
     holy 4
     Come 4
     A 4
     Romeo! 4
     Romeo? 4
     name 4
     nor 4
     other 4
     that, 4
     death, 4
     see 4
     prove 4
     swear 4
     night! 4
     This 4
     again. 4
     hear 4
     lord 4
     beseech 4
     With 4
     years 4
     did 4
     poor 4
     cannot 4
     long 4
     news 4
     their 4
     night; 4
     heaven 4
     earth, 4
     Upon 4
     him! 4
     husband 4
     weep 4
     Some 4
     mother, 4
     die 4
     into 4
     help 4
     here. 3
     too, 3
     I. 3
     honour 3
     Than 3
     Ay, 3
     lips 3
     though 3
     sin 3
     nurse. 3
     even 3
     me! 3
     Or, 3
     'Tis 3
     hand, 3
     any 3
     were 3
     dear 3
     Take 3
     words 3
     find 3
     thee. 3
     take 3
     At 3
     gentle 3
     been 3
     ere 

     banishment. 1
     cords. 1
     Poor 1
     ropes, 1
     beguil'd, 1
     Both 1
     exil'd. 1
     highway 1
     maid, 1
     maiden-widowed. 1
     cords; 1
     maidenhead! 1
     ring 1
     knight 1
     last 1
     Wilt 1
     near 1
     day. 1
     nightingale, 1
     lark, 1
     pierc'd 1
     hollow 1
     thine 1
     ear. 1
     Nightly 1
     pomegranate 1
     tree. 1
     Believe 1
     nightingale. 1
     Yond 1
     daylight; 1
     it, 1
     meteor 1
     exhales 1
     torchbearer 1
     way 1
     Mantua. 1
     need'st 1
     gone. 1
     is, 1
     is! 1
     tune, 1
     Straining 1
     harsh 1
     discords 1
     unpleasing 1
     sharps. 1
     makes 1
     division; 1
     divideth 1
     us. 1
     toad 1
     voices 1
     Since 1
     us 1
     affray, 1
     Hunting 1
     hence 1
     hunt's-up 1
     day! 1
     gone! 1
     More 1
     grows. 1
     Nurse? 1
     window, 1
     life 1
     out. 1
     gone 1
     friend? 1
     hour, 1
    

     to-night. 1
     God 1
     Wast 1
     Rosaline? 1
     son! 1
     then? 1
     plain, 1
     homely 1
     drift 1
     Riddling 1
     confession 1
     finds 1
     riddling 1
     shrift. 1
     Francis! 1
     change 1
     here! 1
     Rosaline, 1
     didst 1
     dear, 1
     forsaken? 1
     men's 1
     truly 1
     hearts, 1
     eyes. 1
     Jesu 1
     Maria! 1
     deal 1
     brine 1
     sallow 1
     Rosaline! 1
     salt 1
     water 1
     thrown 1
     waste, 1
     season 1
     taste! 1
     sighs 1
     clears, 1
     groans 1
     ring 1
     mine 1
     Lo, 1
     cheek 1
     stain 1
     sit 1
     tear 1
     yet. 1
     e'er 1
     thine, 1
     were 1
     Rosaline. 1
     chang'd? 1
     Pronounce 1
     sentence 1
     then: 1
     Women 1
     there's 1
     men. 1
     doting, 1
     loving, 1
     pupil 1
     mine. 1
     grave 1
     in, 1
     another 1
     have. 1
     read 1
     rote, 1
     spell. 1
     young 1
     waverer, 1
     res

     bless 2
     Hark 2
     sir. 2
     ne'er 2
     toad, 2
     pale 2
     other 2
     hath 2
     Peter, 2
     stay 2
     give 2
     Fie, 2
     had! 2
     man. 2
     Though 2
     excels 2
     him, 2
     head 2
     back 2
     ah, 2
     Beshrew 2
     love 2
     honest 2
     Where 2
     Lady 2
     shrift 2
     hie 2
     Laurence' 2
     comes 2
     soon 2
     night. 2
     cell. 2
     Alack 2
     gone, 2
     saw 2
     piteous 2
     All 2
     Tybalt, 2
     best 2
     Tybalt 2
     kill'd 2
     did! 2
     day, 2
     where's 2
     these 2
     chamber. 2
     is. 2
     holy 2
     lady's 2
     up! 2
     sake, 2
     sir! 2
     falls 2
     down 2
     Exit. 2
     madam, 2
     bed! 2
     madam! 2
     County 2
     wake 2
     woful, 2
     put 2
     maidenhead 1
     twelve 1
     year 1
     old, 1
     bade 1
     ladybird! 1
     forbid! 1
     Where's 1
     girl? 1
     mother. 1
     age 1
     unto 1
     hour. 1
     fourteen 1
     tee

     ne'er 2
     Sirrah, 2
     much 2
     County. 2
     up. 2
     should 2
     go, 2
     fetch 2
     Now, 2
     nurse, 2
     to-morrow. 2
     well, 2
     County 2
     stir, 2
     jealous 2
     Montague, 2
     daughter's 2
     noise 1
     Give 1
     Old 1
     flourishes 1
     blade 1
     spite 1
     I, 1
     penalty 1
     alike; 1
     hard, 1
     think, 1
     peace. 1
     saying 1
     before: 1
     stranger 1
     world, 1
     fourteen 1
     years; 1
     summers 1
     wither 1
     pride 1
     Ere 1
     ripe 1
     bride. 1
     soon 1
     marr'd 1
     made. 1
     earth 1
     swallowed 1
     hopes 1
     she; 1
     hopeful 1
     lady 1
     earth. 1
     woo 1
     heart; 1
     part. 1
     agree, 1
     within 1
     scope 1
     choice 1
     Lies 1
     according 1
     voice. 1
     hold 1
     accustom'd 1
     feast, 1
     Whereto 1
     invited 1
     guest, 1
     store, 1
     One 1
     number 1
     more. 1
     poor 1
     behold

     whole 2
     Thy 2
     wit 2
     here's 2
     well 2
     hide 2
     come 2
     indeed 2
     Good 2
     upon 2
     bawd, 2
     hare, 2
     Farewell, 2
     lady. 2
     sword 2
     soon 2
     quarrel 2
     hair 2
     because 2
     head 2
     full 2
     egg 2
     yet 2
     before 2
     word 2
     us? 2
     Zounds, 2
     Marry, 2
     Your 2
     man. 2
     villain, 2
     gentle 1
     lover. 1
     Borrow 1
     Cupid's 1
     soar 1
     above 1
     common 1
     bound. 1
     sink 1
     burthen 1
     love- 1
     Too 1
     oppression 1
     tender 1
     thing. 1
     you, 1
     love. 1
     Prick 1
     pricking, 1
     beat 1
     Give 1
     put 1
     visage 1
     visor 1
     visor! 1
     curious 1
     doth 1
     quote 1
     deformities? 1
     Here 1
     beetle 1
     brows 1
     blush 1
     Tut! 1
     dun's 1
     constable's 1
     own 1
     word! 1
     Dun, 1
     we'll 1
     draw 1
     mire 1
     sir-reverence 1
     love, 1
 

     fighting 1
     approach. 1
     drew 1
     them. 1
     instant 1
     came 1
     fiery 1
     sword 1
     prepar'd; 1
     Which, 1
     breath'd 1
     defiance 1
     ears, 1
     swung 1
     about 1
     head 1
     cut 1
     winds, 1
     nothing 1
     hurt 1
     withal, 1
     hiss'd 1
     scorn. 1
     While 1
     interchanging 1
     thrusts 1
     blows, 1
     Came 1
     more 1
     more, 1
     fought 1
     part, 1
     Till 1
     came, 1
     parted 1
     part. 1
     Madam, 1
     before 1
     worshipp'd 1
     sun 1
     Peer'd 1
     forth 1
     golden 1
     window 1
     East, 1
     troubled 1
     mind 1
     drave 1
     walk 1
     abroad; 1
     Where, 1
     grove 1
     sycamore 1
     westward 1
     rooteth 1
     city's 1
     side, 1
     early 1
     walking 1
     see 1
     son. 1
     Towards 1
     made; 1
     ware 1
     stole 1
     into 1
     covert 1
     wood. 1
     I- 1
     measuring 1
     affections 1
     own, 1
     so

     tears, 1
     Which, 1
     too 1
     minded 1
     herself 1
     alone, 1
     May 1
     from 1
     society. 1
     Now 1
     know 1
     reason 1
     Happily 1
     met, 1
     lady 1
     wife! 1
     may 1
     be, 1
     love, 1
     next. 1
     Come 1
     make 1
     confession 1
     father? 1
     deny 1
     him 1
     ye, 1
     sure, 1
     Poor 1
     soul, 1
     abus'd 1
     Thou 1
     wrong'st 1
     more 1
     report. 1
     Thy 1
     mine, 1
     hast 1
     sland'red 1
     God 1
     shield 1
     should 1
     disturb 1
     devotion! 1
     Juliet, 1
     early 1
     rouse 1
     ye. 1
     Till 1
     then, 1
     adieu, 1
     holy 1
     kiss. 1
     Exit. 1
     Have 1
     thought 1
     long 1
     see 1
     morning's 1
     face, 1
     such 1
     sight 1
     this? 1
     Beguil'd, 1
     divorced, 1
     wronged, 1
     spited, 1
     Most 1
     detestable 1
     Death, 1
     beguil'd, 1
     By 1
     quite 1
     overthrown! 1
     

     learn 2
     friend; 2
     will 2
     As 2
     but 2
     would 2
     give 2
     thou 2
     thy 2
     To 2
     true 2
     what 2
     her 2
     that 2
     Thou 1
     villain 1
     Capulet!- 1
     Hold 1
     not, 1
     let 1
     go. 1
     Who 1
     ancient 1
     quarrel 1
     new 1
     abroach? 1
     Speak, 1
     nephew, 1
     were 1
     you 1
     when 1
     began? 1
     Many 1
     morning 1
     there 1
     been 1
     seen, 1
     With 1
     tears 1
     augmenting 1
     fresh 1
     morning's 1
     dew, 1
     Adding 1
     more 1
     deep 1
     sighs; 1
     all 1
     soon 1
     all-cheering 1
     sun 1
     Should 1
     furthest 1
     East 1
     bean 1
     draw 1
     shady 1
     curtains 1
     Aurora's 1
     bed, 1
     Away 1
     light 1
     steals 1
     home 1
     heavy 1
     son 1
     private 1
     chamber 1
     pens 1
     himself, 1
     Shuts 1
     up 1
     windows, 1
     locks 1
     fair 1
     daylight 1
     m

     No, 1
     not 1
     till 1
     Thursday. 1
     There 1
     enough. 1
     We 1
     shall 1
     be 1
     short 1
     in 1
     our 1
     provision. 1
     'Tis 1
     now 1
     near 1
     What, 1
     are 1
     busy, 1
     ho? 1
     Need 1
     help? 1
     Good 1
     Get 1
     thee 1
     bed, 1
     rest; 1
     for 1
     thou 1
     hast 1
     need. 1
     noise 1
     here? 1
     matter? 1
     me, 1
     me! 1
     My 1
     only 1
     life! 1
     Revive, 1
     look 1
     up, 1
     or 1
     I 1
     will 1
     die 1
     with 1
     thee! 1
     Help, 1
     help! 1
     Call 1
     help. 1
     Alack 1
     day, 1
     dead! 1
     woful 1
     time! 1
     Accurs'd, 1
     unhappy, 1
     wretched, 1
     hateful 1
     day! 1
     Most 1
     miserable 1
     hour 1
     that 1
     e'er 1
     saw 1
     In 1
     lasting 1
     labour 1
     of 1
     his 1
     pilgrimage! 1
     loving 1
     thing 1
     rejoice 1
     solace 1
     in, 1
   

Page had 5 lines.
  In those lines, they said these words...
     I 3
     the 2
     will 2
     am 1
     almost 1
     afraid 1
     to 1
     stand 1
     alone 1
     Here 1
     in 1
     churchyard; 1
     yet 1
     adventure. 1
     O 1
     Lord, 1
     they 1
     fight! 1
     go 1
     call 1
     watch. 1
Citizens had 4 lines.
  In those lines, they said these words...
     Down 2
     with 2
     the 2
     Capulets! 1
     Montagues! 1
Officer had 2 lines.
  In those lines, they said these words...
     Clubs, 1
     bills, 1
     and 1
     partisans! 1
     Strike! 1
     beat 1
     them 1
     down! 1
Father had 2 lines.
  In those lines, they said these words...
     For 1
     shame, 1
     bring 1
     Juliet 1
     forth; 1
     her 1
     lord 1
     is 1
     come. 1
 had 1 lines.
  In those lines, they said these words...
