-
Notifications
You must be signed in to change notification settings - Fork 0
/
book_score.py
80 lines (57 loc) · 2.57 KB
/
book_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys, os
from labMTsimple.storyLab import *
import codecs ## handle utf8
import glob
from textstat.textstat import textstat as ts
from natsort import natsorted
import argparse
# Default number of words that make up one "page" of text.
WORDS_PAGE = 250


def split_pages(text, page_words=WORDS_PAGE):
    """Split ``text`` into pages of at least ``page_words`` words each.

    The text is cut on blank lines ("\\n\\n") and whole paragraphs are
    accumulated until the running page reaches ``page_words`` words, as
    counted by ``ts.lexicon_count``.  Paragraphs are never split, so a page
    may exceed ``page_words``.  Any non-empty remainder becomes a final,
    possibly shorter, page.

    Args:
        text: The full text to paginate.
        page_words: Minimum word count that closes a page.

    Returns:
        A list of page strings, in document order.
    """
    pages = []
    pending = []  # paragraphs collected for the page currently being built
    for para in text.split("\n\n"):
        pending.append(para)
        # Re-join with the separator that was split on.  Plain concatenation
        # would fuse the last word of one paragraph with the first word of
        # the next, undercounting words and corrupting the tokens that are
        # scored later.
        candidate = "\n\n".join(pending)
        if ts.lexicon_count(candidate) >= page_words:
            pages.append(candidate)
            pending = []
    leftover = "\n\n".join(pending)
    # Keep the trailing partial page only if it actually contains words.
    if ts.lexicon_count(leftover) != 0:
        pages.append(leftover)
    return pages
def score_pages(pages, labMT, labMTvector, labMTwordList):
    """Return one score per page, in the same order as ``pages``.

    Each page is passed through ``emotion`` to obtain its frequency vector,
    that vector is filtered with ``stopper`` (``stopVal=1.0``), and the
    filtered vector is scored with ``emotionV``.

    Args:
        pages: Iterable of page texts.
        labMT: Word-score lookup handed to ``emotion``.
        labMTvector: Score vector handed to ``emotion``, ``stopper`` and
            ``emotionV``.
        labMTwordList: Word list handed to ``stopper``.

    Returns:
        A list with the ``emotionV`` result for every page.
    """
    def _page_score(page_text):
        # The raw valence returned by emotion() is discarded; only the
        # frequency vector is needed, since the score is recomputed after
        # stopping.
        _, freq_vec = emotion(page_text, labMT, shift=True, happsList=labMTvector)
        stopped_vec = stopper(freq_vec, labMTvector, labMTwordList, stopVal=1.0)
        return emotionV(stopped_vec, labMTvector)

    return [_page_score(page_text) for page_text in pages]
if __name__ == '__main__':
    # Command-line entry point.  For every ``*.txt`` file (one chapter per
    # file) in the given directory, write:
    #   * book_stats_<dir>.csv      - word count, readability index and
    #                                 happiness score per chapter
    #   * page_sparklines_<dir>.csv - one row per chapter holding the
    #                                 comma-separated per-page scores
    parser = argparse.ArgumentParser(description='Book Stats Generator')
    parser.add_argument('-d', '--dir', required=True,
                        help="Directory with the book text files in it")
    parser.add_argument('-w', '--words_page', required=False, type=int,
                        default=WORDS_PAGE,
                        help="The number of words per page")
    args = parser.parse_args()

    lang = 'english'
    args.dir = args.dir.rstrip('/')
    # Name the output files after the last path component only, so that a
    # nested directory such as "books/novel" does not produce an output path
    # pointing into a non-existent "book_stats_books/" directory.
    book_label = os.path.basename(args.dir) or args.dir

    labMT, labMTvector, labMTwordList = emotionFileReader(
        stopval=0.0, lang=lang, returnVector=True)

    # Context managers guarantee both CSV files are flushed and closed, even
    # if scoring a chapter raises.
    with open('book_stats_' + book_label + '.csv', 'w') as fout, \
            open('page_sparklines_' + book_label + '.csv', 'w') as fout1:
        fout.write('chapter,WC,RL,HS\n')
        fout1.write('Page Happiness\n')

        for fname in natsorted(glob.glob(args.dir + '/*.txt')):
            with codecs.open(fname, 'r', 'utf8') as f:
                chapter = f.read()

            # Chapter name is the file name without directory or extension.
            chapter_name = os.path.splitext(os.path.basename(fname))[0]
            word_count = ts.lexicon_count(chapter)

            # Chapters shorter than one page are ignored: they get an empty
            # stats row and an empty sparkline row.  The threshold follows
            # --words_page so it stays consistent with the page size used
            # for splitting below.
            if word_count < args.words_page:
                fout.write('"{}",,,\n'.format(chapter_name))
                text = ''
            else:
                # Get the frequency vector for the chapter; the raw valence
                # is discarded because no lens has been applied yet.
                _, chapterFvec = emotion(chapter, labMT, shift=True,
                                         happsList=labMTvector)
                # Stop the vector first, then compute the valence from it.
                chapterStoppedVec = stopper(chapterFvec, labMTvector,
                                            labMTwordList, stopVal=1.0)
                chapterValence = emotionV(chapterStoppedVec, labMTvector)

                fout.write('"{}",{},{},{}\n'.format(
                    chapter_name,
                    word_count,
                    ts.automated_readability_index(chapter),
                    round(chapterValence, 2)))
                page_scores = score_pages(
                    split_pages(chapter, args.words_page),
                    labMT, labMTvector, labMTwordList)
                text = ','.join(format(x, "1.1f") for x in page_scores)

            fout1.write(text + "\n")