-
Notifications
You must be signed in to change notification settings - Fork 17
/
scrub_mhc.py
46 lines (33 loc) · 1.08 KB
/
scrub_mhc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 18 11:04:05 2016
@author: Chris
"""
from os import listdir
from os.path import isfile, join
# Directory (relative to the current working directory) that holds the
# commentary text files.
MHC_DIR = './mhc/'

# Text whose first occurrence marks the trailing indexes section.  Files are
# handled as raw bytes, so the marker is a bytes literal.
INDEX_MARKER = b'Indexes'

# How many lines directly above the marker line are removed along with it.
# NOTE(review): presumably these are the blank/decoration lines that precede
# the "Indexes" heading in the source texts -- confirm against the data.
LINES_ABOVE_MARKER = 2


def _find_marker_line(lines):
    """Return the index of the first line containing INDEX_MARKER, or None."""
    for index, line in enumerate(lines):
        if INDEX_MARKER in line:
            return index
    return None


def scrub_file(path):
    """Remove the indexes section from the file at *path*, in place.

    Everything from LINES_ABOVE_MARKER lines above the first line containing
    INDEX_MARKER through the end of the file is dropped.  A file without the
    marker is left untouched.

    The file is read and written in binary mode so that the retained lines
    (including their line endings) are written back byte-for-byte.

    Args:
        path: Path of the text file to scrub.

    Returns:
        True if the file was rewritten, False if no marker was found.
    """
    with open(path, 'rb') as f:
        content = f.readlines()

    marker_line = _find_marker_line(content)
    if marker_line is None:
        return False
    print('Found it at line %d !' % marker_line)

    # Clamp at zero: a marker within the first LINES_ABOVE_MARKER lines would
    # otherwise give a negative slice bound, which counts from the END of the
    # list and keeps the wrong lines.
    cutoff = max(marker_line - LINES_ABOVE_MARKER, 0)

    print('Writing back without indexes')
    with open(path, 'wb') as f:
        f.writelines(content[:cutoff])
    return True


def main():
    """Scrub the indexes section from every .txt file directly in MHC_DIR."""
    # Get all the source text files (regular files only, no sub-directories).
    mhc_files = [f for f in listdir(MHC_DIR) if isfile(join(MHC_DIR, f))]

    print('Parsing Matthew Henry\'s Commentary...')

    for mhc_file in mhc_files:
        # Only parse the .txt files.
        if not mhc_file.endswith('.txt'):
            continue

        print(' Parsing file: %s' % mhc_file)
        scrub_file(join(MHC_DIR, mhc_file))


if __name__ == '__main__':
    main()