Skip to content

Commit

Permalink
grebook: keep some context around matches
Browse files Browse the repository at this point in the history
  • Loading branch information
akkana committed Feb 13, 2019
1 parent 9001b23 commit e3d5f18
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions grebook.py
Expand Up @@ -4,26 +4,29 @@

import sys, os
import subprocess
import re

def grep_ebook(pat, filename):

# unzip only html and content files to stdin
p1 = subprocess.Popen(['unzip', '-p', filename,
'*.htm*', '*.xml', '*.opf'],
shell=False, stdout=subprocess.PIPE)
# get rid of short html tags (e.g. <b>)
p2 = subprocess.Popen(['perl', '-lpe', 's![<][^>]{1,200}?[>]!!g;'],
shell=False, stdin=p1.stdout, stdout=subprocess.PIPE)
p1.stdout.close()

output = p1.communicate()[0]
p1 = re.sub('[<][^>]{1,200}?[>]', '', output)





# keep some context around matches
p3 = subprocess.Popen(['grep', '-Piaso', '.{0,30}%s.{0,30}' % pat],
shell=False, stdin=p2.stdout,stdout=subprocess.PIPE)
p2.stdout.close()
shell=False, stdin=output, stdout=subprocess.PIPE)

# color the matches (doesn't work for me)
p4 = subprocess.Popen(['grep', '-Pi', '--color', pat],
shell=False, stdin=p3.stdout,stdout=subprocess.PIPE)
shell=False, stdin=p3.stdout, stdout=subprocess.PIPE)
p3.stdout.close()

output = p4.communicate()[0]
Expand Down

0 comments on commit e3d5f18

Please sign in to comment.