-
Notifications
You must be signed in to change notification settings - Fork 0
/
word_mapper.py
executable file
·53 lines (44 loc) · 1.54 KB
/
word_mapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
import subprocess
import re
class mapper(object):
    """Map words to integer ids (and back) using a frequency file.

    The file lives at ``path + 'WORK/all.frequencies'``.  Each line is
    whitespace-separated, with the frequency in the first field and the
    word in the second.  A word's id is the zero-based index of its line.

    Nothing is cached: every lookup re-reads the file from the start.
    """

    def __init__(self, path):
        """Remember the location of the frequency file.

        ``path`` is concatenated as-is, so it must already end with a
        path separator.
        """
        self.file = path + 'WORK/all.frequencies'

    def __getitem__(self, key):
        """Return the word for an integer ``key``, or the id for a word.

        Returns ``None`` when the id or word is not present in the file.
        """
        if isinstance(key, int):
            return self._id_grep(key)
        return self._grep(key)

    def __iter__(self):
        """Yield ``(word, word_id)`` pairs in file order."""
        with open(self.file) as handle:
            for word_id, line in enumerate(handle):
                yield line.split()[1], word_id

    def _grep(self, word_key):
        """Return the id of ``word_key``, or ``None`` if it is absent.

        The word field is compared for exact equality, so ``cat`` does not
        match ``concat`` and keys containing regex metacharacters are safe.
        """
        with open(self.file) as handle:
            for word_id, line in enumerate(handle):
                fields = line.split()
                if len(fields) > 1 and fields[1] == word_key:
                    return word_id
        return None

    def _id_grep(self, id_key):
        """Return the word stored at line ``id_key``, or ``None``."""
        with open(self.file) as handle:
            for word_id, line in enumerate(handle):
                if word_id == id_key:
                    return line.split()[1]
        return None

    def id_and_freq(self, word):
        """Return ``(word_id, frequency)`` for ``word``, or ``None``.

        The frequency is the first field of the matching line, as a float.
        """
        with open(self.file) as handle:
            for word_id, line in enumerate(handle):
                fields = line.split()
                if len(fields) > 1 and fields[1] == word:
                    return (word_id, float(fields[0]))
        return None

    ## Version with GNU grep
    def sys_grep(self, word_key):
        """Return the id of the first line ``grep -w`` matches, or ``None``.

        Unlike ``_grep``, the key is interpreted by grep as a regular
        expression and may match a whole word anywhere on the line.
        """
        process = subprocess.Popen(
            # '--' stops a key that starts with '-' being read as an option.
            ['grep', '-wn', '--', word_key, self.file],
            stdout=subprocess.PIPE,
            universal_newlines=True,  # text output on both Python 2 and 3
        )
        output, _ = process.communicate()
        if not output:
            return None
        # grep -n prefixes each match with "<1-based line number>:".
        return int(output.split(':', 1)[0]) - 1