-
Notifications
You must be signed in to change notification settings - Fork 1
/
filehelp.py
56 lines (49 loc) · 1.83 KB
/
filehelp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# file: filehelp.py
# author: Colin Woodbury
# contact: colingw AT gmail
# about: A library of functions that aids in file processing.
from random import randrange as _randrange
def random_line(in_file):
    '''Given a file, returns the line containing a randomly chosen position.

    The position is drawn uniformly from the whole file, so longer lines
    are proportionally more likely to be picked than shorter ones.

    in_file -- an open, seekable file object; its cursor is moved.

    Returns the chosen line as produced by readline().  For a file with no
    content, returns readline()'s empty end-of-file result.
    '''
    in_file.seek(0, 2)  # Jump to the end so tell() reports the file size.
    size = in_file.tell()
    if size < 1:
        # randrange(0, 0) would raise ValueError; there is no line to pick.
        return in_file.readline()
    pos = _randrange(0, size)  # Random position inside the file.
    in_file.seek(lseek(in_file, pos))  # Rewind to the start of its line.
    return in_file.readline()
def lseek(in_file, pos, jump_dist=1):
    '''Returns the cursor position of the start of the line containing pos.

    The file is scanned backwards from pos in windows of jump_dist
    characters until a newline is found.  If you know the average line
    length of the file, you can increase jump_dist to speed up the search.
    Default is one character at a time.

    in_file   -- an open, seekable file object (text or binary mode).
    pos       -- position to search back from; values past the end of the
                 file are clamped to the last character.
    jump_dist -- window size of each backwards step; values below 1 are
                 treated as 1.

    A newline sitting exactly at pos counts as part of the line it ends.
    Returns 0 when pos < 1 or when no newline precedes pos.  Unless
    pos < 1, the cursor is left at the returned position.
    '''
    if pos < 1:
        return 0
    in_file.seek(0, 2)
    last = in_file.tell() - 1  # Position of the final character.
    if pos > last:
        pos = last
    step = max(1, jump_dist)  # A zero-sized window would never progress.
    window_end = pos
    while window_end > 0:
        # Never step past the start of the file: a negative seek raises.
        window_start = max(0, window_end - step)
        in_file.seek(window_start)
        sample = in_file.read(window_end - window_start)
        # Compare like with like so binary-mode files work as well.
        newline = b'\n' if isinstance(sample, bytes) else '\n'
        found = sample.rfind(newline)
        if found != -1:
            # The line starts right after the LAST newline in the window,
            # not at the end of the window.  Re-read up to that newline so
            # tell() reports the position and the cursor ends up there.
            in_file.seek(window_start)
            in_file.read(found + 1)
            return in_file.tell()
        window_end = window_start
    in_file.seek(0)  # Reached the start of the file without a newline.
    return 0
def off_end(in_file, pos):
    '''Reports whether pos lies beyond the last character of the file.

    Returns True when pos is greater than the position given by
    get_end(in_file), and False otherwise.
    '''
    return pos > get_end(in_file)
def get_end(in_file):
    '''Returns the cursor position of the file's final character, i.e. one
    less than the position reached by seeking to the end of the file.

    The cursor is left at the end of the file.
    '''
    in_file.seek(0, 2)  # whence=2: the offset is relative to the file's end.
    end_of_file = in_file.tell()
    return end_of_file - 1
def jump_back(in_file, pos, jump_dist):
    '''Given a position and a distance to jump back by, moves the cursor
    there and returns the new position.

    If the jump would land before the start of the file, the cursor is
    moved to position 0 instead, since seeking to a negative offset raises.
    '''
    target = max(0, pos - jump_dist)
    in_file.seek(target)
    return target