Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
52 lines (37 sloc) 1.04 KB
from hadoop.io.NullWritable import NullWritable
from hadoop.io.Text import Text
from hadoop.io import SequenceFile
from argparse import ArgumentParser
import sys
def make_text_null_seq(filename, reader):
    """Write each item of *reader* as a key in a new sequence file.

    A writer is created for *filename* with ``Text`` keys and
    ``NullWritable`` values. Every item yielded by *reader* is stored
    unchanged as a key (nothing is stripped, so lines read from a text
    file keep their trailing newline); the value is always the same
    ``NullWritable`` instance.

    Args:
        filename: Destination passed to ``SequenceFile.createWriter``.
        reader: Iterable of items accepted by ``Text.set``.

    Returns:
        The number of records appended.
    """
    writer = SequenceFile.createWriter(filename, Text, NullWritable)
    # One key/value pair is reused for every record; only the key changes.
    key = Text()
    value = NullWritable()
    count = 0
    try:
        for x in reader:
            key.set(x)
            writer.append(key, value)
            count += 1
    finally:
        # Close even when iteration or an append fails, so the writer
        # is never left open behind a propagating exception.
        writer.close()
    return count
def count_file(filename):
    """Return the number of records in the sequence file *filename*.

    Records are read one at a time into a reused ``Text`` key and
    ``NullWritable`` value until ``reader.next`` returns a falsy result.

    Args:
        filename: Path passed to ``SequenceFile.Reader``.

    Returns:
        The number of records read.
    """
    # NOTE(review): presumably the file must hold Text/NullWritable
    # records for next() to succeed -- confirm against the reader.
    reader = SequenceFile.Reader(filename)
    key = Text()
    value = NullWritable()
    count = 0
    try:
        while reader.next(key, value):
            count += 1
    finally:
        # The original never closed the reader; release it on every path.
        reader.close()
    return count
def main():
    """Command-line entry point.

    Takes a positional ``seqfile`` plus either ``--count`` (print the
    number of records in ``seqfile``) or ``--copyfrom PATH`` (write the
    lines of ``PATH`` into ``seqfile``). ``--count`` wins when both are
    given; with neither, nothing is done.
    """
    cli = ArgumentParser()
    cli.add_argument('seqfile')
    cli.add_argument('--copyfrom')
    cli.add_argument('--count', action='store_true')
    args = cli.parse_args()

    if args.count:
        print(count_file(args.seqfile))
        return

    if not args.copyfrom:
        # No action requested.
        return

    with open(args.copyfrom) as source:
        make_text_null_seq(args.seqfile, source)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
You can’t perform that action at this time.