Skip to content

Commit

Permalink
Merge pull request #2 from zjijz/master
Browse files (browse the repository at this point in the history)
Updated repo to python3
  • Loading branch information
jegesh committed Jan 1, 2018
2 parents 4551a57 + b947e26 commit 4291d93
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
19 changes: 10 additions & 9 deletions randomAccessReader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# =============

import csv
import StringIO
from io import StringIO

# ==========
# classes
Expand Down Expand Up @@ -68,7 +68,7 @@ def get_lines(self, line_number, amount=1):
"""
lines = []
with open(self._filepath) as f:
for x in xrange(amount):
for x in range(amount):
line_data = self._lines[line_number]
f.seek(line_data['position'])
lines.append(f.read(line_data['length']))
Expand All @@ -84,17 +84,18 @@ def __init__(self, filepath, has_header=True, **kwargs):
:param has_header:
:param kwargs: endline_character='\n', values_delimiter=',', quotechar='"', ignore_corrupt=False, ignore_blank_lines=True
"""
super(CsvRandomAccessReader, self).__init__(filepath, kwargs.get('endline_character','\n'), kwargs.get('ignore_blank_lines', True))
super(CsvRandomAccessReader, self).__init__(filepath, kwargs.get('endline_character', '\n'),
kwargs.get('ignore_blank_lines', True))
self._headers = None
self._delimiter = kwargs.get('values_delimiter', ',')
self._quotechar = kwargs.get('quotechar', '"')
self._ignore_bad_lines = kwargs.get('ignore_corrupt', False)
self.has_header = has_header
if has_header:
dialect = self.MyDialect(self._endline, self._quotechar, self._delimiter)
b = StringIO.StringIO(self.get_lines(0)[0])
b = StringIO(self.get_lines(0)[0])
r = csv.reader(b, dialect)
values = tuple(r.next())
values = tuple(next(r))
self._headers = values

@property
Expand All @@ -113,9 +114,9 @@ def _get_line_values(self, line):
:return: tuple of str
"""
dialect = self.MyDialect(self._endline, self._quotechar, self._delimiter)
b = StringIO.StringIO(line)
b = StringIO(line)
r = csv.reader(b, dialect)
values = tuple(r.next())
values = tuple(next(r))
if len(self._headers) != len(values):
if not self._ignore_bad_lines:
raise ValueError("Corrupt csv - header and row have different lengths")
Expand All @@ -135,10 +136,10 @@ def get_line_dicts(self, line_number, amount=1):
line_number += 1
lines = []
text_lines = self.get_lines(line_number, amount)
for x in xrange(amount):
for x in range(amount):
vals = self._get_line_values(text_lines[x])
if vals is None:
lines.append(dict(zip(self._headers, range(len(self._headers)))))
lines.append(dict(zip(self._headers, list(range(len(self._headers))))))
else:
lines.append(dict(zip(self._headers, vals)))
return lines
Expand Down
Binary file not shown.
6 changes: 5 additions & 1 deletion tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,8 @@ def test_ignore_corrupt(self):
path = os.path.dirname(os.path.abspath(__file__)) + "/corrupt.csv"
reader = CsvRandomAccessReader(path, ignore_corrupt=True)
corrupt_lines = reader.get_line_dicts(0,8)
self.assertEqual(len(corrupt_lines), 8)
self.assertEqual(len(corrupt_lines), 8)


if __name__ == '__main__':
unittest.main()

0 comments on commit 4291d93

Please sign in to comment.