Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Sep 26, 2020
2 parents 3b38d5f + 9580d8d commit 43969db
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 9 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ language:
python

python:
- 2.7
# NOTE: Recover after a fix:
# https://github.com/tobgu/pyrsistent/issues/208
# - 2.7
- 3.6
- 3.7
- 3.8
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

A library for reading and writing tabular data (csv/xls/json/etc).

> **[Important Notice]** We have released [Frictionless Framework](https://github.com/frictionlessdata/frictionless-py). This framework is a logical continuation of `tabulator` that was extended to be a complete data solution. The change is not breaking for existing software, so no action is required. Please read the [Migration Guide](https://github.com/frictionlessdata/frictionless-py/blob/master/docs/target/migration-guide/README.md) from `tabulator` to Frictionless Framework.
> - we continue to bug-fix `tabulator@1.x` in this [repository](https://github.com/frictionlessdata/tabulator-py), and it remains available on [PyPi](https://pypi.org/project/tabulator/) as before
> - please note that the `frictionless@3.x` API, which we are working on at the moment, is not stable
> - we will release `frictionless@4.x` by the end of 2020 to be the first SemVer/stable version
## Features

- **Supports most common tabular formats**: CSV, XLS, ODS, JSON, Google Sheets, SQL, and others. See complete list [below](#supported-file-formats).
Expand Down
5 changes: 5 additions & 0 deletions data/matrix.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
f1,f2,f3,f4
11,12,13,14
21,22,23,24
31,32,33,34
41,42,43,44
Binary file added data/special/number_format_multicode.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion tabulator/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.52.1
1.52.3
9 changes: 8 additions & 1 deletion tabulator/parsers/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,14 @@ def convert_excel_number_format_string(excel_number, value):
percentage = True
if excel_number == "General":
return value
code = excel_number.split(".")
multi_codes = excel_number.split(";")
if value < 0 and len(multi_codes) > 1:
excel_number = multi_codes[1]
else:
excel_number = multi_codes[0]

code = excel_number.split('.')

if len(code) > 2:
return None
if len(code) < 2:
Expand Down
8 changes: 2 additions & 6 deletions tabulator/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,18 +888,14 @@ def builtin_processor(extended_rows):
if headers and self.__headers:
keyed_row = dict(zip(headers, row))
row = [keyed_row.get(header) for header in self.__headers]
elif self.__ignored_headers_indexes:
row = [value for index, value in enumerate(row) if index not in self.__ignored_headers_indexes]
headers = self.__headers

# Skip rows by numbers/comments
if self.__check_if_row_for_skipping(row_number, headers, row):
continue

# Ignore headers
if self.__ignored_headers_indexes:
for index in self.__ignored_headers_indexes:
if index < len(row):
row = row[:index] + row[index+1:]

yield (row_number, headers, row)

        # Skip negative rows processor
Expand Down
10 changes: 10 additions & 0 deletions tests/formats/test_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ def test_stream_xlsx_preserve_formatting_percentage():
]



def test_stream_xlsx_preserve_formatting_number_multicode():
    """Multi-section Excel number formats (positive;negative;...) are rendered."""
    expected = [["4.5"], ["-9.032"], ["15.8"]]
    options = {"headers": 1, "ignore_blank_headers": True, "preserve_formatting": True}
    with Stream("data/special/number_format_multicode.xlsx", **options) as stream:
        assert stream.read() == expected



def test_stream_xlsx_workbook_cache():
workbook_cache = {}
source = BASE_URL % "data/special/sheets.xlsx"
Expand Down
58 changes: 58 additions & 0 deletions tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,22 @@ def test_stream_pick_fields_position_and_prefix():
]


def test_stream_pick_fields_keyed_source():
    """skip_fields accepts header names and 1-based positions on keyed sources."""
    source = [{'id': 1, 'name': 'london'}, {'id': 2, 'name': 'paris'}]
    cases = [
        (['id'], ['name'], [['london'], ['paris']]),
        ([1], ['name'], [['london'], ['paris']]),
        (['name'], ['id'], [[1], [2]]),
        ([2], ['id'], [[1], [2]]),
    ]
    for skip, expected_headers, expected_rows in cases:
        with Stream(source, headers=1, skip_fields=skip) as stream:
            assert stream.headers == expected_headers
            assert stream.read() == expected_rows


def test_stream_limit_fields():
source = 'text://header1,header2,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1, limit_fields=1) as stream:
Expand Down Expand Up @@ -522,6 +538,48 @@ def test_stream_limit_offset_fields():
]


def test_stream_matrix_pick_fields():
    """pick_fields mixes a 1-based position with a header name."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, pick_fields=[2, 'f3']) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


def test_stream_matrix_pick_fields_regex():
    """pick_fields supports a regex matcher against header names."""
    matcher = {'type': 'regex', 'value': 'f[23]'}
    with Stream('data/matrix.csv', headers=1, pick_fields=[matcher]) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]


def test_stream_matrix_skip_fields():
    """skip_fields mixes a 1-based position with a header name."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, skip_fields=[1, 'f4']) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


def test_stream_matrix_skip_fields_regex():
    """skip_fields supports a regex matcher against header names."""
    matcher = {'type': 'regex', 'value': 'f[14]'}
    with Stream('data/matrix.csv', headers=1, skip_fields=[matcher]) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]


def test_stream_matrix_limit_fields():
    """limit_fields keeps only the first N columns."""
    with Stream('data/matrix.csv', headers=1, limit_fields=1) as stream:
        headers, rows = stream.headers, stream.read()
    assert headers == ['f1']
    assert rows == [['11'], ['21'], ['31'], ['41']]


def test_stream_matrix_offset_fields():
    """offset_fields drops the first N columns."""
    with Stream('data/matrix.csv', headers=1, offset_fields=3) as stream:
        headers, rows = stream.headers, stream.read()
    assert headers == ['f4']
    assert rows == [['14'], ['24'], ['34'], ['44']]


def test_stream_matrix_limit_and_offset_fields():
    """offset_fields and limit_fields combine: skip one column, then take two."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, limit_fields=2, offset_fields=1) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


# Pick/skip/limit/offset rows

def test_stream_pick_rows():
Expand Down

0 comments on commit 43969db

Please sign in to comment.