From bbc416fdddf088e9206230e984baca5cc55728f1 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Mon, 15 Oct 2018 09:52:47 +0300 Subject: [PATCH 1/3] Fix skip rows behaviour with Excel files --- README.md | 4 +++- tabulator/stream.py | 7 +++++-- tests/test_stream.py | 6 ++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ebe497fd..a332ab12 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,9 @@ with Stream([[1], 'bad', [3]], force_parse=True) as stream: ##### Skip rows -List of row numbers and/or strings to skip. If it's a string, all rows that begin with it will be skipped (e.g. '#' and '//'). +List of row numbers and/or strings to skip. +If it's a string, all rows that begin with it will be skipped (e.g. '#' and '//'). +If it's the empty string, all rows that begin with an empty column will be skipped. ```python source = [['John', 1], ['Alex', 2], ['#Sam', 3], ['Mike', 4], ['John', 5]] diff --git a/tabulator/stream.py b/tabulator/stream.py index c9ff9600..97e29a81 100644 --- a/tabulator/stream.py +++ b/tabulator/stream.py @@ -517,8 +517,11 @@ def builtin_processor(extended_rows): continue # Skip row by comments - match = lambda comment: row[0].startswith(comment) - if list(filter(match, self.__skip_rows_by_comments)): + match = lambda comment: ( + (isinstance(row[0], six.string_types) and row[0].startswith(comment)) if len(comment) > 0 + else row[0] in ('', None) + ) + if any(map(match, self.__skip_rows_by_comments)): continue # Ignore blank headers diff --git a/tests/test_stream.py b/tests/test_stream.py index 05ae4197..cd41f31a 100644 --- a/tests/test_stream.py +++ b/tests/test_stream.py @@ -328,6 +328,12 @@ def test_stream_skip_rows_no_double_skip(): with Stream(source, skip_rows=[4, -1]) as stream: assert stream.read() == [['id', 'name'], ['1', 'english'], ["# it's a comment!"]] +def test_stream_skip_rows_excel_empty_column(): + source = 'data/special/skip-rows.xlsx' + with Stream(source, headers=1, skip_rows=['']) as stream: + assert stream.read() == [['A', 'B'], [8, 9]] + + # Post parse From 0659898510774cec1ed1702e77e4efd1c56fb170 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Mon, 15 Oct 2018 09:56:17 +0300 Subject: [PATCH 2/3] lint --- tabulator/stream.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tabulator/stream.py b/tabulator/stream.py index 97e29a81..17636598 100644 --- a/tabulator/stream.py +++ b/tabulator/stream.py @@ -518,7 +518,8 @@ def builtin_processor(extended_rows): # Skip row by comments match = lambda comment: ( - (isinstance(row[0], six.string_types) and row[0].startswith(comment)) if len(comment) > 0 + (isinstance(row[0], six.string_types) and + row[0].startswith(comment)) if len(comment) > 0 else row[0] in ('', None) ) if any(map(match, self.__skip_rows_by_comments)): From 1aa43cc8628cdea46720812b1c661080463ae632 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Mon, 15 Oct 2018 10:05:52 +0300 Subject: [PATCH 3/3] missing file :( --- data/special/skip-rows.xlsx | Bin 0 -> 5850 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 data/special/skip-rows.xlsx diff --git a/data/special/skip-rows.xlsx b/data/special/skip-rows.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..13b51e18cb7ddc5af2ad7a28994134509a5ac5a5 GIT binary patch literal 5850 zcmaJ_1z42Z)*eFX2FamOLK;*=M|$Y)?wp~9Aw{~orI7~dQb6e%kuG5*4j^SvLkr%y z$NQXLyx0Gm=lf>Qp66XN@7imxwf3v7goRBB003|SDavXp#o0PlNc1OhOaOoYKnXB& zw}HU;`TqUH0Fa`;xPDb9cRTh95Qgvix{|uMcZU8aDRgDDKC&Ww1CD%XDjy%bC)Qt;)pQ% zAF$oLD_BF3aW7`jcqQB~RZhHNuwgVmsxnd3_}0ReX=K}|(NktWVwz9M)T6-@FWPIY zHQQ`G4)f)P`(C|&)J?yCDkbu)wQCh;VFpM2nqRP4W~tiH-03j@05Y^+tX-_s-CbN^ z{8ldRHhkVt$p7*VG+xCiRDx7y_XlUVhd95^Z!XlTXRGc>8+3kks zO7XI9eW!gseldKeWxZxnyU?b=A%;qEgVlnT@?YK&CGMw{jsQ(Mp@Ir&R)uyMyl8xz zBSVP;-B@0>j)o3@DofB+igyby*e|sU)|z@0%rTL2i>R}Qn?Vk)r|Qkwt7q)h9V)?N z;NCMn%lo}vYT}?XJF+7cBVlvH$04d)*%!xE6#pHdLQdL`IGom{#Z2m zIrL5XD|ri7*FSO39UZ~%5g^50@ylmjUn=z_<+^O89Am6JNro&0&J!UFcaZdCPXP(N z0!&lH?E#TQmhPdskF=X91G8d#OHy8KYvz3#r~WFc88}3oJcOm3Yn&-vq&>-VDyGvq z@KHbd*#^T};gDgO1^Xo`uqqzzuURKtfT0!6#Mx=txZhdMa(R~VZHmWQ?WmvrI-6Ow z<8S+DCJE@JBtf(BhVXm2xI0<8xH$cZOkBcT3*8>}&lU+wf0TEemEUY8~S^U++dhMgj|SO**ZEv2Q%C6P^Ni~4f*V?S2eKFHuf6V7;B)}&XQ)hRIRZfjn1n)7<1=cTDf z-unli8`Cv7`qrHN#^TGxskHR#f>j{rIG|UCs+M7r#Qcq;f$#rPP)&>Ehm&8iAJW#p zDN<_pJ^F3^G2R<7E~7nGfldl^KRxy{FZ}Aa|0hR`pmW6E&Z|E$^fXlDN`m2q86!~` zp>JB0Aqw`dp7>liYsp*UzQD=vi)|9lPt^Uq>!zOnydt4nLMOFA;PnU83*o&EzgE9s z_*?zAtL6sriMteBnRwP+!ew^V2py+MU;@GrLX^8>S*ZDkMCyZcQ?2HFq^%| zNEKT$7x2(bc5R37=S_lQ*4PW}44PE9^knY@3l&OewW_*1808duI0l%9x_n7d9E8ci zu__2yZ`ZIEoq{}C-HtLSU-pAfF3cuYUF1e%$_)(EGrA4OnN_G0&QN=xR zl@5=`!1HagZk|Fxh<}~S8XtMhJ1TwNrxFxL;Fi15mo3ue?Sido5P}0ZjvCcQLWkOO zBS1dq2+>7J7>Odxa5hxdg6u8uwR9~h6cH)VHtos(u)OpWRoM`*X~SM6y{YWAC}=Lb z>vMlKEl4RUL-3%$%DSvQi)MMmz^v?;9VLZ~mIpOkxe_HR!?x58(x7B5!IvP-egKnM zj2~Mbs_`8VJEE$Cjg?w6m#me~FtgI}VlD2e^t8g370P%!gpKE9fpDX*L9kI1(IPb;(E zMHw2MW69@7Mo#!Tb&m2`99J)Z4ILdjfz%2b)O-?~ZceUkFPWM{VsUCacUxk=G6@`; z?>!tK_JaUH5XZ81ncP{n90Yt+dw!K*2o!?T$IGK=P29>VNWG}@Y;UUxOMK;k2}bvc zY>PHNjUfs$oB;(ddqR9EpBWg%*(LGrZb(gOOpPS_h$s~t60pwX;1*U?jE(Q8okkkm zTpN0q3au7)Th^=DcO?urM4oe_ygxlLVl9W(FSkx>-M>eH9F6p}ofzGNo(o#Zk&TbC z5Bn=|+BPJzKf?6s_q2h+lB#AaO`n`?>)JFHqrUb>j=UMN#nPYjCiaw%dMyByTAgGk z?+7*E41Eqvn!goTi&))Ze3L_uUBZ2{P>rzcrkuoWLVYG9zEhlf!5JrSV#QO(j!zoZ zj9-F$w6|zSibmO9G{&5&PY7o57~~Q1gf}^C4sq~SVu|ru!u*FLgC8{S%KT!id4&r?DUnNYn1U7y8>%65zrayMwJwi z3(cnWht@sGe&-eSX;+($Ca3ZkW&ygGvc;M$~fCzm1=jgUi znsb=9O`7@lHWB3;1-rP!JqbUH#n+GzA zp(oa_PcU=?9=osW5s%Ng`M@vq1DB5E76JYQX}6>CV>len?g(RD1bj#Og>}9eG+SEZ z-*sE-=>oZ9V_ndEK$|p2lMnJon>9ueP_ie}&@aXc+w$|P49Vc4<3 zhtK*U(AMOf@y=aGU=9H=e5X5I7rdF1))YNz`YyOyg1(}XcGm~PPMw20ZN`0oiD@JM z&iA{Z=vl=l*$iDmeR>-*W8ThC|J94zh!>OtTB6zyGPq3eW2E1rbkY&nHQJM@>IP2; zgOh4sklt!m=5y*bYDsX-Gz^hPNEtL~mn2dputoEFFw9uuJ?R3?5{w8rFim4(J*PNX z8`@Q`kP+rHDrL#uKU_lecd_8^Y4t38L1hkKC{)2SU+YgG~}3mspwy-^r|nBI78zJ62>H$HLnCvwj;%E|K88ciP1*YOk_)wUw= zaK+-`Bvp~~v>N(qxy7at_JW_sM%GDbUuccR?sN5N-Tj{FFZscq5VjLX=ibSh-Od`O z_^X6&m$-3NV?3T!`In+aiD?o7@7fo~}dTR-5bR@EV%MXs^X9)r@<#h6o*?)#$Y?e}KWE}&Wu%4@7o4NF?imFDK=dDe z9zGD8KN4HiH*(1qphWk2b>BOrq$Zmw)S+n&UrVy4br)V@NGHa|#ZDz%ouA3M6&8e+ z=wt-owA7*((@k|>x&}mg5XloM_O-#L2Mg-r{ebOd#*}2d&4O7HlORTAs<)BM! zprR7md5QacaO`fd5E+)|pb$g%?sLm{T-54#<2SeG!V5+33&{E`4l(n{l3sTRsnJ?6 zq>A+NroWz3l+y@{8zE87tLN0h{fCgf<*w8;25xai3G;ggRqEQ;T2-|AnyT}vO9q|arDXN%Jd zD~KnybiDeeVQW>m6>(R)$NM%lRIk$ML)2>Gim|+x=`E!r$|~JcWn5i^oNi&ThKzRL zXzvlWZwXla?vdhRmYQv{&pTJP9!b;58YSz5w|}`PpOk$}dd!Ij^*PTW!GwjMF<*qV zs>rz$s3Lf226-RYEC!!c8@Z~&guIu zw$RQ+$7%_@?re?o0NUb^$MqlQ`S{5XZ}HHRy@#2Zy|XZ~s#(lpVOrM9U!*Icj}}ht zPE1H9aTqw1JYT5DeCd~Q8=?7RKI+yMdZaX6t<&dvY1giR2da(9oeukafu5)!fE; z?ww4t8Udl`S@vSUuS}$ns8dL#&G)k#P!-C$R~YRmkj^6(AF<6GFJT+)W)QoU-w!?@ zSGz+})UO&6=UUyg_S!?WliaeA9J{$>d5WBeC5Tznt$B-@MBGIJN0;Uo}1^XWcVfAyD@+V)}8)}%5ZWi z-0RzUc|dtc)009(FTc7H1||rd`~J6zh@QT7{h}%U4!kb2{feSNN6LSRZok8?3st|u zsn8kvANYS{tKZSrg`r>3YiRUuGSTng>l(?gU@Gjt!GBOpez$X7K={>;1iCjw|B2t_ zgx?KZZzX><;6?Bci~njYf46eIYX53QllX5d|5d|(w{X2K{%S#pzUzK fs1rE=@IUf|y3!4F5&;14(XT-CTFLx76aoJN$d=LR literal 0 HcmV?d00001