From 5ce6f462b524c7440cd8455702387699b8bae2e5 Mon Sep 17 00:00:00 2001 From: Irakli Mchedlishvili Date: Thu, 26 Oct 2017 16:18:07 +0400 Subject: [PATCH] [bb tests][xl]: testing entire flow by running the pipelines (#59) * Fixtures for simple, excel, multi-resource and processing datasets * inputs and expected results * new setup for requirements, tox and Travis * using moto_server to allow S3 operations fixes #48 --- .travis.yml | 9 + datapackage_pipelines_assembler/generator.py | 4 +- setup.py | 5 +- tests/data/sample_birthdays.csv | 21 ++ tests/data/sample_birthdays.xlsx | Bin 0 -> 5563 bytes tests/data/sample_birthdays_invalid.csv | 24 ++ tests/data/sample_emails.csv | 21 ++ tests/inputs/excel/assembler.source-spec.yaml | 16 + tests/inputs/excel/datapackage.json | 27 ++ .../multiple_files/assembler.source-spec.yaml | 17 + tests/inputs/multiple_files/datapackage.json | 44 +++ .../assembler.source-spec.yaml | 26 ++ .../inputs/needs_processing/datapackage.json | 27 ++ .../single_file/assembler.source-spec.yaml | 16 + tests/inputs/single_file/datapackage.json | 27 ++ tests/outputs/csv/sample_birthdays.csv | 21 ++ .../outputs/csv/sample_birthdays_invalid.csv | 24 ++ tests/outputs/csv/sample_emails.csv | 21 ++ tests/outputs/excel/sample_birthdays.xlsx | Bin 0 -> 5563 bytes tests/outputs/json/sample_birthdays.json | 102 +++++ tests/outputs/json/sample_emails.json | 82 +++++ tests/outputs/zip/excel.zip | Bin 0 -> 6850 bytes tests/outputs/zip/multiple-files.zip | Bin 0 -> 4009 bytes tests/outputs/zip/single-file.zip | Bin 0 -> 2303 bytes tests/test_flow.py | 347 ++++++++++++++++++ tests/{test_main.py => test_processors.py} | 0 tox.ini | 4 +- 27 files changed, 882 insertions(+), 3 deletions(-) create mode 100644 tests/data/sample_birthdays.csv create mode 100644 tests/data/sample_birthdays.xlsx create mode 100644 tests/data/sample_birthdays_invalid.csv create mode 100644 tests/data/sample_emails.csv create mode 100644 tests/inputs/excel/assembler.source-spec.yaml create mode 100644 tests/inputs/excel/datapackage.json create mode 100644 tests/inputs/multiple_files/assembler.source-spec.yaml create mode 100644 tests/inputs/multiple_files/datapackage.json create mode 100644 tests/inputs/needs_processing/assembler.source-spec.yaml create mode 100644 tests/inputs/needs_processing/datapackage.json create mode 100644 tests/inputs/single_file/assembler.source-spec.yaml create mode 100644 tests/inputs/single_file/datapackage.json create mode 100644 tests/outputs/csv/sample_birthdays.csv create mode 100644 tests/outputs/csv/sample_birthdays_invalid.csv create mode 100644 tests/outputs/csv/sample_emails.csv create mode 100644 tests/outputs/excel/sample_birthdays.xlsx create mode 100644 tests/outputs/json/sample_birthdays.json create mode 100644 tests/outputs/json/sample_emails.json create mode 100644 tests/outputs/zip/excel.zip create mode 100644 tests/outputs/zip/multiple-files.zip create mode 100644 tests/outputs/zip/single-file.zip create mode 100644 tests/test_flow.py rename tests/{test_main.py => test_processors.py} (100%) diff --git a/.travis.yml b/.travis.yml index 6c4ff72..6c77c6c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,9 @@ sudo: required +services: + - elasticsearch + dist: trusty @@ -21,5 +24,11 @@ install: script: - make test +before_script: + - moto_server & + - sleep 30 + - curl localhost:9200 + - curl localhost:5000 + after_success: - coveralls diff --git a/datapackage_pipelines_assembler/generator.py b/datapackage_pipelines_assembler/generator.py index b2e1cd7..8bfbdf2 100644 --- a/datapackage_pipelines_assembler/generator.py +++ b/datapackage_pipelines_assembler/generator.py @@ -56,7 +56,9 @@ def s3_path(*parts): else: path = '/'.join(str(p) for p in parts) bucket = os.environ['PKGSTORE_BUCKET'] - return 'https://{}/{}'.format(bucket, path) + # Handle other s3 compatible server as well (for testing) + protocol = os.environ.get('S3_ENDPOINT_URL') or 'https://' + return '{}{}/{}'.format(protocol, bucket, path) class Generator(GeneratorBase): diff --git a/setup.py b/setup.py index 3389a8c..46b699c 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def read(*paths): INSTALL_REQUIRES = [ 'datapackage-pipelines', 'datapackage-pipelines-elasticsearch>=0.0.3', - 'datapackage-pipelines-aws>=0.0.8', + 'datapackage-pipelines-aws>=0.0.9', 'psycopg2', 'tweepy', 'facebook-sdk', @@ -29,6 +29,9 @@ def read(*paths): TESTS_REQUIRE = [ 'pylama', 'tox', + 'moto', + 'boto3', + 'google-compute-engine' ] README = read('README.md') VERSION = read(PACKAGE, 'VERSION') diff --git a/tests/data/sample_birthdays.csv b/tests/data/sample_birthdays.csv new file mode 100644 index 0000000..e22488e --- /dev/null +++ b/tests/data/sample_birthdays.csv @@ -0,0 +1,21 @@ +date,first_name,last_name +2016-10-15,Shaylynn,Eallis +2017-01-18,Patricia,Eefting +2017-03-01,Karrah,Couser +2017-03-17,Rhetta,Price +2016-12-23,Alexandros,Farrand +2017-05-01,Ado,Matejic +2016-10-28,Keene,Tonna +2017-01-31,Helena,Aiskovitch +2017-02-11,Leigh,Butner +2017-01-19,Perle,Work +2016-11-16,Delora,Pavolillo +2017-09-21,Marshall,Leall +2017-04-28,Olwen,Mullin +2016-12-27,Nerta,Enrique +2016-12-07,Ashlie,Bracey +2017-05-18,Dode,Ritmeier +2016-10-16,Agace,Kew +2017-04-08,Beckie,Dove +2017-01-20,Filippa,McPolin +2017-03-19,Madison,Sheekey diff --git a/tests/data/sample_birthdays.xlsx b/tests/data/sample_birthdays.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4c5d646b5e858f8fb6b942faddde972da3d53b97 GIT binary patch literal 5563 zcmaJ_1z42bwx$_Cx6Y%Up`=?tx}=6~Mg$2#Y3ULqC1mLC#t{@@Bt3)w zxks+|9@qCgduH~t*WUBJEB0Eix-vQj843UZK&dlPQbxHUc*yT&?$(YTAkOQtJo%Fn zIyX_&K|oBRzfV2647j%K`Dl(pKyc>Jm4!eS7qwS_e;B~9v5i0FBj#rVTvXv1Y6T9NPK``kpn$! zC06?+qowomwrYzb$mi5w6~HoORK4=t4ce$(8<*MPCE66_4izWe4zIZK5NZU-C0w;8 ztLzTiO}>XQ+liy7D`Nv}%0e4fkgqL*hJyV4-+&=Pt^m7Osk^(ldVnlkT{)jRIYJZF z96oT}>Nz+k>-?TAh-1Xk<2FkrLS8S;?~PYc{*Lx4syy}k{HOVsa9;s2v9BrLXS}P6 z1tlNPwY=cfQNeW`V}}!n`Q9`A$j3^8TN3`2z|TEWrv3ubCVLN95UC9gRTdMXi^MNd zr0tB+ww|}P{RYDw;-P(=$Mj%zc5pUSdTz=~-a^t9SDTts;+yT`R$; zp>;XqCkXRV!O0a@d&YbbhcPiH<;u0@{^`&w$x%c);WTL-*u1n@tG?kz^tQXpYKJ3 z&MsV%Mq>~I77Qe0sPwoWi&0)15fmkAl{I8wpsX;8dMmp`k6Vwpl23HsFr*$WZac0p zx>0F#%8=Z`@dpK)4O&SeWMQJ&a;m8Leau%iT@(VGrXF(r^oN74wmWZN#83{8M}}ok zPXDl6E0%mTd;DV5o@GP4W*$n=_o1vBl9QJ!1)e~8FNbax$807$rnZS=wXgf%>?afU z@Mguxu~U0XK4-IatavQzmW$@h7T1zpIvmwrs^E{(iH$#Ygurl6H ztt+w@8F=pLND%3e|JDdRmm-dmk@1*91hL1e5Y)EENIwCu1^03;ZV8}$I}bZ*490iT zVg7v2Ln9fMvoi-hbgbxnS^3ri)J&Ke835ykH}kh32v<%7$1)pdaE-c%X}iSpcrMYd zZKVp`DR)BJY2~j@eH&@3r;eb10^Y~P-NDku#o^js{{(-M%C&n(9$b>kA$kOgR z;v_3Cf1vhitc`Z_Cr{P=4CJIS!nOW9nld_l)UI&6#~Z)*yj%JfR#^c?L}!mTpe^L` z3w)JYcer3_#f~wd>K@6PR??YZN=1-i?UA4Xr)5{7h)Lmql4|MnKyw&e12p(eaXc4w zm|Uye2>qQiyLyX9*Uu`eX?y~+SN?fi{rEV;?zBk{VVVh+E9@X1?C(yen_`>dba!Xy zKURk2Y4ZEvOq$uy9{Az8;LMk+2Y$X_IQrI*C0ky(wD{~7GHQpXeq+e@X8bEMg#RFO z6O(wY0q1!xqBaCO#*)(mZ{zu@JB>9F7(0=IDkG`9HTqhG5|9S6O9X_4T?Iz#m)LFa zV{3S5eC@Y#JR0R^+uN!FBq z!WVX$THNZa^Y&r(*G+f#u*hVUoSs0y*-#f6^wuhJl&P_VQ1i8EW0tA=L&HDKcWTIl z-g*VTmbAdOlyGM_sV9lI9+YKe5cR_MZMA=Nlx@dsyftLjK6dq_HE#P{Iz`iaNQv`- z`m3}@0R!#QkO|_9WM52qfNf`^+9|pey|pH_dp(31mHAv`DVyM{D$Umda`&d;_;el{ zJA&yaGGww>mHqX1C+I&p9*BzL>Fj6?*~w-IY^R7kibZ2LTmkdmMYM|$wcuLuf6e#lw(3jef*JM}#HQDRHVuiBV1W&f&s~PtPBpL`dTfmgG!V z7qz&d=kgm!VeyffVKUvr(b7JgXkTo4%aip>hHQp$Mc(JSt9C?@IXRtCupc&w&SR;) z>2FgE&QfGH)wjPh+zp70yh_AoX`nRx+(bIVwvg+Q$lfyIfsiZ@&G}WexRq2T?ohUbSin=l2yi^#-!5X z+8L~4ML%yZofp_g1t@$|4^Bx}YU;WYx*GXZ4W$Y_(&>cJ>trhuufdWMh{>xTXZA}y zYsl;n?@^$kBk3x7dB5Uy?>4MOA}3~iK36j=80h)j!|Eg9i<0$R3YmKY$+(RvN;*X7 zCTYIS+Nlh41Z<50Czn}zL`<$^&#k#ZadOqhxTI@t%=djxlY<*(G3cXOl5zA;A7ko- zTODL{j$0hc(`@ni768*Mq0XUDBFr;tF%p6gm)T0VduNm}O){*kRF%1c^-{T{Mfm_{ zeQ0;SH_Dm+!`Aj09_|tBTFVF)mS}G6XdzGaaZ_zZZG-o>-7fQqu7_oXmk&jkV-P;sKUPa*1xAhcj#Gj6 zo1l;Nr5+izyzeO5FF~L-E17XBX!w{f>mR6$CuI|`Dw_@$MNrih#=9Cs-73Tz;n5h4 zfA7GEAFt9T?jVG#K=a(abmrkpIEKw|VAcF=)r`^?Uv3^xYv;oI5VRLB$XF?X-m+l2 zQqm7%P^?@zzu7|h8zHpiB_CaPm zHsdz!#+ZGZaZVp67;!& zyzz?C7>iz!(ODGUfV=Om@#<0m zwem(SR!8DpVl$dcW6@e>AuiDqe)h_|Rs(D8ikD?;uE~{)2|G44Kt@XAfiGRSVtb;nOn05kcdN1k@_314$iBUEX-#CAD4)SXjDr370UY?Ppd>d2-fxW2K z<11V{d-C%gzRoGu8~SP6RbhPy?LgwNyXfe!*WAyuVM9}Xtq58(!g=q?gfvm?fCu1R zabg|)EWf%*%$#G#?1_yf<&uT z4nX=>62{eM#Bz9P@Je}t^Zt+rHR8(8Hn0OXnC)mIe}ZaqsSus?%7>dAs@m;G)|ng> zZf|23CS@u23GiAC+`QczPoT7hon;l_8D2UeVFqYCI};yhzlXgQ=^)%tP=exe(Os5q zPJDH&lTy_+L+^|4_y%F?G7mazVn*fS>!)F~t`nu=DpQO!t)tb!s3Xrg=^ zl4QJtj-oGz^E+k^1)>IMwvkgDpRXuL&y6I4qE}r|Qv@J~6iR5zUqD$iW@s0fy%|?d zXxEcdbU5&GZt4>1oX91W7|F^(j*-aGYCJP{JziK7zohP| zqi4ztD?oPMPZ`Bv(nTAMf|ihV!_C^4(W8 z8<1{A_eG@fL%exB=f@W~1||UpI|NlE-2leGXIE~}TyBe?4e0rNRIk0O^bbgTN(H*m z-uPB(gOrz5$DOh^$cA^@p-TW>b%&Ubw26E~z=rUvIOyJX=tRO;-GaYrk^O>KD@`wE zpLXL&KBHRMR`{~l@T9U6f>G5>d}_8KG{$3laX^svxJFAe-AK2TBIr#sj1PAIV7NPK z-ES$oC9+~sJ?qZN22XNLBuT&-7}|Wcw67^lJ?zosQa6fc?0!4)UhU_E_03B2T1$he zx}8mK8k506PD_Dj?X(~!mrp;1XD@zL7s7*(Rc=faltAjgrCqXrR~L2`?$%%}Pj`D~ z+h0XSn(p9rb=rDeosx?!2ofsbZU{@F>Xj!I?ow2>3bdufIIzV^xCDnQknKWK) zn)}%J#I{UJ%E|NS+UeAMrn4q4)%B5rE1z7R-fy=_@-p@^ej>{#l4R{O8P#P-d1{vS zz~ErvX%0g@+os#%1(0) zA(OX6Sk@I&_OxYV3etM)bIy`Jl%zlDCNH~IMkf?@g}#7ewW!vEeT&lO*bdk4Tyi{(wE5)x1yz zupZGxHS>0rwqwAQlxI@kDO%`svT0jqT<^zA*O8n(v>LbQ^ujN^;SQ~rfzkZ^3_`^t z_(@$g7jM(L*bvh|JPy1(MG~qeij$WKU3HldTRPud3z_!B8LJSz6fH@tuZwF~T4L1E zlj~9ZQ7E!cv6KPrOr`jC8i5m<3elxcN4iyVttRantJ@Afn(wM_46V`cCAfTxsKOb0 z92jaoiinKh8?FSuf8kp>-=|a19{J*7)iT0BTFAoGi_g2YY&rOw|J_g zGLrm>K=yyi6*W2K*u=9=grV{e^gofL>^ko_lh{};UimXSq|r$k8McA}S%~(5u)=J@ zfD9*;0YAN1dySY|xhK8N#T1~ys0-a8+mt6w_!86z;WAkotHzXMPrSB*E^OClHQ#35 z^cy*X)&0tECr(@Y(%1PA8xL81BdXntcC*6=<3)VcI9A6yjTuei7-OqB2T58^Nkxnr zl+%4;78toG*c8#!gosVdN$4#AH~BKEmi*JnxT?m22hfes z*NLRUIk1SK1vq!Y_Ss;typd~W8Mqu9$E?5Fksb^P>(bW?*EWbx0B|D{e!IG z?r72Ot6gG&dzcJ;JZ*s8X3xOp6w>lbj`E%@_QZFUU6Nzq1E_?sKzwy6&!j3jmq)ko(;1wf|JUS(0A&SHDFX`nUMe7;ihL?2i$KtMq1_n W2|smZOswlgILJpFX(^juTK@t_$%RG$ literal 0 HcmV?d00001 diff --git a/tests/data/sample_birthdays_invalid.csv b/tests/data/sample_birthdays_invalid.csv new file mode 100644 index 0000000..fec398d --- /dev/null +++ b/tests/data/sample_birthdays_invalid.csv @@ -0,0 +1,24 @@ +First three rows need to be removed +headers need to be reset +and dates need to be normalized +DATE,FIRST NAME ,LAST NAME +2016-10-15,Shaylynn,Eallis +2017-1-18,Patricia,Eefting +2017-3-1,Karrah,Couser +2017-3-17,Rhetta,Price +2016-12-23,Alexandros,Farrand +2017-5-1,Ado,Matejic +2016-10-28,Keene,Tonna +2017-1-31,Helena,Aiskovitch +2017-2-11,Leigh,Butner +2017-1-19,Perle,Work +2016-11-16,Delora,Pavolillo +2017-9-21,Marshall,Leall +2017-4-28,Olwen,Mullin +2016-12-27,Nerta,Enrique +2016-12-7,Ashlie,Bracey +2017-5-18,Dode,Ritmeier +2016-10-16,Agace,Kew +2017-4-8,Beckie,Dove +2017-1-20,Filippa,McPolin +2017-3-19,Madison,Sheekey diff --git a/tests/data/sample_emails.csv b/tests/data/sample_emails.csv new file mode 100644 index 0000000..a1d43f2 --- /dev/null +++ b/tests/data/sample_emails.csv @@ -0,0 +1,21 @@ +id,email +1,cjozsika0@github.io +2,smegainey1@twitter.com +3,eyesson2@mail.ru +4,aigoe3@usa.gov +5,tkalinowsky4@tamu.edu +6,eprime5@paypal.com +7,fwinchcum6@drupal.org +8,rrivilis7@nationalgeographic.com +9,mbrisley8@creativecommons.org +10,dmacavddy9@stumbleupon.com +11,lbromwicha@hostgator.com +12,kvargab@fotki.com +13,hlintsc@ning.com +14,hravenscraftd@nhs.uk +15,dtrencharde@chicagotribune.com +16,zkurtisf@ucla.edu +17,aindeg@wordpress.com +18,nadaneth@eepurl.com +19,kwerneri@msn.com +20,nmeardonj@springer.com diff --git a/tests/inputs/excel/assembler.source-spec.yaml b/tests/inputs/excel/assembler.source-spec.yaml new file mode 100644 index 0000000..b6fdc2b --- /dev/null +++ b/tests/inputs/excel/assembler.source-spec.yaml @@ -0,0 +1,16 @@ +meta: + dataset: excel + findability: published + owner: datahub + ownerid: datahub + version: 1 +inputs: +- kind: datapackage + parameters: + resource-mapping: + birthdays: ../../data/sample_birthdays.xlsx + url: datapackage.json +outputs: +- kind: zip + parameters: + out-file: 'excel.zip' diff --git a/tests/inputs/excel/datapackage.json b/tests/inputs/excel/datapackage.json new file mode 100644 index 0000000..4f59641 --- /dev/null +++ b/tests/inputs/excel/datapackage.json @@ -0,0 +1,27 @@ +{ + "name": "excel", + "resources": [ + { + "name": "birthdays", + "path": "data/birthdays.xlsx", + "format": "xlsx", + "schema": { + "fields": [ + { + "name": "date", + "type": "date" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + } + ], + "primaryKey": "date" + } + } + ] +} diff --git a/tests/inputs/multiple_files/assembler.source-spec.yaml b/tests/inputs/multiple_files/assembler.source-spec.yaml new file mode 100644 index 0000000..8224672 --- /dev/null +++ b/tests/inputs/multiple_files/assembler.source-spec.yaml @@ -0,0 +1,17 @@ +meta: + dataset: multiple-files + findability: published + owner: datahub + ownerid: datahub + version: 1 +inputs: +- kind: datapackage + parameters: + resource-mapping: + birthdays: ../../data/sample_birthdays.csv + emails: ../../data/sample_emails.csv + url: datapackage.json +outputs: +- kind: zip + parameters: + out-file: 'multiple-files.zip' diff --git a/tests/inputs/multiple_files/datapackage.json b/tests/inputs/multiple_files/datapackage.json new file mode 100644 index 0000000..465a000 --- /dev/null +++ b/tests/inputs/multiple_files/datapackage.json @@ -0,0 +1,44 @@ +{ + "name": "multiple-files", + "resources": [ + { + "name": "birthdays", + "path": "data/birthdays.csv", + "format": "csv", + "schema": { + "fields": [ + { + "name": "date", + "type": "date" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + } + ], + "primaryKey": "date" + } + }, + { + "name": "emails", + "path": "data/emails.csv", + "format": "csv", + "schema": { + "fields": [ + { + "name": "id", + "type": "number" + }, + { + "name": "email", + "type": "string" + } + ] + } + } + ] +} diff --git a/tests/inputs/needs_processing/assembler.source-spec.yaml b/tests/inputs/needs_processing/assembler.source-spec.yaml new file mode 100644 index 0000000..258c15b --- /dev/null +++ b/tests/inputs/needs_processing/assembler.source-spec.yaml @@ -0,0 +1,26 @@ +meta: + dataset: single-file-processed + findability: published + owner: datahub + ownerid: datahub + version: 1 +inputs: +- kind: datapackage + parameters: + resource-mapping: + birthdays: ../../data/sample_birthdays_invalid.csv + url: datapackage.json +processing: + - + input: birthdays + tabulator: + skip_rows: 4 + headers: + - date + - first_name + - last_name + output: birthdays +outputs: +- kind: zip + parameters: + out-file: 'single-file-processed.zip' diff --git a/tests/inputs/needs_processing/datapackage.json b/tests/inputs/needs_processing/datapackage.json new file mode 100644 index 0000000..c81f48e --- /dev/null +++ b/tests/inputs/needs_processing/datapackage.json @@ -0,0 +1,27 @@ +{ + "name": "single-file-processed", + "resources": [ + { + "name": "birthdays", + "path": "data/birthdays.csv", + "format": "csv", + "schema": { + "fields": [ + { + "name": "date", + "type": "date" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + } + ], + "primaryKey": "date" + } + } + ] +} diff --git a/tests/inputs/single_file/assembler.source-spec.yaml b/tests/inputs/single_file/assembler.source-spec.yaml new file mode 100644 index 0000000..2b07836 --- /dev/null +++ b/tests/inputs/single_file/assembler.source-spec.yaml @@ -0,0 +1,16 @@ +meta: + dataset: single-file + findability: published + owner: datahub + ownerid: datahub + version: 1 +inputs: +- kind: datapackage + parameters: + resource-mapping: + birthdays: ../../data/sample_birthdays.csv + url: datapackage.json +outputs: +- kind: zip + parameters: + out-file: 'single-file.zip' diff --git a/tests/inputs/single_file/datapackage.json b/tests/inputs/single_file/datapackage.json new file mode 100644 index 0000000..1a0a5b9 --- /dev/null +++ b/tests/inputs/single_file/datapackage.json @@ -0,0 +1,27 @@ +{ + "name": "single-file", + "resources": [ + { + "name": "birthdays", + "path": "data/birthdays.csv", + "format": "csv", + "schema": { + "fields": [ + { + "name": "date", + "type": "date" + }, + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + } + ], + "primaryKey": "date" + } + } + ] +} diff --git a/tests/outputs/csv/sample_birthdays.csv b/tests/outputs/csv/sample_birthdays.csv new file mode 100644 index 0000000..e22488e --- /dev/null +++ b/tests/outputs/csv/sample_birthdays.csv @@ -0,0 +1,21 @@ +date,first_name,last_name +2016-10-15,Shaylynn,Eallis +2017-01-18,Patricia,Eefting +2017-03-01,Karrah,Couser +2017-03-17,Rhetta,Price +2016-12-23,Alexandros,Farrand +2017-05-01,Ado,Matejic +2016-10-28,Keene,Tonna +2017-01-31,Helena,Aiskovitch +2017-02-11,Leigh,Butner +2017-01-19,Perle,Work +2016-11-16,Delora,Pavolillo +2017-09-21,Marshall,Leall +2017-04-28,Olwen,Mullin +2016-12-27,Nerta,Enrique +2016-12-07,Ashlie,Bracey +2017-05-18,Dode,Ritmeier +2016-10-16,Agace,Kew +2017-04-08,Beckie,Dove +2017-01-20,Filippa,McPolin +2017-03-19,Madison,Sheekey diff --git a/tests/outputs/csv/sample_birthdays_invalid.csv b/tests/outputs/csv/sample_birthdays_invalid.csv new file mode 100644 index 0000000..fec398d --- /dev/null +++ b/tests/outputs/csv/sample_birthdays_invalid.csv @@ -0,0 +1,24 @@ +First three rows need to be removed +headers need to be reset +and dates need to be normalized +DATE,FIRST NAME ,LAST NAME +2016-10-15,Shaylynn,Eallis +2017-1-18,Patricia,Eefting +2017-3-1,Karrah,Couser +2017-3-17,Rhetta,Price +2016-12-23,Alexandros,Farrand +2017-5-1,Ado,Matejic +2016-10-28,Keene,Tonna +2017-1-31,Helena,Aiskovitch +2017-2-11,Leigh,Butner +2017-1-19,Perle,Work +2016-11-16,Delora,Pavolillo +2017-9-21,Marshall,Leall +2017-4-28,Olwen,Mullin +2016-12-27,Nerta,Enrique +2016-12-7,Ashlie,Bracey +2017-5-18,Dode,Ritmeier +2016-10-16,Agace,Kew +2017-4-8,Beckie,Dove +2017-1-20,Filippa,McPolin +2017-3-19,Madison,Sheekey diff --git a/tests/outputs/csv/sample_emails.csv b/tests/outputs/csv/sample_emails.csv new file mode 100644 index 0000000..a1d43f2 --- /dev/null +++ b/tests/outputs/csv/sample_emails.csv @@ -0,0 +1,21 @@ +id,email +1,cjozsika0@github.io +2,smegainey1@twitter.com +3,eyesson2@mail.ru +4,aigoe3@usa.gov +5,tkalinowsky4@tamu.edu +6,eprime5@paypal.com +7,fwinchcum6@drupal.org +8,rrivilis7@nationalgeographic.com +9,mbrisley8@creativecommons.org +10,dmacavddy9@stumbleupon.com +11,lbromwicha@hostgator.com +12,kvargab@fotki.com +13,hlintsc@ning.com +14,hravenscraftd@nhs.uk +15,dtrencharde@chicagotribune.com +16,zkurtisf@ucla.edu +17,aindeg@wordpress.com +18,nadaneth@eepurl.com +19,kwerneri@msn.com +20,nmeardonj@springer.com diff --git a/tests/outputs/excel/sample_birthdays.xlsx b/tests/outputs/excel/sample_birthdays.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4c5d646b5e858f8fb6b942faddde972da3d53b97 GIT binary patch literal 5563 zcmaJ_1z42bwx$_Cx6Y%Up`=?tx}=6~Mg$2#Y3ULqC1mLC#t{@@Bt3)w zxks+|9@qCgduH~t*WUBJEB0Eix-vQj843UZK&dlPQbxHUc*yT&?$(YTAkOQtJo%Fn zIyX_&K|oBRzfV2647j%K`Dl(pKyc>Jm4!eS7qwS_e;B~9v5i0FBj#rVTvXv1Y6T9NPK``kpn$! zC06?+qowomwrYzb$mi5w6~HoORK4=t4ce$(8<*MPCE66_4izWe4zIZK5NZU-C0w;8 ztLzTiO}>XQ+liy7D`Nv}%0e4fkgqL*hJyV4-+&=Pt^m7Osk^(ldVnlkT{)jRIYJZF z96oT}>Nz+k>-?TAh-1Xk<2FkrLS8S;?~PYc{*Lx4syy}k{HOVsa9;s2v9BrLXS}P6 z1tlNPwY=cfQNeW`V}}!n`Q9`A$j3^8TN3`2z|TEWrv3ubCVLN95UC9gRTdMXi^MNd zr0tB+ww|}P{RYDw;-P(=$Mj%zc5pUSdTz=~-a^t9SDTts;+yT`R$; zp>;XqCkXRV!O0a@d&YbbhcPiH<;u0@{^`&w$x%c);WTL-*u1n@tG?kz^tQXpYKJ3 z&MsV%Mq>~I77Qe0sPwoWi&0)15fmkAl{I8wpsX;8dMmp`k6Vwpl23HsFr*$WZac0p zx>0F#%8=Z`@dpK)4O&SeWMQJ&a;m8Leau%iT@(VGrXF(r^oN74wmWZN#83{8M}}ok zPXDl6E0%mTd;DV5o@GP4W*$n=_o1vBl9QJ!1)e~8FNbax$807$rnZS=wXgf%>?afU z@Mguxu~U0XK4-IatavQzmW$@h7T1zpIvmwrs^E{(iH$#Ygurl6H ztt+w@8F=pLND%3e|JDdRmm-dmk@1*91hL1e5Y)EENIwCu1^03;ZV8}$I}bZ*490iT zVg7v2Ln9fMvoi-hbgbxnS^3ri)J&Ke835ykH}kh32v<%7$1)pdaE-c%X}iSpcrMYd zZKVp`DR)BJY2~j@eH&@3r;eb10^Y~P-NDku#o^js{{(-M%C&n(9$b>kA$kOgR z;v_3Cf1vhitc`Z_Cr{P=4CJIS!nOW9nld_l)UI&6#~Z)*yj%JfR#^c?L}!mTpe^L` z3w)JYcer3_#f~wd>K@6PR??YZN=1-i?UA4Xr)5{7h)Lmql4|MnKyw&e12p(eaXc4w zm|Uye2>qQiyLyX9*Uu`eX?y~+SN?fi{rEV;?zBk{VVVh+E9@X1?C(yen_`>dba!Xy zKURk2Y4ZEvOq$uy9{Az8;LMk+2Y$X_IQrI*C0ky(wD{~7GHQpXeq+e@X8bEMg#RFO z6O(wY0q1!xqBaCO#*)(mZ{zu@JB>9F7(0=IDkG`9HTqhG5|9S6O9X_4T?Iz#m)LFa zV{3S5eC@Y#JR0R^+uN!FBq z!WVX$THNZa^Y&r(*G+f#u*hVUoSs0y*-#f6^wuhJl&P_VQ1i8EW0tA=L&HDKcWTIl z-g*VTmbAdOlyGM_sV9lI9+YKe5cR_MZMA=Nlx@dsyftLjK6dq_HE#P{Iz`iaNQv`- z`m3}@0R!#QkO|_9WM52qfNf`^+9|pey|pH_dp(31mHAv`DVyM{D$Umda`&d;_;el{ zJA&yaGGww>mHqX1C+I&p9*BzL>Fj6?*~w-IY^R7kibZ2LTmkdmMYM|$wcuLuf6e#lw(3jef*JM}#HQDRHVuiBV1W&f&s~PtPBpL`dTfmgG!V z7qz&d=kgm!VeyffVKUvr(b7JgXkTo4%aip>hHQp$Mc(JSt9C?@IXRtCupc&w&SR;) z>2FgE&QfGH)wjPh+zp70yh_AoX`nRx+(bIVwvg+Q$lfyIfsiZ@&G}WexRq2T?ohUbSin=l2yi^#-!5X z+8L~4ML%yZofp_g1t@$|4^Bx}YU;WYx*GXZ4W$Y_(&>cJ>trhuufdWMh{>xTXZA}y zYsl;n?@^$kBk3x7dB5Uy?>4MOA}3~iK36j=80h)j!|Eg9i<0$R3YmKY$+(RvN;*X7 zCTYIS+Nlh41Z<50Czn}zL`<$^&#k#ZadOqhxTI@t%=djxlY<*(G3cXOl5zA;A7ko- zTODL{j$0hc(`@ni768*Mq0XUDBFr;tF%p6gm)T0VduNm}O){*kRF%1c^-{T{Mfm_{ zeQ0;SH_Dm+!`Aj09_|tBTFVF)mS}G6XdzGaaZ_zZZG-o>-7fQqu7_oXmk&jkV-P;sKUPa*1xAhcj#Gj6 zo1l;Nr5+izyzeO5FF~L-E17XBX!w{f>mR6$CuI|`Dw_@$MNrih#=9Cs-73Tz;n5h4 zfA7GEAFt9T?jVG#K=a(abmrkpIEKw|VAcF=)r`^?Uv3^xYv;oI5VRLB$XF?X-m+l2 zQqm7%P^?@zzu7|h8zHpiB_CaPm zHsdz!#+ZGZaZVp67;!& zyzz?C7>iz!(ODGUfV=Om@#<0m zwem(SR!8DpVl$dcW6@e>AuiDqe)h_|Rs(D8ikD?;uE~{)2|G44Kt@XAfiGRSVtb;nOn05kcdN1k@_314$iBUEX-#CAD4)SXjDr370UY?Ppd>d2-fxW2K z<11V{d-C%gzRoGu8~SP6RbhPy?LgwNyXfe!*WAyuVM9}Xtq58(!g=q?gfvm?fCu1R zabg|)EWf%*%$#G#?1_yf<&uT z4nX=>62{eM#Bz9P@Je}t^Zt+rHR8(8Hn0OXnC)mIe}ZaqsSus?%7>dAs@m;G)|ng> zZf|23CS@u23GiAC+`QczPoT7hon;l_8D2UeVFqYCI};yhzlXgQ=^)%tP=exe(Os5q zPJDH&lTy_+L+^|4_y%F?G7mazVn*fS>!)F~t`nu=DpQO!t)tb!s3Xrg=^ zl4QJtj-oGz^E+k^1)>IMwvkgDpRXuL&y6I4qE}r|Qv@J~6iR5zUqD$iW@s0fy%|?d zXxEcdbU5&GZt4>1oX91W7|F^(j*-aGYCJP{JziK7zohP| zqi4ztD?oPMPZ`Bv(nTAMf|ihV!_C^4(W8 z8<1{A_eG@fL%exB=f@W~1||UpI|NlE-2leGXIE~}TyBe?4e0rNRIk0O^bbgTN(H*m z-uPB(gOrz5$DOh^$cA^@p-TW>b%&Ubw26E~z=rUvIOyJX=tRO;-GaYrk^O>KD@`wE zpLXL&KBHRMR`{~l@T9U6f>G5>d}_8KG{$3laX^svxJFAe-AK2TBIr#sj1PAIV7NPK z-ES$oC9+~sJ?qZN22XNLBuT&-7}|Wcw67^lJ?zosQa6fc?0!4)UhU_E_03B2T1$he zx}8mK8k506PD_Dj?X(~!mrp;1XD@zL7s7*(Rc=faltAjgrCqXrR~L2`?$%%}Pj`D~ z+h0XSn(p9rb=rDeosx?!2ofsbZU{@F>Xj!I?ow2>3bdufIIzV^xCDnQknKWK) zn)}%J#I{UJ%E|NS+UeAMrn4q4)%B5rE1z7R-fy=_@-p@^ej>{#l4R{O8P#P-d1{vS zz~ErvX%0g@+os#%1(0) zA(OX6Sk@I&_OxYV3etM)bIy`Jl%zlDCNH~IMkf?@g}#7ewW!vEeT&lO*bdk4Tyi{(wE5)x1yz zupZGxHS>0rwqwAQlxI@kDO%`svT0jqT<^zA*O8n(v>LbQ^ujN^;SQ~rfzkZ^3_`^t z_(@$g7jM(L*bvh|JPy1(MG~qeij$WKU3HldTRPud3z_!B8LJSz6fH@tuZwF~T4L1E zlj~9ZQ7E!cv6KPrOr`jC8i5m<3elxcN4iyVttRantJ@Afn(wM_46V`cCAfTxsKOb0 z92jaoiinKh8?FSuf8kp>-=|a19{J*7)iT0BTFAoGi_g2YY&rOw|J_g zGLrm>K=yyi6*W2K*u=9=grV{e^gofL>^ko_lh{};UimXSq|r$k8McA}S%~(5u)=J@ zfD9*;0YAN1dySY|xhK8N#T1~ys0-a8+mt6w_!86z;WAkotHzXMPrSB*E^OClHQ#35 z^cy*X)&0tECr(@Y(%1PA8xL81BdXntcC*6=<3)VcI9A6yjTuei7-OqB2T58^Nkxnr zl+%4;78toG*c8#!gosVdN$4#AH~BKEmi*JnxT?m22hfes z*NLRUIk1SK1vq!Y_Ss;typd~W8Mqu9$E?5Fksb^P>(bW?*EWbx0B|D{e!IG z?r72Ot6gG&dzcJ;JZ*s8X3xOp6w>lbj`E%@_QZFUU6Nzq1E_?sKzwy6&!j3jmq)ko(;1wf|JUS(0A&SHDFX`nUMe7;ihL?2i$KtMq1_n W2|smZOswlgILJpFX(^juTK@t_$%RG$ literal 0 HcmV?d00001 diff --git a/tests/outputs/json/sample_birthdays.json b/tests/outputs/json/sample_birthdays.json new file mode 100644 index 0000000..786ff0c --- /dev/null +++ b/tests/outputs/json/sample_birthdays.json @@ -0,0 +1,102 @@ +[ + { + "date": "2016-10-15", + "first_name": "Shaylynn", + "last_name": "Eallis" + }, + { + "date": "2017-01-18", + "first_name": "Patricia", + "last_name": "Eefting" + }, + { + "date": "2017-03-01", + "first_name": "Karrah", + "last_name": "Couser" + }, + { + "date": "2017-03-17", + "first_name": "Rhetta", + "last_name": "Price" + }, + { + "date": "2016-12-23", + "first_name": "Alexandros", + "last_name": "Farrand" + }, + { + "date": "2017-05-01", + "first_name": "Ado", + "last_name": "Matejic" + }, + { + "date": "2016-10-28", + "first_name": "Keene", + "last_name": "Tonna" + }, + { + "date": "2017-01-31", + "first_name": "Helena", + "last_name": "Aiskovitch" + }, + { + "date": "2017-02-11", + "first_name": "Leigh", + "last_name": "Butner" + }, + { + "date": "2017-01-19", + "first_name": "Perle", + "last_name": "Work" + }, + { + "date": "2016-11-16", + "first_name": "Delora", + "last_name": "Pavolillo" + }, + { + "date": "2017-09-21", + "first_name": "Marshall", + "last_name": "Leall" + }, + { + "date": "2017-04-28", + "first_name": "Olwen", + "last_name": "Mullin" + }, + { + "date": "2016-12-27", + "first_name": "Nerta", + "last_name": "Enrique" + }, + { + "date": "2016-12-07", + "first_name": "Ashlie", + "last_name": "Bracey" + }, + { + "date": "2017-05-18", + "first_name": "Dode", + "last_name": "Ritmeier" + }, + { + "date": "2016-10-16", + "first_name": "Agace", + "last_name": "Kew" + }, + { + "date": "2017-04-08", + "first_name": "Beckie", + "last_name": "Dove" + }, + { + "date": "2017-01-20", + "first_name": "Filippa", + "last_name": "McPolin" + }, + { + "date": "2017-03-19", + "first_name": "Madison", + "last_name": "Sheekey" + } +] diff --git a/tests/outputs/json/sample_emails.json b/tests/outputs/json/sample_emails.json new file mode 100644 index 0000000..17eaacd --- /dev/null +++ b/tests/outputs/json/sample_emails.json @@ -0,0 +1,82 @@ +[ + { + "id": 1, + "email": "cjozsika0@github.io" + }, + { + "id": 2, + "email": "smegainey1@twitter.com" + }, + { + "id": 3, + "email": "eyesson2@mail.ru" + }, + { + "id": 4, + "email": "aigoe3@usa.gov" + }, + { + "id": 5, + "email": "tkalinowsky4@tamu.edu" + }, + { + "id": 6, + "email": "eprime5@paypal.com" + }, + { + "id": 7, + "email": "fwinchcum6@drupal.org" + }, + { + "id": 8, + "email": "rrivilis7@nationalgeographic.com" + }, + { + "id": 9, + "email": "mbrisley8@creativecommons.org" + }, + { + "id": 10, + "email": "dmacavddy9@stumbleupon.com" + }, + { + "id": 11, + "email": "lbromwicha@hostgator.com" + }, + { + "id": 12, + "email": "kvargab@fotki.com" + }, + { + "id": 13, + "email": "hlintsc@ning.com" + }, + { + "id": 14, + "email": "hravenscraftd@nhs.uk" + }, + { + "id": 15, + "email": "dtrencharde@chicagotribune.com" + }, + { + "id": 16, + "email": "zkurtisf@ucla.edu" + }, + { + "id": 17, + "email": "aindeg@wordpress.com" + }, + { + "id": 18, + "email": "nadaneth@eepurl.com" + }, + { + "id": 19, + "email": "kwerneri@msn.com" + }, + { + "id": 20, + "email": "nmeardonj@springer.com" + } +] diff --git a/tests/outputs/zip/excel.zip b/tests/outputs/zip/excel.zip new file mode 100644 index 0000000000000000000000000000000000000000..7f16d9d15b28e5daf152d9fa5121d959bc1e0b92 GIT binary patch literal 6850 zcmZ{p1yG&KvbGm)!Cis`cP9i15Foe(cNXsMuyA*GSx9hqC%C%=cXxNlWuO1Q_v~9| z-*4)xsi~TNYO3Fv?y7EiDJWe3Wc zuN!|dwfB#2`&3~(@F5a6_r=u75WgGvNyZf5;m3(uYS0F^a_-3q%7u!vllO3HUVwLQF+3y1VG4cVA>l}5~RIKcvA2XgxA=L^ZN>sC2g$U~{w%K^wMACCta<=l9H?dWT zzeNI?14`!&6IX>=hwfR7dF`;Aj4XX*)J=O3uj)l7=U^*4|H!Y2HW2c2z$!^HpiL)jovRTclH1Y!que#~W(TCc zzOG`&gsUnplH}Jc3%9P)JilX}eiUl7>vof;*yu#)Mv51D-)xV3=~tM1nLFYt(jSCG zB@9#|xS_iF;>-AXR{SZ9{F@sHL+K6OYW zB!;)%aw=Y?5tswDM10}fd!5blw$Syj4=k$-tXT$5cd2FOEQ|YW%N(F#I**A@x7k%i zeALB8zOIMEW@x>a!wC2l0Pqs%_& z0E+W#tT<}{9rLCS63ChZw^0$f2bpHnK$TWFxUW%zfV4pHH>tz6cA zh2#(sM}HUrXS@NMRTv&k?;iS!_(y_V$M_P!=fdio6x7VBhf(pH!Q z=DPQti*i&an`2?I28DM?gHy@@csJpT7^@$9NeEWdcDdY#MmCxY>yz|JO@uQj_ual% zTL+;%>KV0VYp5DBvt6vr#F9>v{V7LxWfCQ#B;RHYgx8N&CM&Xvj`f1nK|0&jTEP%} zb|!Y7#fv!4?|FI53jWTSaVelQ8 zpK%V|ztqarK2M}`EL=fomZSH^zO~wMJ%WE4*aT^=bdpqwUgTqV=Z@FcDi% z#PQA*YUvwO=P%V_r(mQcl@mP7lC#)0Gs;Ya0wrktd13?tTMcK*uoNs5n&Ko`RF2>- zPC@^5tN{mw;Gz2A$jS9DwWW=&j<01kY5mkJ9=q8CbJ5Z4C$i}`Xa%Wy&D|(lU2kWO zEwl}^vMw$`Zw5wcSyY{<3^L3uXF>NUHGQsEO`ta;9^N1uZ{ zZTdUegdb4ZKYt1RO<f8Wy_2jrf)SSZ4q1Cux4JRG7W?oX{(5! z+JdrPomc28tUyChaQ|=Fh(uICd(&4S3B4GRe*ODP1Gpm_H<*JB}d6<7sk&vlh&F|kGG(SA)~<5L4wIgRax ziJyub?ZFA$Bt@N)9#|}sOvtIE*=li{JJw;?5e6nvcn}4Ep69HkhZz;oO7Ga;`1X z3a@}vfi2L%c&U)fNcQrR(i^2rHlD6wtNG@r7jS8z2#0)FN;KF#sBSdm7r?JdpEn(T zJ;}exK}wZRzsDN7C@jgw6mZg?Lo$W=7N^bk;c(_ajKNVvjV%+SFf0LY&heaxsqNwB zv?uhgub`1nz0+{e7*BNvRMBcKhLsvW zgAelzj6dK@T1tQ8>f2xtHC6;Y^Y*OzEDV;4SSGLzS;)?QuD>bY!g}-`BpdHfXv}($ zrx9m&BS&Iz6rlOh&@oDrXn4l*@(~9x!;mwcBmZfX-JBJ}4>fsFvdP0FfmMdBtV}7f z+3)s8bH3;!zyxIK((!pX^-s_I9X|0H`iaH}JWi6S^{FYD+^wCW)pslZ`^Htaz)tvI zH3w9uBAXADLZR?8Y!S4!Q8rEJCkGzjk^u139N8kP=#;Rlq%a?o6h%-QVOG(lqu`3l&tdns z5{RtQ^%8DQ;YE6%{|X_goTfS>4JZ}7UTJsS+mzxC8ne}#d$F90n8ww$CYH0+qxD^@(QElJjjAv2Or?Q%AG_yvyOp*S)8FLbY_KQ{YdZZVA!Qj}smM#$e_9au zNQ|%{W1LaQKXj0>@AW`4(`aJa>%H!Iyxxes+mmqXQyaI^6d&EQF&xO<`|H9I?az~4 zcNcDj(PalzCwJ{9G%Pze1;>>Q?UNd%-5##>SO*&LxT+c23fJpYsz($8b(6 zKGqb_X4mVqArX0B)9tk}D%#F+KSw0WEl7NP>zRoTXtQ80)CPHKryDU?+!}FnX0r1es`e!tXH? zo)32WThB2u%<4N)tWk31HFMyU8{uqW@Nb$_j#_Y5%~!apH|(WpHK$Y=2tyeL z^g&m{&$ZhxBrQh!cc4)ik=n?9cGJ4jGybil#!`Pd{U$F{kFy-36mlx|)*en|`nrr1 z1PX$^=nGT-U1oFP8ZC`Cu%Xo~8_ni~xE}uiS2oO3G!te<4KBFPJ(JH~V~fW#g1U^nR^2DL(&~MMlZqUY_51vG!Kxqw+Lh%@T*_<|NO! zuaFkU>_c${tYK5nPff6Ck-LU)n)NKq+s?~VVQ1z^t$nCTEQ@6SYIds8K*gi%*pv~8 zJ_%eIt)dmL)7S;%ds70nK;IdHlVK|psJYKk-Q>wDUM!(_ZT%OcxUQ&+sFD$rkSs zkx6gZ3E*nhQ@T&3G`1kbm^pfcpV5kWN=jvZ+sZ9Abt0UL za2%wgvG<`G9w3wG=RKf#vghtJ*pnwU_c11H?!9~r^SfCQ|3u?H2g4}}uouq>Xn281 z;4}zw_FYW3+YNAT=N1iy>Xa0FlcOubwS%Ds+Ber~FpbREGJ!Z14xQu{xG{G?HuTK3 z)4cQjjV2%iYDc3#XC8dEh*OefWpD+L2hiW~;C_Yes~kAQS+bSdZ7b91ba;Uw<-9mh zN!5oc=w9Zj39S-_?>C1%8_Nx2ahH;bGDb z=ytQ9%dRt$4=!Tm&x}TmA}S0eCaYf-HzX=TDG<1!gwrRI?l=RBgZb06j=o<;tlEgJ zmA+v*qg#+uit)ndeh337vkOemO&Y z(YP`YRbI;nTDh@lVmu-SzA<1?@N#8_I3<>jq2S{bsDer$K5RwDzdH}thc=fEAL^W( zfpd+;hHj2fb0 z%Es;qGGNZU+#!6W&5q~#1u{N0+$UHcKkccwou`@0dhH<-8EKY93;u-iMjW&12dD`7 z1zqC0i=?WsHVwA~Fi+l6WV}`}?ffAn?n?TIR}?8zmdolm;`r6|kSDG-SY;b0Q|BS@ z^GePw8%tVewos8Pv_Ns`-j8s5xXn4oKhxCeX0F)3$mFkZUgMx zfEV=DBMY5KgL-9t*in)Ak1E=LKv1{$y z$Q;h6cIadqbY*UXXei+^G4xn;&z{7MduC7iV9$}Nc&sLgqYcsXL%Qu=7}iJP)&f;m zQ8PYD2iPDyH>PTbK$!{oTb}a^dB||+(#t<< zL|dX{ed8hF(W)}xq4;-nk0i=83S&E1H|@!{&s@llZ#e$qt$=ptOSC@4=~jd^h;OV8 zq+QN0?UlJz95;NhB7#2nB-flvRY;WXIv>iS^yd3U9hacJ|Iq}4aHNmk+c=nmhnBoY zg@qJp>MNQ`iVR-NW&qnpA+ALwx&!@veq(bABnO^T4zFSQRX&2Q9|4&bm6X9;O5924 zZ^O*?OK>H2}6?RcAS96V2#XnUIAz zZytQrIkgKw3wj!nj;>Q?+s10C&tbRNkY5&~^HwhQNV)N-htR4pJp>_anFhvDY*5!L z3C5B0bX%2o4Cl&6#wAd_6oV~ld`@)XX^i*_U081v-z@I5u3HP$(b{@3J@2X=&eV^vq5KO3DRtKKIbg z*~oLP{K@WmGutg7k9{?!%)ao3+Yw+~QGz|R!HEj}Q}|p=Z=kRJC&It2FE+a2G4Kgj z{(tIbXc3Z9^6fWNA`k!oEhGSd^B;9HT>}SaruSbaR>-ECLGnxL}rxs^g?Tf8rppi48- z1jGf4Q^ff!z!L-RLgNt>v=EeGz%%o*gbYEehy-n{68J;ulx&1@;0I)Y45EpKTU6u4 zX@!QJ*oUwWg343%_Nwx)(R?YsHez=L^<+Irf);1+t};E7 zXk`7}_H+`y@kd_^^=h+k0>F(wS_d?Ug|NPDGnD2h4ZIwob ze)spg+W{p1=yzRPdn0EvBNrxf2OI0~C>X%Mg+qS_939R2c)bhf{a%X1`%l1MM*p(3 zN>GsNX2A;lgSf~p(?CYrf$W>|E5Qc=wLitUwdzdA5Qe#YG{ zJ>AdXg9^-_c5G-83~^qwry`nl4g&+Bd7OJ@{eV=tGRV z3_lyy!1+WA?z6nGvq%hxX;KzOqAd3Xc6#cPZ21YwFl&meBVhD5VZYDC;vQIWv=0d_ zb^Yj*Wy-};-{rDb>FAMxJDZ&@40vCv#qq=U0R-^7i>8c=Dee(baPr_{X8e{VqA6T# zVM)>M7#ts7f|;hvv`E@IYNPR(cO!q*ooszybkMoYs63l0ZT?+UIm5=NN^?GX1FbIPuiOm=BpET}i^6kUlr>NygK@0s z(b9R>fbzac?Il{{*U#Ed9NK-($r!>%9d*7Q6PCO#o|Df(3GJlA9)jw>%j*^NkS3!g zxo9+*UN`-}c*>;7{wmlRo=vd4y;3>QEJL-Tq_y$hie0M`jS`|#7XA=KwYURH2Kawu zt#DJ_G&_=&=!cOG>zk!JJ1hwbYU;p)6B@WHWK%geRGhH3;$5Z3Q(dxLITADp_Gu@g zg>e&dJ@mW^Dmpb;IFoAr@I2z`iHn8Rxi2CpJNMR|Wp4fN@UX#mnim1Xuxa1_aZ+;Zg>^PJN5^C~$#jml!3PUxHoJgm}(rl<_ zIYY?{%2x$(pzw;^4Y#EGc{y(e+4MeI{+kIlr*PIy{f3CvM1C8^sl)O~2IN{6rmMC=cA712hAUE?J~T?8&IkF4jzsM@ z-1X6T?_X7_@N|Moyu*s<7!3d7H z5_f!*yq^7t4}xpb{`&HMVId%~p#JZ^%KNST*H`EJ*YpovmcIl3DZ%_da{$1{m;1d> z^S=TAoo@cl_@`j`Z${PoN&l1aze4BVVgD33{|&qRPuTy2(Z5sv=_&u4^7B27{&l3k YKWup^7}&pdz`PHY_n@tU|JUlj0155HyZ`_I literal 0 HcmV?d00001 diff --git a/tests/outputs/zip/multiple-files.zip b/tests/outputs/zip/multiple-files.zip new file mode 100644 index 0000000000000000000000000000000000000000..a3e8560b8c5223d2d286e1bbc680bdb7e6ab99bc GIT binary patch literal 4009 zcmaKv2Q=GjAID?WC~BmLqSW58wWv{Ad)KC7?-6^XsL|RhG)6TvRMZHiMq9HswJE7p zHCj}S;#Tpdz1P+IUhkWnoSfvG-}m{Q&-eF#p6{cpK|lxu001NaGnl%@^-_qmJRSf5 z#0LOqaR1std?CU%&QM<`J4ld^kgbouN~)q~E0{cb<3O-{6s|aLVl>FnVN|Imqiy6` zJduLoh~C^-jw`h&;1S+E$eXriefNBOzIQHF!S zgjQ)b<$7F4tC3ezJuAVATrLG;Bfwg5IecOB z3FPFSH162c3DbGJq`Pk<)-nvynh>e$tE;AHNoq^oj|su{lE+1cX5d-eob{#DD3NhC zm=a4IssfIe{^6_iZagDVxz&Hyhre*Vg?21qhTE&#=lbNsTyLmKUTF%{x3)>xc@gYu2*^iMhxHLR0i%bxY zEs{vaGM{gss;cOx4U~-i(VX=ntBCz8(d_L}it-Nb;AdgMRfDfuH0iQ9nsm{s`q}tT zt;H{Vpj;)k@l4~MnORBL%a0C_8h5$RaND#W_*>=&J`YwV zeW3{TYv%sO4D?kBa(4|ti$G8CTA8|kal!oP7chfh=_Cjie~TGv-dPk9F5D3!EIP2V zo6d`}88%)V`6Ft!(k%5y`)96eBz_f-3WF)@xVE3KqXy=sc!KnW2t)_QwEd@vJ2JE; z32m>tS_&gm_sZ15z#Fe5euPcSp923Judbo{dWY*La9-Se)13hSd%o}B=KJT2Yn{w^ z`mT-z+L<`o7KBtwD6>iT&n+F-tzd(wkuA&)F#9gjs03BcE-#Z2v}8WI@c7&wA!9}D z!AlOK1=aRc#uoELN0{U}x?+g~dBJ#_i)d0H1JlC<4AR~D`D(kNuSvd-N!t^5xq3xC zkedq`UVh3{SOtf=ewV}{5}o@EvsR)tHj&tn6l4z(9K2&e`JkK7NI7P1nlX~Vs$?!! z92AucvrT5|X)sP#aZGz)6rxRc-oU9wxDZC?miN{DYxjRLb@(3YfeovOf5M$J%txo z%n#&`3`+(?uCY%eIo3iguuG7wC^3!q!_~8Fwmi)B>M%w#IDwpX3#{c2Ff41M!{(NG zD{XG3JZp((POaWzhNHGHRUZ?Pqs|b&5Igq94-ts2w%EESMm?iOpGXPri`b!Gz&Epa zgq2T$lD28u$3j;QX8IyKFqO>I9f+zApIgBR+`;a4PWizXX_01WwKSKhd&7?!|&p( z4)mRXyIFm}ViY zHj@6^%*6ECYW;HV?jk>5;r*3z+CpzpDm8($yQAyM=GAYToa>wV#xrD}Js)3J?!FU= zHYBs$nLca-#Yt0l3oZ&2)|wzuOz(n6WzLPB7a+^laJ5$f?-hAlo2rYV;J1lp3`Ba7 z)JHe&NugCkfN8w@@P_IR>cP_6XpNgXkEi+gDIg!`n*szFu2!mo;+Fx8B#VT{(9{O$>5AY*(R1!`K|4pFt< zCwj$jNOgj8j{sa_wL~}go|(cyhKpfuuFvS<%Sl=T{SLB2mJ}`+%n^a8vXq8fFL~J{CW4r4d0ZTFVz1KN5`t;i z_8{_G+aj*z&}+1P^nqC+W|pdy9i94C$&eneVTX99j<4f9+njVE^u&4gga81zc;eY# zj{j|#WqPki6qsBktkA|XKYzo2@wm7F9LrsoLU@(*fzjcfgXBmD)@$vO^uxW4B-yRu z*VDdBVs|<7HVKoq4LkB|142eB%P!nM+VMF^*2-goB5lK8`Ml+L9eWfv%bQE7&Ko0B z$F!qgKKB|w)e;rLLmEO~%fE}n$S$<9eR00d!umv5ZnCK``5Nrv?hXk*3%^jm%41;g z%vD4;@R-o3Cpzgv`OfnKmB5AQ5i!C(CIX?P0$K}hNv*zk#+@f#W-Ebs?1s0bN~)9D zB)ltJdV(nuZkBsxafv4DhmTKzI7 zukXSQT|f4TP9569Idm7_F3kexPBU&drTWVuFNm!x#L@oO)3k16?urEikN7zUYq=WD zp?8QQE1y=1Fj$hN!A707%c|a|WghLRRWV}|)7_obi9u0Z*REfCxDmSSbJ@{XFhAQe z?~lgIR=NE~C^Pg(LOq8+?=DXqo~|l;)DtQ^e}ah`%bN~8QrsY?*-~vzVFQF%o>EQd zT;W|lB%cd)&YW>_j%>to$$ipPyW0kScXPY_oLj}eWO7t%$0FpMYpY(Bq5v5@LBesG zF9qq0wueMY4QsvjPa{bS*J6K%0lR+(Sgn|svAD&5#ovJ3hdC?;WFYBjwP;PtWS(T9b_ ziXel$HhL=1K&(K*M!=@r#FyHs&FMJtjx4;7+`{r7ElR{6YtXEM6Hph|7|}I+q8Jum zPUYkpo~zycp~kgzXud8(#a4aRuAUD;rq3#>AE>$bF=E6St!oBNY@O@gBdZKaOcsbn ziafet?i4|qLt%YSwcGIHU^WNZ;fU3PH;$6+5#J-2ABqB_(#bGufs$zawkvxgdcZwz zz2yeJiJAVs#+_}??QOTadE~C&Yj>`I6W10(Up5_+&eAqUpd3TOt_zLv#(S^5VE2%7N>bP&w2AhZqHxKtn0ZhjB-4wjTX5dl4ih`l&ey;T zVEUqyR?S{x{z6r=*l*8i^9kS9jk+JV*-IUV=tMmU-g1v1u70~T$hmIBsu&T;B-|?9 zN^)hu=ZafMK5_ZRTrp$~HRbtXrLu1lTf>-dZW>W;3W$3&Z0+&ZWm1EUh+Tz^(4`wyYO;O%|q{E zIX3(B+#J45k|>!ZAxrr~d|eGZd?3O9zBJ(;$6v4Nh@W5ozC!(8f7UJF&uRdmI^yRZ z{Hy--PVjs2S&8{P=97aV?T;4Fc&BF3q}DDKq%cN71J z48M1CR@^uhyLj$j-TY6bv!chTlEI168A0Ute$F}!PgRIX{@KspN8|4jXYJOf5@^yt aC4S$#bv1}^?EwI!xVJUVL@|n=zy1UKx2-Dx literal 0 HcmV?d00001 diff --git a/tests/outputs/zip/single-file.zip b/tests/outputs/zip/single-file.zip new file mode 100644 index 0000000000000000000000000000000000000000..99df91dc40c1480a04a523c191030da9449e9648 GIT binary patch literal 2303 zcmZ{lc|4T+9>*Vq)07e;21mM$HB**Ir81T*b1`J!$JizfGO|y$OIb!}NMjpob`!&7 zDTlc^wxrV7M+zg75|fhSj_y5&^U8gGe>|_}_jvJ z8FQBP{BRT(A|p3q)A`9P(ky=#xn`}_{sLt(r1G%uAyuF~g`Tz+A3oN*KQT5UgUj*K zWZ(fr#W~F7Q8mKzIxwT^YoN|BBZHtvyRHP)r8C9t)Xe1=k;uPr*C!bxkdIbm^*Fc{ zIJgi0gxiIKdkeYApUCg9%^b@y{BdXq*(kZ%?M_fm6$4p&6lDZ@JNfL_z!@R^&n2P8c!tip z(`Z`u$y?nAyH3Io<=X=uXGRrQ<~&*%c>ZW!7-CCH&uhA;5i1&y-qE4?hp%72EwwIu z^*z^FO7UmGXy^r+NP9dzZ_OA#@=|O$HzHHsG&WQ#8j?W>B#j*tP?J?rvim$0MLDr= z4Clm9%dk^qGrDFINd23l+uR#D+!b2>DX@pbn}Z!VvMYBN{{W95j7RW^D>y&jq!eCY zTY$2kwxaJs@8v6aqJk$D*rMNQW0Q6eH1Dv)6@_S)+iZob3ByurP5*q&_v>{F9krH^mB-ea3K zJh>ge1jw&w`755; z31@^1PT&#fo~(indi2?BQ3QP&hj-3m&erL9dG{@V$i9#88(iC-(KwzdO3f1zIgXP- z03i53&-_tnAC#BJ53gp3PCjGG;P_2rm6e*5w&~&iMOWn@bq&5E7(DZ~7q)T6H~?Y) zZSwxHNGN84a$Qsc%$-nd!)j&CoeA|{&gGlA+OLzImZWySutN2?9CwfHr56zjE_%J6%8ao@O6w3R7$u%hrYa3#5*u`FHJDY?YVE}7X= z8nZY`Y_cIbDn4qcdpGxIpsckqybnN%Ur?Z>+0A{99E zZD5tWOgX|PXjVtRQ_*=UQ3Egh$XzW`_{?W*a3A4r!~6EDjMli5(h^JySQXpYxk0`S zKGl+VAUg#+Z^p!XH)&ul1QStN=?swb8+xT45A*!lg05S0npl|9D}q5rOs80S9wx?G z%CChP@hax|sDw20&F3(*ODhlMH11+!Y0#3`!Eut{NQ*c?4T{X}#(aOHB{L`o&mkUF zskiwSDxQ4=BRK0JR4e**!jyPIHNtxt{JX`t%5&PB;)=^`nBT_t>~Q8o){}z!n@Q~Z zk!K@2!{nwi=@SmK_;YO6==FErtGWqnb)JtR;Yqd1&HlefeQ*KBwhxKm;s*2l|EbLRd4F8g z=&jq&GkRzJZe!-Z)c_C`?Z?Tmzt{gcbara&7Ma@`EKX1Ttns%1-6^(Pd~S=$^ZX?C mSD5Zp*{z|sRmM5tzICLX?Ttk6@@>7r%UM>O&nEJ3J^cq(uiiNT literal 0 HcmV?d00001 diff --git a/tests/test_flow.py b/tests/test_flow.py new file mode 100644 index 0000000..5fa2353 --- /dev/null +++ b/tests/test_flow.py @@ -0,0 +1,347 @@ +import json +import os +import requests +import subprocess +import time +import unittest + +import boto3 +from elasticsearch import Elasticsearch, NotFoundError + +ES_SERVER = os.environ['DPP_ELASTICSEARCH'] = 'http://localhost:9200' +S3_SERVER = os.environ['S3_ENDPOINT_URL'] = 'http://localhost:5000/' +os.environ['PKGSTORE_BUCKET'] = 'testing.datahub.io' +os.environ['AWS_ACCESS_KEY_ID'] = 'foo' +os.environ['AWS_SECRET_ACCESS_KEY'] = 'bar' + +def run_factory(dir='.'): + os.chdir(dir) + subprocess.call(['dpp', 'run', 'dirty']) + os.remove('.dpp.db') + +class TestFlow(unittest.TestCase): + + @classmethod + def setup_class(self): + es = Elasticsearch(hosts=[ES_SERVER]) + es.indices.delete(index='datahub', ignore=[400, 404]) + es.indices.delete(index='events', ignore=[400, 404]) + s3 = boto3.resource( + service_name='s3', + endpoint_url=S3_SERVER, + ) + self.bucket_name = os.environ['PKGSTORE_BUCKET'] + self.bucket = s3.Bucket(self.bucket_name) + + + def test_single_file(self): + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/single_file')) + + res = requests.get( + '{}{}/datahub/single-file/latest/datapackage.json'.format(S3_SERVER, self.bucket_name)) + + res = requests.get( + '{}{}/datahub/single-file:birthdays/data/birthdays.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_birthdays.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv, res.text) + + res = requests.get( + '{}{}/datahub/single-file:birthdays_csv/data/birthdays_csv.csv'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv.replace('\n', '\r\n'), res.text) + + + res = requests.get( + '{}{}/datahub/single-file:birthdays_json/data/birthdays_json.json'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + exp_json = json.load(open('../../outputs/json/sample_birthdays.json')) + self.assertListEqual(exp_json, res.json()) + + res = requests.get( + '{}{}/datahub/single-file:single-file_zip/data/single-file.zip'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + # TODO: compare zip files + + # Elasticsearch + res = requests.get('http://localhost:9200/datahub/_search') + self.assertEqual(res.status_code, 200) + + meta = res.json() + hits = [hit['_source'] for hit in meta['hits']['hits'] + if hit['_source']['datapackage']['name'] == 'single-file'] + + self.assertEqual(len(hits), 1) + + datahub = hits[0]['datahub'] + datapackage = hits[0]['datapackage'] + self.assertEqual(datahub['findability'],'published') + self.assertEqual(datahub['owner'],'datahub') + self.assertEqual(datahub['stats']['rowcount'], 20) + self.assertEqual(len(datapackage['resources']), 4) + + res = requests.get('http://localhost:9200/events/_search') + self.assertEqual(res.status_code, 200) + + events = res.json() + hits = [hit['_source'] for hit in events['hits']['hits'] + if hit['_source']['dataset'] == 'single-file'] + self.assertEqual(len(hits), 1) + + event = hits[0] + self.assertEqual(event['dataset'],'single-file') + self.assertEqual(event['event_action'],'finished') + self.assertEqual(event['event_entity'], 'flow') + self.assertEqual(event['owner'], 'datahub') + self.assertEqual(event['status'], 'OK') + + def test_multiple_file(self): + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/multiple_files')) + + res = requests.get( + '{}{}/datahub/multiple-files/latest/datapackage.json'.format(S3_SERVER, self.bucket_name)) + + res = requests.get( + '{}{}/datahub/multiple-files:birthdays/data/birthdays.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_birthdays.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv, res.text) + + res = requests.get( + '{}{}/datahub/multiple-files:birthdays_csv/data/birthdays_csv.csv'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv.replace('\n', '\r\n'), res.text) + + res = requests.get( + '{}{}/datahub/multiple-files:birthdays_json/data/birthdays_json.json'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + exp_json = json.load(open('../../outputs/json/sample_birthdays.json')) + self.assertListEqual(exp_json, res.json()) + + res = requests.get( + '{}{}/datahub/multiple-files:emails/data/emails.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_emails.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv, res.text) + + res = requests.get( + '{}{}/datahub/multiple-files:emails_csv/data/emails_csv.csv'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv.replace('\n', '\r\n'), res.text) + + res = requests.get( + '{}{}/datahub/multiple-files:emails_json/data/emails_json.json'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + exp_json = json.load(open('../../outputs/json/sample_emails.json')) + self.assertListEqual(exp_json, res.json()) + + res = requests.get( + '{}{}/datahub/multiple-files:multiple-files_zip/data/multiple-files.zip'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + + # Elasticsearch + res = requests.get('http://localhost:9200/datahub/_search') + self.assertEqual(res.status_code, 200) + + meta = res.json() + hits = [hit['_source'] for hit in meta['hits']['hits'] + if hit['_source']['datapackage']['name'] == 'multiple-files'] + self.assertEqual(len(hits), 1) + + datahub = hits[0]['datahub'] + datapackage = hits[0]['datapackage'] + self.assertEqual(datahub['findability'],'published') + self.assertEqual(datahub['owner'],'datahub') + self.assertEqual(datahub['stats']['rowcount'], 40) + self.assertEqual(len(datapackage['resources']), 7) + + res = requests.get('http://localhost:9200/events/_search') + self.assertEqual(res.status_code, 200) + + events = res.json() + hits = [hit['_source'] for hit in events['hits']['hits'] + if hit['_source']['dataset'] == 'multiple-files'] + self.assertEqual(len(hits), 1) + + event = hits[0] + self.assertEqual(event['event_action'],'finished') + self.assertEqual(event['event_entity'], 'flow') + self.assertEqual(event['owner'], 'datahub') + self.assertEqual(event['status'], 'OK') + + def test_excel_file(self): + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/excel')) + + res = requests.get( + '{}{}/datahub/excel/latest/datapackage.json'.format(S3_SERVER, self.bucket_name)) + + res = requests.get( + '{}{}/datahub/excel:birthdays/data/birthdays.xlsx'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + + res = requests.get( + '{}{}/datahub/excel:birthdays_csv/data/birthdays_csv.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_birthdays.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv.replace('\n', '\r\n'), res.text) + + res = requests.get( + '{}{}/datahub/excel:birthdays_json/data/birthdays_json.json'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + exp_json = json.load(open('../../outputs/json/sample_birthdays.json')) + self.assertListEqual(exp_json, res.json()) + + res = requests.get( + '{}{}/datahub/excel:excel_zip/data/excel.zip'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + + # Elasticsearch + res = requests.get('http://localhost:9200/datahub/_search') + self.assertEqual(res.status_code, 200) + + meta = res.json() + hits = [hit['_source'] for hit in meta['hits']['hits'] + if hit['_source']['datapackage']['name'] == 'excel'] + self.assertEqual(len(hits), 1) + + datahub = hits[0]['datahub'] + datapackage = hits[0]['datapackage'] + self.assertEqual(datahub['findability'],'published') + self.assertEqual(datahub['owner'],'datahub') + self.assertEqual(datahub['stats']['rowcount'], 20) + self.assertEqual(len(datapackage['resources']), 4) + + res = requests.get('http://localhost:9200/events/_search') + self.assertEqual(res.status_code, 200) + + events = res.json() + hits = [hit['_source'] for hit in events['hits']['hits'] + if hit['_source']['dataset'] == 'excel'] + self.assertEqual(len(hits), 1) + + event = hits[0] + self.assertEqual(event['event_action'],'finished') + self.assertEqual(event['event_entity'], 'flow') + self.assertEqual(event['owner'], 'datahub') + self.assertEqual(event['status'], 'OK') + + def test_needs_processing(self): + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/needs_processing')) + + res = requests.get( + '{}{}/datahub/single-file-processed/latest/datapackage.json'.format(S3_SERVER, self.bucket_name)) + + res = requests.get( + '{}{}/datahub/single-file-processed:birthdays/data/birthdays.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_birthdays_invalid.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv, res.text) + + res = requests.get( + '{}{}/datahub/single-file-processed:birthdays_csv/data/birthdays_csv.csv'.format(S3_SERVER, self.bucket_name)) + exp_csv = open('../../outputs/csv/sample_birthdays.csv').read() + self.assertEqual(res.status_code, 200) + self.assertEqual(exp_csv.replace('\n', '\r\n'), res.text) + + + res = requests.get( + '{}{}/datahub/single-file-processed:birthdays_json/data/birthdays_json.json'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + exp_json = json.load(open('../../outputs/json/sample_birthdays.json')) + self.assertListEqual(exp_json, res.json()) + + res = requests.get( + '{}{}/datahub/single-file-processed:single-file-processed_zip/data/single-file-processed.zip'.format(S3_SERVER, self.bucket_name)) + self.assertEqual(res.status_code, 200) + + # Elasticsearch + res = requests.get('http://localhost:9200/datahub/_search') + self.assertEqual(res.status_code, 200) + + meta = res.json() + hits = [hit['_source'] for hit in meta['hits']['hits'] + if hit['_source']['datapackage']['name'] == 'single-file-processed'] + self.assertEqual(len(hits), 1) + + datahub = hits[0]['datahub'] + datapackage = hits[0]['datapackage'] + self.assertEqual(datahub['findability'],'published') + self.assertEqual(datahub['owner'],'datahub') + self.assertEqual(datahub['stats']['rowcount'], 20) + self.assertEqual(len(datapackage['resources']), 4) + + res = requests.get('http://localhost:9200/events/_search') + self.assertEqual(res.status_code, 200) + + events = res.json() + hits = [hit['_source'] for hit in events['hits']['hits'] + if hit['_source']['dataset'] == 'single-file-processed'] + self.assertEqual(len(hits), 1) + + event = hits[0] + self.assertEqual(event['event_action'],'finished') + self.assertEqual(event['event_entity'], 'flow') + self.assertEqual(event['owner'], 'datahub') + self.assertEqual(event['status'], 'OK') + + def test_elasticsearch_saves_multiple_datasets_and_events(self): + # Make sure ES is empty + es = Elasticsearch(hosts=[ES_SERVER]) + es.indices.delete(index='datahub', ignore=[400, 404]) + es.indices.delete(index='events', ignore=[400, 404]) + + # Run flow + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/single_file')) + res = requests.get('http://localhost:9200/datahub/_search') + meta = res.json() + res = requests.get('http://localhost:9200/events/_search') + events = res.json() + self.assertEqual(meta['hits']['total'], 1) + self.assertEqual(events['hits']['total'], 1) + + # Second flow + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/multiple_files')) + res = requests.get('http://localhost:9200/datahub/_search') + meta = res.json() + res = requests.get('http://localhost:9200/events/_search') + events = res.json() + self.assertEqual(meta['hits']['total'], 2) + self.assertEqual(events['hits']['total'], 2) + + # Third flows + run_factory(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'inputs/excel')) + res = requests.get('http://localhost:9200/datahub/_search') + meta = res.json() + res = requests.get('http://localhost:9200/events/_search') + events = res.json() + self.assertEqual(meta['hits']['total'], 3) + self.assertEqual(events['hits']['total'], 3) + + # Clear again to not mess up with other tests + es.indices.delete(index='datahub', ignore=[400, 404]) + es.indices.delete(index='events', ignore=[400, 404]) + + + ## TODO run flow, update metadata, run again + # def test_quick_succession_local(self): + # start_time = time.time() + # run_factory(os.path.join(os.path.dirname( + # os.path.realpath(__file__)), 'inputs/local/needs_processing')) + # time_elapsed_first_run = time.time() - start_time + # start_time = time.time() + # run_factory(os.path.join(os.path.dirname( + # os.path.realpath(__file__)), 'inputs/local/needs_processing')) + # elapsed_time_second_run = time.time() - start_time + # self.assertTrue(time_elapsed_first_run > elapsed_time_second_run) + @classmethod + def teardown_class(self): + for obj in self.bucket.objects.all(): + obj.delete() + self.bucket.delete() diff --git a/tests/test_main.py b/tests/test_processors.py similarity index 100% rename from tests/test_main.py rename to tests/test_processors.py diff --git a/tox.ini b/tox.ini index 986da50..2d05fec 100755 --- a/tox.ini +++ b/tox.ini @@ -6,11 +6,13 @@ envlist= [testenv] deps= + google-compute-engine mock - requests-mock pytest + elasticsearch pytest-cov coverage + boto3 -rtest_requirements.txt passenv= CI