Permalink
Browse files

Add tests and file comparisons

  • Loading branch information...
m3nu committed May 20, 2018
1 parent 086a1e0 commit d89c4dedd7726d4fa9bee9d2459bf4fbd6721815
@@ -22,8 +22,6 @@ def to_text(path):
import shutil
from distutils import spawn #py2 compat
if spawn.find_executable("pdftotext"): #shutil.which('pdftotext'):
out, err = subprocess.Popen(
["pdftotext", '-layout', '-enc', 'UTF-8', path, '-'],
@@ -0,0 +1,10 @@
import os
import pkg_resources
def get_sample_files(extension):
    """Collect the sample files whose names end with *extension*.

    The ``compare`` resource directory of this package is walked
    recursively.

    Args:
        extension: Filename suffix to match, e.g. ``'.pdf'``.

    Returns:
        A list of full paths, in the order ``os.walk`` yields them.
    """
    sample_root = pkg_resources.resource_filename(__name__, 'compare')
    return [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(sample_root)
        for filename in filenames
        if filename.endswith(extension)
    ]
@@ -0,0 +1,31 @@
[
{
"amount": 4.11,
"amount_untaxed": 4.11,
"currency": "USD",
"date": "03/08/2014",
"desc": "Invoice 42183017 from Amazon Web Services",
"invoice_number": "42183017",
"issuer": "Amazon Web Services",
"lines": [
{
"description": "AWS Data Transfer",
"price_unit": "0.01"
},
{
"description": "Amazon Elastic Compute Cloud",
"price_unit": "1.87"
},
{
"description": "Amazon Glacier",
"price_unit": "2.22"
},
{
"description": "Amazon Simple Storage Service",
"price_unit": "0.01"
}
],
"partner_name": "Amazon Web Services, Inc.",
"partner_website": "aws.amazon.com"
}
]
Binary file not shown.
@@ -0,0 +1,11 @@
[
{
"amount": 319.0,
"currency": "INR",
"date": "20/10/2015",
"desc": "Invoice #BLR_WFLD20151000982590 from Flipkart",
"invoice_number": "#BLR_WFLD20151000982590",
"issuer": "Flipkart",
"order_id": "OD304175096047380001"
}
]
Binary file not shown.
@@ -0,0 +1,56 @@
[
{
"amount": 34.73,
"amount_untaxed": 34.73,
"currency": "EUR",
"date": "07/05/2014",
"desc": "Invoice 30064443 from QualityHosting AG",
"invoice_number": "30064443",
"issuer": "QualityHosting AG",
"lines": [
{
"desc": "Small Business StandardExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_strukan\n01.05.14-31.05.14",
"pos": "1",
"price": 3.89,
"qty": 1.0
},
{
"desc": "Small Business QualityExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_schneider\n01.05.14-31.05.14",
"pos": "2",
"price": 5.39,
"qty": 1.0
},
{
"desc": "Small Business QualityExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_minar\n01.05.14-31.05.14",
"pos": "3",
"price": 5.39,
"qty": 1.0
},
{
"desc": "Small Business QualityExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_mayr\n01.05.14-31.05.14",
"pos": "4",
"price": 5.39,
"qty": 1.0
},
{
"desc": "Small Business QualityExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_jenewein\n01.05.14-31.05.14",
"pos": "5",
"price": 5.39,
"qty": 1.0
},
{
"desc": "Small Business QualityExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_jauernik\n01.05.14-31.05.14\nQualityHosting AG - Uferweg 40-42 - D-63571 Gelnhausen\niViveLabs Ltd.\n93B Sai Yu Chung\nYuen Long, N.T.\nHong Kong\nPos. Menge Beschreibung Rabatt % VK-Preis Zeilenbetrag\nOhne Ohne MwSt.\nMwSt.",
"pos": "6",
"price": 5.39,
"qty": 1.0
},
{
"desc": "Small Business StandardExchange 2010\nGrundgeb\u00fchr pro Einheit\nDienst: OUDJQ_office\n01.05.14-31.05.14\n",
"pos": "7",
"price": 3.89,
"qty": 1.0
}
],
"vat": "DE 232 446 240"
}
]
@@ -0,0 +1,13 @@
[
{
"amount": 29.99,
"amount_untaxed": 24.99,
"currency": "EUR",
"date": "02/07/2015",
"date_due": "2015-07-05 00:00:00",
"desc": "Invoice 562044387 from Free",
"invoice_number": "562044387",
"issuer": "Free",
"vat": "FR60421938861"
}
]

This file was deleted.

Oops, something went wrong.

This file was deleted.

Oops, something went wrong.

This file was deleted.

Oops, something went wrong.
@@ -0,0 +1,10 @@
[
{
"amount": 1939.0,
"currency": "INR",
"date": "31/12/2017",
"desc": "Invoice IBZY2087 from OYO",
"invoice_number": "IBZY2087",
"issuer": "OYO"
}
]
File renamed without changes.
Binary file not shown.
@@ -1,6 +1,8 @@
import os
import glob
import filecmp
import json
import shutil
try:
from StringIO import StringIO
@@ -12,100 +14,109 @@
from invoice2data.main import *
from invoice2data.extract.loader import read_templates
from .common import *
class TestCLI(unittest.TestCase):
def setUp(self):
    """Load the templates and build the CLI argument parser for each test."""
    self.templates = read_templates()
    self.parser = create_parser()
def _get_test_file_path(self):
    """Locate four fixed reference invoices below the ``pdfs`` resource folder.

    Returns:
        A four-element list of full paths in a fixed order. A slot stays
        ``None`` when the corresponding file is not found.
    """
    # Filename -> position of its path in the returned list.
    slot_by_name = {
        "2014-05-07 Invoice 30064443 from QualityHosting.pdf": 0,
        "2014-08-03 SALES Amazon Web Services aws.amazon.coUS.pdf": 1,
        "2015-01-29 PAYPAL ENVATO MKPL EN 4029357733 AU.pdf": 2,
        "2015-07-02-invoice_free_fiber.pdf": 3,
    }
    found = [None] * 4
    pdf_root = pkg_resources.resource_filename(__name__, 'pdfs')
    for dirpath, _dirnames, filenames in os.walk(pdf_root):
        for filename in filenames:
            slot = slot_by_name.get(filename)
            if slot is not None:
                found[slot] = os.path.join(dirpath, filename)
    return found
# Unimplemented placeholder: the body does nothing.
# NOTE(review): declared without a `self` parameter; if this is meant to be a
# TestCLI method it cannot be called on an instance as written - confirm intent.
def _get_test_file_content():
pass
def compare_json_content(self, test_file, json_file):
    """Report whether two JSON files decode to equal data.

    Args:
        test_file: Path of the JSON file to check.
        json_file: Path of the JSON file to compare against.

    Returns:
        ``True`` when both files parse to equal objects, else ``False``.
        The outcome is also logged as the text "True" or "False".
    """
    with open(test_file) as produced_fp, open(json_file) as expected_fp:
        produced = json.load(produced_fp)
        expected = json.load(expected_fp)
    identical = expected == produced
    logger.info("True" if identical else "False")
    return identical
def test_input(self):
    """Run the CLI with the ``pdftotext`` input reader on every sample PDF."""
    # The earlier assignment built from self._get_test_file_path() was
    # overwritten immediately, so only the effective one is kept.
    args = self.parser.parse_args(
        ['--input-reader', 'pdftotext'] + get_sample_files('.pdf'))
    main(args)
def test_output_name(self):
    """Check that ``--output-name`` creates a CSV file with the given name."""
    test_file = 'inv_test_8asd89f78a9df.csv'
    # The earlier assignment built from self._get_test_file_path() was
    # overwritten immediately, so only the effective one is kept.
    args = self.parser.parse_args(
        ['--output-name', test_file, '--output-format', 'csv']
        + get_sample_files('.pdf'))
    main(args)
    self.assertTrue(os.path.exists(test_file))
    # Remove the generated file so repeated runs start clean.
    os.remove(test_file)
def test_debug(self):
    """Run the CLI with ``--debug`` on every sample PDF."""
    # The earlier assignment built from self._get_test_file_path() was
    # overwritten immediately, so only the effective one is kept.
    args = self.parser.parse_args(['--debug'] + get_sample_files('.pdf'))
    main(args)
# TODO: move result comparison to own test module.
# TODO: parse output files instead of comparing them byte-by-byte.
def test_content_csv(self):
for path, subdirs, files in os.walk(pkg_resources.resource_filename(__name__, 'compare')):
for file in files:
if file.endswith(".csv"):
cmp_file = os.path.join(path, file)
test_files = 'inv_test.csv'
args = self.parser.parse_args(['--output-name', test_files, '--output-format', 'csv'] + self._get_test_file_path())
def test_content_json(self):
    """Compare the JSON output for each sample PDF with its reference file.

    A PDF and a reference JSON file form a pair when their paths are
    identical apart from the ``.pdf`` / ``.json`` extension. PDFs without
    a reference file are skipped.
    """
    pdf_files = get_sample_files('.pdf')
    # Index the references by extension-less path so each PDF needs a single
    # lookup instead of a scan over all JSON files.
    expected_by_stem = {}
    for jfile in get_sample_files('.json'):
        expected_by_stem[jfile[:-len('.json')]] = jfile

    test_files = 'test_compare.json'
    for pfile in pdf_files:
        jfile = expected_by_stem.get(pfile[:-len('.pdf')])
        if jfile is None:
            continue
        args = self.parser.parse_args(
            ['--output-name', test_files, '--output-format', 'json', pfile])
        main(args)
        try:
            matches = self.compare_json_content(test_files, jfile)
        finally:
            # Always clean up, so a failed comparison does not leave the
            # output file behind for later iterations or test runs.
            if os.path.exists(test_files):
                os.remove(test_files)
        self.assertTrue(
            matches,
            "JSON output for %s differs from %s" % (pfile, jfile))
def test_copy(self):
# folder = pkg_resources.resource_filename(__name__, 'pdfs')
directory = os.path.dirname("invoice2data/test/copy_test/pdf/")
os.makedirs(directory)
args = self.parser.parse_args(['--copy', 'invoice2data/test/copy_test/pdf'] + get_sample_files('.pdf'))
main(args)
# self.assertTrue(filecmp.cmp(test_files, cmp_file, shallow=False))
os.remove(test_files)
def test_content_xml(self):
for path, subdirs, files in os.walk(pkg_resources.resource_filename(__name__, 'compare')):
i = 0
for path, subdirs, files in os.walk(pkg_resources.resource_filename(__name__, 'copy_test/pdf')):
for file in files:
if file.endswith(".xml"):
cmp_file = os.path.join(path, file)
if file.endswith(".pdf"):
i += 1
shutil.rmtree('invoice2data/test/copy_test/', ignore_errors=True)
self.assertEqual(i, len(get_sample_files('.json')))
'''
if i != len(self._get_test_file_json_path()):
print(i)
self.assertTrue(True)
else:
print(i)
self.assertTrue(False, "Number of files not equal")
'''
test_files = 'inv_test.xml'
args = self.parser.parse_args(['--output-name', test_files, '--output-format', 'xml'] + self._get_test_file_path())
main(args)
# self.assertTrue(filecmp.cmp(test_files, cmp_file, shallow=False))
os.remove(test_files)
# def test_template(self):
# directory = os.path.dirname("invoice2data/test/temp_test/")
# os.makedirs(directory)
# args = self.parser.parse_args(['--template-folder', 'ACME-templates', self._get_test_file_path()])
# main(args)
# shutil.rmtree('invoice2data/test/temp_test/', ignore_errors=True)
# self.assertTrue(args.template_folder)
def test_content_json(self):
def test_exclude_template(self):
for path, subdirs, files in os.walk(pkg_resources.resource_filename(__name__, 'compare')):
for file in files:
if file.endswith(".json"):
cmp_file = os.path.join(path, file)
test_files = 'inv_test.json'
args = self.parser.parse_args(['--output-name', test_files, '--output-format', 'json'] + self._get_test_file_path())
if file.endswith("oyo.pdf"):
my_file = os.path.join(path, file)
directory = os.path.dirname("invoice2data/test/temp_test/")
os.makedirs(directory)
shutil.copy('invoice2data/extract/templates/com/com.oyo.invoice.yml', 'invoice2data/test/temp_test/')
args = self.parser.parse_args(['--exclude-built-in-templates',
'--template-folder',
directory,
my_file])
main(args)
# self.assertTrue(filecmp.cmp(test_files, cmp_file, shallow=False))
os.remove(test_files)
# def test_copy(self):
# parser = create_parser()
# folder = pkg_resources.resource_filename(__name__, 'pdfs')
# args = parser.parse_args(['--copy', '/invoice2data/test/', self._get_test_file_path()])
# self.assertTrue(args.copy)
# def test_template(self):
# parser = create_parser()
# folder = pkg_resources.resource_filename(__name__, 'pdfs')
# args = parser.parse_args(['--template-folder', 'ACME-templates', self._get_test_file_path()])
# self.assertTrue(args.template_folder)
shutil.rmtree('invoice2data/test/temp_test/')
# def test_exclude_template(self):
# parser = create_parser()
# folder = pkg_resources.resource_filename(__name__, 'pdfs')
# args = parser.parse_args(['--exclude-built-in-templates', '--template-folder', 'ACME-templates', self._get_test_file_path()])
# self.assertTrue(args.exclude_built_in_templates)
if __name__ == '__main__':
unittest.main()
unittest.main()
Oops, something went wrong.

0 comments on commit d89c4de

Please sign in to comment.