From 87fd483d9ff304c28698b86de39e9c4082997f47 Mon Sep 17 00:00:00 2001
From: Christian Stefanescu
Date: Tue, 5 Mar 2024 22:36:23 +0400
Subject: [PATCH 1/6] Use write_entity instead of deprecated write_object

---
 ftmstore/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ftmstore/cli.py b/ftmstore/cli.py
index 769122f..235df71 100644
--- a/ftmstore/cli.py
+++ b/ftmstore/cli.py
@@ -28,11 +28,11 @@ def write_stream(dataset, file, origin=NULL_ORIGIN):
 
 
 def iterate_stream(dataset, file, entity_id=None):
-    from followthemoney.cli.util import write_object
+    from followthemoney.cli.util import write_entity
 
     for entity in dataset.iterate(entity_id=entity_id):
         log.debug("[%s]: %s", entity.id, entity.caption)
-        write_object(file, entity)
+        write_entity(file, entity)
 
 
 @click.group(help="Store FollowTheMoney object data")

From fd9fcd69b7bc9e7ce354f45e6f69e46a735dbffc Mon Sep 17 00:00:00 2001
From: Christian Stefanescu
Date: Tue, 5 Mar 2024 23:07:44 +0400
Subject: [PATCH 2/6] Maybe fix postgres CI test setup

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e4fcc25..18713b7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -10,7 +10,7 @@ jobs:
         image: postgres:10.8
         env:
           POSTGRES_USER: postgres
-          POSTGRES_PASSWORD: ""
+          POSTGRES_PASSWORD: postgres
           POSTGRES_DB: postgres
         ports:
           - 5432:5432

From f6e67c624140cb535b2b7e0dca444935fd62d844 Mon Sep 17 00:00:00 2001
From: Christian Stefanescu
Date: Wed, 6 Mar 2024 13:20:06 +0400
Subject: [PATCH 3/6] chore: fix build by using docker-compose postgres

---
 .github/workflows/build.yml | 22 +---------------------
 Makefile                    |  5 ++++-
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 18713b7..9cb710b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,33 +5,13 @@ on: [push]
 jobs:
   python:
     runs-on: ubuntu-latest
-    services:
-      postgres:
-        image: postgres:10.8
-        env:
-          POSTGRES_USER: postgres
-          POSTGRES_PASSWORD: postgres
-          POSTGRES_DB: postgres
-        ports:
-          - 5432:5432
     steps:
       - uses: actions/checkout@v1
       - name: Show ref
         run: |
          echo "$GITHUB_REF"
-      - name: Set up Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: "3.x"
-      - name: Install dependencies
-        env:
-          DEBIAN_FRONTEND: noninteractive
-        run: |
-          sudo apt-get install libleveldb-dev
-          pip install coverage wheel pytest
-          pip install -e ".[postgresql]"
       - name: Run the tests
-        run: make test
+        run: make build db test
      - name: Build a distribution
        run: |
          python setup.py sdist bdist_wheel
diff --git a/Makefile b/Makefile
index 4b16101..8bd473a 100644
--- a/Makefile
+++ b/Makefile
@@ -8,4 +8,7 @@ test:
 	docker-compose run --rm ftmstore pytest -s tests
 
 stop:
-	docker-compose down --remove-orphans
\ No newline at end of file
+	docker-compose down --remove-orphans
+
+dist:
+	docker-compose run --rm ftmstore python3 setup.py sdist bdist_wheel

From 4530b65726ddecb9b3334ca22d354cdc9ad6409a Mon Sep 17 00:00:00 2001
From: Alex Stefanescu
Date: Tue, 26 Mar 2024 15:08:04 +0100
Subject: [PATCH 4/6] Complete fix for #33

---
 ftmstore/cli.py         | 27 +++------------------------
 ftmstore/utils.py       | 27 +++++++++++++++++++++++++++
 tests/fixtures/entities |  5 +++++
 tests/test_io.py        | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 24 deletions(-)
 create mode 100644 tests/fixtures/entities
 create mode 100644 tests/test_io.py

diff --git a/ftmstore/cli.py b/ftmstore/cli.py
index 235df71..76a2845 100644
--- a/ftmstore/cli.py
+++ b/ftmstore/cli.py
@@ -9,32 +9,11 @@
 from ftmstore import get_dataset
 from ftmstore.settings import DATABASE_URI
 from ftmstore.store import Store
-from ftmstore.utils import NULL_ORIGIN
+from ftmstore.utils import NULL_ORIGIN, write_stream, iterate_stream
 
 log = logging.getLogger("ftmstore")
 
 
-def write_stream(dataset, file, origin=NULL_ORIGIN):
-    bulk = dataset.bulk()
-    for idx in count(1):
-        line = file.readline()
-        if not line:
-            break
-        entity = json.loads(line)
-        bulk.put(entity, fragment=str(idx), origin=origin)
-        if idx % 10000 == 0:
-            log.info("Write [%s]: %s entities", dataset.name, idx)
-    bulk.flush()
-
-
-def iterate_stream(dataset, file, entity_id=None):
-    from followthemoney.cli.util import write_entity
-
-    for entity in dataset.iterate(entity_id=entity_id):
-        log.debug("[%s]: %s", entity.id, entity.caption)
-        write_entity(file, entity)
-
-
 @click.group(help="Store FollowTheMoney object data")
 @click.option("-v", "--verbose", default=False, is_flag=True)
 def cli(verbose):
@@ -59,7 +38,7 @@ def write(db, dataset, infile, origin):
 @cli.command("iterate", help="Iterate entities")
 @click.option("--db", metavar="URI", default=DATABASE_URI, show_default=True)
 @click.option("-d", "--dataset", required=True)
-@click.option("-o", "--outfile", type=click.File("w"), default="-")
+@click.option("-o", "--outfile", type=click.File("w+b"), default="-")
 def iterate(db, dataset, outfile):
     dataset = get_dataset(dataset, database_uri=db)
     try:
@@ -70,7 +49,7 @@ def iterate(db, dataset, outfile):
 
 @cli.command("aggregate", help="Combination of write and iterate.")
 @click.option("-i", "--infile", type=click.File("r"), default="-")
-@click.option("-o", "--outfile", type=click.File("w"), default="-")
+@click.option("-o", "--outfile", type=click.File("w+b"), default="-")
 def aggregate(infile, outfile):
     dataset = get_dataset("aggregate_%s" % uuid4().hex)
     try:
diff --git a/ftmstore/utils.py b/ftmstore/utils.py
index d57fba0..75cd95b 100644
--- a/ftmstore/utils.py
+++ b/ftmstore/utils.py
@@ -1,8 +1,14 @@
+import json
+import logging
 from hashlib import sha1
 from normality import stringify
+from itertools import count
+
 
 NULL_ORIGIN = "null"
 
+log = logging.getLogger("ftmstore")
+
 
 class StoreException(Exception):
     pass
@@ -14,3 +20,24 @@ def safe_fragment(fragment):
     if fragment is not None:
         fragment = fragment.encode("utf-8", errors="replace")
     return sha1(fragment).hexdigest()
+
+
+def write_stream(dataset, file, origin=NULL_ORIGIN):
+    bulk = dataset.bulk()
+    for idx in count(1):
+        line = file.readline()
+        if not line:
+            break
+        entity = json.loads(line)
+        bulk.put(entity, fragment=str(idx), origin=origin)
+        if idx % 10000 == 0:
+            log.info("Write [%s]: %s entities", dataset.name, idx)
+    bulk.flush()
+
+
+def iterate_stream(dataset, file, entity_id=None):
+    from followthemoney.cli.util import write_entity
+
+    for entity in dataset.iterate(entity_id=entity_id):
+        log.debug("[%s]: %s", entity.id, entity.caption)
+        write_entity(file, entity)
diff --git a/tests/fixtures/entities b/tests/fixtures/entities
new file mode 100644
index 0000000..566844a
--- /dev/null
+++ b/tests/fixtures/entities
@@ -0,0 +1,5 @@
+{"id":"637427924ad445058bd0f19d67add26a.b97aa090345d3f0725a2135954cba8d51a912e54","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.510358","origin":"model","schema":"Person","properties":{"name":["Carlos Danger"],"nationality":["us"]}}
+{"id":"98c8d8d56eb241ccb0996a9e6f69007a.80b47167fc0bb2551b90583596a23a54d75608be","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.574715","origin":"model","schema":"LegalEntity","properties":{"name":["Carlos Danger"],"country":["gb"]}}
+{"id":"caa695fa11c44488b064aa22971fccdc.4a41a3439e6d2ab55fba7f7fec1d8be53de21fa2","mutable":true,"role_id":null,"created_at":"2024-03-18T18:34:43.110728","updated_at":"2024-03-18T18:34:43.109976","origin":"model","schema":"Note","properties":{"entity":["fbc529fbcee44a588438cdb2f3ff9b3f.e4fae2145ec67c7ed22a2ab38932e7c004ed29f7"],"description":["note"]}}
+{"id":"d8fa04818236482693955c2d74acb7e8.68311e6a7c472bd286c01bf578d3dfd04e8458bd","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.631827","origin":"model","schema":"Person","properties":{"name":["Pure Risk"],"nationality":["us"]}}
+{"id":"fbc529fbcee44a588438cdb2f3ff9b3f.e4fae2145ec67c7ed22a2ab38932e7c004ed29f7","mutable":true,"role_id":null,"created_at":"2024-03-18T18:34:43.110727","updated_at":"2024-03-18T18:34:43.109784","origin":"model","schema":"Company","properties":{"name":["KwaZulu"],"alias":["kwazulu"]}}
diff --git a/tests/test_io.py b/tests/test_io.py
new file mode 100644
index 0000000..3a36ca1
--- /dev/null
+++ b/tests/test_io.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+import tempfile
+
+from ftmstore import init
+from ftmstore.utils import write_stream, iterate_stream
+
+
+def test_input_output():
+    uri = "sqlite://"
+    dataset = init("IO-TEST", database_uri=uri)
+    assert dataset.name == "IO-TEST"
+    assert len(dataset.store) == 0
+
+    input_file = Path("./tests/fixtures/entities")
+
+    with open(input_file, "r") as f:
+        data = f.readlines()
+
+    number_of_entities = len(data)
+
+    # test writing FTM entities to FTM Store
+    write_stream(dataset, open(input_file, "r"))
+    assert len(dataset) == number_of_entities
+
+    # test reading FTM entities from FTM Store
+    output_file = Path("./tests/fixtures/temp_output")
+
+    fh = open(output_file, "w+b")
+    iterate_stream(dataset, fh)
+    fh.close()
+
+    with open(output_file, "r") as f:
+        assert len(f.readlines()) == number_of_entities
+
+    output_file.unlink()

From a994d7969818141b2de0090d46f1d9026a6372f9 Mon Sep 17 00:00:00 2001
From: Alex Stefanescu
Date: Tue, 26 Mar 2024 15:12:38 +0100
Subject: [PATCH 5/6] Clean up Store at the end of test

---
 tests/test_sqlite.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py
index 95f78d7..103285f 100644
--- a/tests/test_sqlite.py
+++ b/tests/test_sqlite.py
@@ -29,4 +29,5 @@ def test_sqlite():
     assert len(list(dataset.iterate(entity_id="key3"))) == 1
 
     assert len(dataset.store) == 1
+    dataset.drop()
     dataset.store.close()

From 8502fba1e041bf943feffa03d0fa722e1f10df0a Mon Sep 17 00:00:00 2001
From: Alex Stefanescu
Date: Tue, 26 Mar 2024 15:17:12 +0100
Subject: [PATCH 6/6] Clean up Store at the end of (another) test

---
 tests/test_io.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_io.py b/tests/test_io.py
index 3a36ca1..090067c 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -33,3 +33,6 @@ def test_input_output():
         assert len(f.readlines()) == number_of_entities
 
     output_file.unlink()
+
+    dataset.drop()
+    dataset.store.close()