Skip to content

Commit

Permalink
Merge pull request #34 from alephdata/feature/write-object-deprecated
Browse files Browse the repository at this point in the history
Use write_entity instead of deprecated write_object
  • Loading branch information
stchris committed Mar 28, 2024
2 parents e1c5718 + 8502fba commit 09b6386
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 46 deletions.
22 changes: 1 addition & 21 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,13 @@ on: [push]
jobs:
python:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:10.8
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: ""
POSTGRES_DB: postgres
ports:
- 5432:5432
steps:
- uses: actions/checkout@v1
- name: Show ref
run: |
echo "$GITHUB_REF"
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: "3.x"
- name: Install dependencies
env:
DEBIAN_FRONTEND: noninteractive
run: |
sudo apt-get install libleveldb-dev
pip install coverage wheel pytest
pip install -e ".[postgresql]"
- name: Run the tests
run: make test
run: make build db test
- name: Build a distribution
run: |
python setup.py sdist bdist_wheel
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ test:
docker-compose run --rm ftmstore pytest -s tests

stop:
docker-compose down --remove-orphans
docker-compose down --remove-orphans

dist:
docker-compose run --rm ftmstore python3 setup.py sdist bdist_wheel
27 changes: 3 additions & 24 deletions ftmstore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,11 @@
from ftmstore import get_dataset
from ftmstore.settings import DATABASE_URI
from ftmstore.store import Store
from ftmstore.utils import NULL_ORIGIN
from ftmstore.utils import NULL_ORIGIN, write_stream, iterate_stream

log = logging.getLogger("ftmstore")


def write_stream(dataset, file, origin=NULL_ORIGIN):
bulk = dataset.bulk()
for idx in count(1):
line = file.readline()
if not line:
break
entity = json.loads(line)
bulk.put(entity, fragment=str(idx), origin=origin)
if idx % 10000 == 0:
log.info("Write [%s]: %s entities", dataset.name, idx)
bulk.flush()


def iterate_stream(dataset, file, entity_id=None):
from followthemoney.cli.util import write_object

for entity in dataset.iterate(entity_id=entity_id):
log.debug("[%s]: %s", entity.id, entity.caption)
write_object(file, entity)


@click.group(help="Store FollowTheMoney object data")
@click.option("-v", "--verbose", default=False, is_flag=True)
def cli(verbose):
Expand All @@ -59,7 +38,7 @@ def write(db, dataset, infile, origin):
@cli.command("iterate", help="Iterate entities")
@click.option("--db", metavar="URI", default=DATABASE_URI, show_default=True)
@click.option("-d", "--dataset", required=True)
@click.option("-o", "--outfile", type=click.File("w"), default="-")
@click.option("-o", "--outfile", type=click.File("w+b"), default="-")
def iterate(db, dataset, outfile):
dataset = get_dataset(dataset, database_uri=db)
try:
Expand All @@ -70,7 +49,7 @@ def iterate(db, dataset, outfile):

@cli.command("aggregate", help="Combination of write and iterate.")
@click.option("-i", "--infile", type=click.File("r"), default="-")
@click.option("-o", "--outfile", type=click.File("w"), default="-")
@click.option("-o", "--outfile", type=click.File("w+b"), default="-")
def aggregate(infile, outfile):
dataset = get_dataset("aggregate_%s" % uuid4().hex)
try:
Expand Down
27 changes: 27 additions & 0 deletions ftmstore/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import json
import logging
from hashlib import sha1
from normality import stringify
from itertools import count


NULL_ORIGIN = "null"

log = logging.getLogger("ftmstore")


class StoreException(Exception):
pass
Expand All @@ -14,3 +20,24 @@ def safe_fragment(fragment):
if fragment is not None:
fragment = fragment.encode("utf-8", errors="replace")
return sha1(fragment).hexdigest()


def write_stream(dataset, file, origin=NULL_ORIGIN):
bulk = dataset.bulk()
for idx in count(1):
line = file.readline()
if not line:
break
entity = json.loads(line)
bulk.put(entity, fragment=str(idx), origin=origin)
if idx % 10000 == 0:
log.info("Write [%s]: %s entities", dataset.name, idx)
bulk.flush()


def iterate_stream(dataset, file, entity_id=None):
from followthemoney.cli.util import write_entity

for entity in dataset.iterate(entity_id=entity_id):
log.debug("[%s]: %s", entity.id, entity.caption)
write_entity(file, entity)
5 changes: 5 additions & 0 deletions tests/fixtures/entities
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"id":"637427924ad445058bd0f19d67add26a.b97aa090345d3f0725a2135954cba8d51a912e54","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.510358","origin":"model","schema":"Person","properties":{"name":["Carlos Danger"],"nationality":["us"]}}
{"id":"98c8d8d56eb241ccb0996a9e6f69007a.80b47167fc0bb2551b90583596a23a54d75608be","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.574715","origin":"model","schema":"LegalEntity","properties":{"name":["Carlos Danger"],"country":["gb"]}}
{"id":"caa695fa11c44488b064aa22971fccdc.4a41a3439e6d2ab55fba7f7fec1d8be53de21fa2","mutable":true,"role_id":null,"created_at":"2024-03-18T18:34:43.110728","updated_at":"2024-03-18T18:34:43.109976","origin":"model","schema":"Note","properties":{"entity":["fbc529fbcee44a588438cdb2f3ff9b3f.e4fae2145ec67c7ed22a2ab38932e7c004ed29f7"],"description":["note"]}}
{"id":"d8fa04818236482693955c2d74acb7e8.68311e6a7c472bd286c01bf578d3dfd04e8458bd","mutable":true,"role_id":4,"created_at":null,"updated_at":"2024-03-18T18:34:44.631827","origin":"model","schema":"Person","properties":{"name":["Pure Risk"],"nationality":["us"]}}
{"id":"fbc529fbcee44a588438cdb2f3ff9b3f.e4fae2145ec67c7ed22a2ab38932e7c004ed29f7","mutable":true,"role_id":null,"created_at":"2024-03-18T18:34:43.110727","updated_at":"2024-03-18T18:34:43.109784","origin":"model","schema":"Company","properties":{"name":["KwaZulu"],"alias":["kwazulu"]}}
38 changes: 38 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pathlib import Path
import tempfile

from ftmstore import init
from ftmstore.utils import write_stream, iterate_stream


def test_input_output():
uri = "sqlite://"
dataset = init("IO-TEST", database_uri=uri)
assert dataset.name == "IO-TEST"
assert len(dataset.store) == 0

input_file = Path("./tests/fixtures/entities")

with open(input_file, "r") as f:
data = f.readlines()

number_of_entities = len(data)

# test writing FTM entities to FTM Store
write_stream(dataset, open(input_file, "r"))
assert len(dataset) == number_of_entities

# test reading FTM entities from FTM Store
output_file = Path("./tests/fixtures/temp_output")

fh = open(output_file, "w+b")
iterate_stream(dataset, fh)
fh.close()

with open(output_file, "r") as f:
assert len(f.readlines()) == number_of_entities

output_file.unlink()

dataset.drop()
dataset.store.close()
1 change: 1 addition & 0 deletions tests/test_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ def test_sqlite():
assert len(list(dataset.iterate(entity_id="key3"))) == 1
assert len(dataset.store) == 1

dataset.drop()
dataset.store.close()

0 comments on commit 09b6386

Please sign in to comment.