Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create BankAccount entities from valid IBANs #503

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion ingestors/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@
from followthemoney.util import make_entity_id
from followthemoney.namespace import Namespace

import schwifty

from ingestors import settings
from ingestors.analysis.aggregate import TagAggregatorFasttext
from ingestors.analysis.aggregate import TagAggregator
from ingestors.analysis.extract import extract_entities
from ingestors.analysis.patterns import extract_patterns
from ingestors.analysis.language import detect_languages
from ingestors.analysis.util import TAG_COMPANY, TAG_PERSON
from ingestors.analysis.util import TAG_COMPANY, TAG_PERSON, TAG_IBAN
from ingestors.analysis.util import text_chunks, ANALYZABLE, DOCUMENT


log = logging.getLogger(__name__)


Expand Down Expand Up @@ -46,6 +49,7 @@ def feed(self, entity):
for prop, tag in extract_patterns(self.entity, text):
self.aggregator_patterns.add(prop, tag)


def flush(self):
writer = self.dataset.bulk()
countries = set()
Expand All @@ -60,13 +64,36 @@ def flush(self):
countries.add(key)

mention_ids = set()

# If any ibanMentioned tags were extracted, validate each one with schwifty.
# Every valid IBAN is used to create a BankAccount FTM entity,
# and we keep track of how many were created.
bank_accounts_created = 0

for key, prop, values in results:
label = values[0]
if prop.type == registry.name:
label = registry.name.pick(values)

if prop == TAG_IBAN:
try:
_ = schwifty.IBAN(label)
except schwifty.exceptions.SchwiftyException:
continue

if not schwifty.IBAN(label, allow_invalid=True).is_valid:
continue

bank_account = model.make_entity("BankAccount")
bank_account.make_id("mention", self.entity.id, prop, key)
bank_account.add("iban", label)
bank_account = self.ns.apply(bank_account)
writer.put(bank_account)
bank_accounts_created += 1

schema = self.MENTIONS.get(prop)
if schema is not None and self.entity.schema.is_a(DOCUMENT):

mention = model.make_entity("Mention")
mention.make_id("mention", self.entity.id, prop, key)
mention_ids.add(mention.id)
Expand All @@ -89,6 +116,8 @@ def flush(self):
self.entity.schema.name,
self.entity.id,
)
if bank_accounts_created:
log.debug(f"Created {bank_accounts_created} BankAccount entities")
writer.put(self.entity)
writer.flush()

Expand Down
Loading