Skip to content

Commit

Permalink
Fix relations
Browse files Browse the repository at this point in the history
* Inherit groups and themes from broader concepts
* Allow multiple columns with 'Alt Label', 'Broader concept', 'Group', and 'Theme'
  • Loading branch information
arielpontes committed Jan 11, 2021
1 parent 1cb8781 commit 677b02a
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 71 deletions.
119 changes: 62 additions & 57 deletions gemet/thesaurus/import_spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,10 @@
from gemet.thesaurus.utils import get_new_code


CONCEPT_NS = Namespace.objects.get(heading='Concepts')
GROUP_NS = Namespace.objects.get(heading='Groups')
THEME_NS = Namespace.objects.get(heading='Themes')


class ImportError(Exception):
pass


def namespace_for(property_type_name):
heading_for = {
'broader': CONCEPT_NS,
'group': GROUP_NS,
'theme': THEME_NS,
}
return heading_for[property_type_name]


def row_dicts(sheet):
rows = sheet.rows

Expand All @@ -49,8 +35,14 @@ def row_dicts(sheet):
raise ImportError(u'Column "{}" is mandatory.'.format(column))

for column in column_names:
if 'Alt Label' not in column and column not in supported_columns:
raise ImportError(u'Column "{}" is not supported.'.format(column))
if column not in supported_columns:
for text in ['Alt Label', 'Broader concept', 'Group', 'Theme']:
if text in column:
break
else:
raise ImportError(
u'Column "{}" is not supported.'.format(column)
)

for row in rows:
values = [(c.value or '').strip() for c in row[1:]]
Expand Down Expand Up @@ -101,9 +93,6 @@ def import_file(self):
num_reg_concepts_after = Concept.objects.filter(
namespace=self.concept_ns
).count()
num_groups_after = Concept.objects.filter(
namespace=self.group_ns
).count()

results = ("Created {} concepts.").format(
num_reg_concepts_after - num_reg_concepts_bef,
Expand Down Expand Up @@ -194,18 +183,29 @@ def _create_relations(self, sheet):
for i, row in enumerate(row_dicts(sheet)):

source_label = row.get("Term") # aka prefLabel
source = self.concepts[source_label.lower()]

for property_type in property_types:

# Look for columns specifying relationships
if property_type.name == 'broader':
target_label = row.get("Broader concept")
target_labels = [
row[key] for key in row.keys()
if 'Broader concept' in key and row[key]
]
broader_labels = target_labels
elif property_type.name == 'group':
target_label = row.get("Group")
target_labels = [
row[key] for key in row.keys()
if 'Group' in key and row[key]
]
elif property_type.name == 'theme':
target_label = row.get("Theme")
target_labels = [
row[key] for key in row.keys()
if 'Theme' in key and row[key]
]

if not target_label:
if not target_labels:
# If it doesn't exist, there is no relation to be created
print(
(
Expand All @@ -215,45 +215,50 @@ def _create_relations(self, sheet):
)
continue

source = self.concepts[source_label.lower()]
target = Concept.objects.filter(
properties__name='prefLabel',
properties__value=target_label,
properties__language_id='en',
namespace=namespace_for(property_type.name)
).exclude(
status=DELETED_PENDING, properties__status=DELETED_PENDING
).first()

if not target:
raise ImportError(
'Row {}: concept "{}" does not exist.'.format(
i, target_label
for target_label in target_labels:
target = Concept.objects.filter(
properties__name='prefLabel',
properties__value=target_label,
properties__language_id='en',
namespace=property_type.namespace
).exclude(
status=DELETED_PENDING,
properties__status=DELETED_PENDING
).first()

if not target:
raise ImportError(
'Row {}: concept "{}" does not exist.'.format(
i, target_label
)
)
)

# target is broader of source
relation = Relation.objects.filter(
source=source,
target=target,
property_type=property_type,
).first()

if not relation:
relation = Relation.objects.create(
source=source,
target=target,
relation, created = Relation.objects.get_or_create(
source=source, # child
target=target, # parent
property_type=property_type,
version_added=self.version,
status=PENDING,
defaults={
'version_added': self.version, 'status': PENDING
}
)
print('Relation created: {}'.format(relation))

if not relation.reverse:
reverse_relation = relation.create_reverse()
print(
'Reverse relation created: {}'.format(reverse_relation)
)
if created:
print('Relation created: {}'.format(relation))

if not relation.reverse:
reverse_relation = relation.create_reverse()
print(
'Reverse relation created: {}'.format(
reverse_relation
)
)

created = source.inherit_groups_and_themes_from_broader()
print(
'Inherited groups and themes from: {}'.format(
', '.join(broader_labels)
)
)

def _add_translations(self, sheet):
for row in row_dicts(sheet):
Expand Down
46 changes: 44 additions & 2 deletions gemet/thesaurus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,29 @@ def label(self):
language='en', name='prefLabel', status__in=[0, 1]
).first().value

def inherit_groups_and_themes_from_broader(self, version=None):
    """Copy group and theme relations from this concept's broader concepts.

    For every concept that this one points to through a 'broader'
    relation, each of that concept's 'group' and 'theme' relations is
    replicated with this concept as the source, unless an identical
    relation (same source, target and property type) already exists.

    :param version: version stamped on newly created relations; falls
        back to ``Version.under_work()`` when not given.
    :return: the number of relations that were actually created.
    """
    version = version or Version.under_work()
    # Relations are stored as source=child, target=parent, so the broader
    # concepts of *this* concept are the targets of its own 'broader'
    # relations. Both conditions sit in one filter() call so they apply
    # to the same relation row; distinct() guards against join duplicates.
    broader_concepts = Term.objects.filter(
        target_relations__source=self,
        target_relations__property_type__name='broader',
    ).distinct()
    num_created = 0
    for broader in broader_concepts:
        inherited_relations = broader.source_relations.filter(
            property_type__name__in=['group', 'theme']
        )

        for parent_relation in inherited_relations:
            _, created = Relation.objects.get_or_create(
                source=self,  # child
                target=parent_relation.target,  # parent
                property_type=parent_relation.property_type,
                defaults={'version_added': version, 'status': PENDING}
            )
            if created:
                num_created += 1
    return num_created

def update_or_create_properties(
self, property_values, language_id='en', version=None
):
Expand Down Expand Up @@ -392,13 +415,26 @@ def prefix(self):
def get_by_name(cls, name):
return cls.objects.filter(name=name).first()

@property
def namespace(self):
    """Return the Namespace associated with this property type's name.

    'broader' maps to the 'Concepts' namespace, 'group' to 'Groups' and
    'theme' to 'Themes'. Any other name yields ``None``.

    ``Namespace.objects.get`` raises ``Namespace.DoesNotExist`` if the
    required namespace row is missing.
    """
    headings = {
        'broader': 'Concepts',
        'group': 'Groups',
        'theme': 'Themes',
    }
    heading = headings.get(self.name)
    if heading is None:
        return None
    # Fetch only the namespace that is needed instead of querying all
    # three on every access.
    return Namespace.objects.get(heading=heading)

def __unicode__(self):
return self.name


class Relation(VersionableModel):
source = models.ForeignKey(Concept, related_name='source_relations')
target = models.ForeignKey(Concept, related_name='target_relations')
source = models.ForeignKey( # child
Concept, related_name='source_relations'
)
target = models.ForeignKey( # parent
Concept, related_name='target_relations'
)
property_type = models.ForeignKey(PropertyType)

def __unicode__(self):
Expand Down Expand Up @@ -544,6 +580,12 @@ def create(self, **kwargs):
kwargs.setdefault('namespace', ns)
return super(ConceptManager, self).create(**kwargs)

def get_by_name(self, name):
    """Return the single concept whose English prefLabel equals ``name``.

    The lookup ends in ``QuerySet.get()``, so it raises when no concept,
    or more than one distinct concept, matches.
    """
    english_pref_label = {
        'properties__name': 'prefLabel',
        'properties__value': name,
        'properties__language_id': 'en',
    }
    matches = self.get_queryset().filter(**english_pref_label).distinct()
    return matches.get()


class PublishedConceptManager(ConceptManager):
def get_queryset(self):
Expand Down
Binary file modified gemet/thesaurus/tests/files/concepts.xlsx
Binary file not shown.
Binary file modified gemet/thesaurus/tests/files/only_en.xlsx
Binary file not shown.
Binary file modified gemet/thesaurus/tests/files/only_translations.xlsx
Binary file not shown.
55 changes: 43 additions & 12 deletions gemet/thesaurus/tests/test_import.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# -*- coding: utf-8 -*-
from django.core.files import File
from django.db.models import Q
from django.test import TestCase, Client

from .factories import VersionFactory, ConceptFactory
from gemet.thesaurus.models import Concept, Import, Property, Namespace
from .factories import (
VersionFactory, TermFactory, GroupFactory, ThemeFactory, ConceptFactory,
Relation
)
from gemet.thesaurus.models import (
Concept, Term, Group, Theme, Import, Property, PropertyType, Namespace
)


class ConceptImportView(TestCase):
Expand All @@ -23,30 +27,43 @@ def setUp(self):
i = 1
# Create broader concepts
for value in ['pollution', 'impact source']:
concept = ConceptFactory(namespace=concept_namespace, code=str(i))
concept = TermFactory(code=str(i))
concept.properties.create(
name='prefLabel', value=value, language_id='en',
version_added=version
)
i += 1

# Create groups
for value in ['BIOSPHERE', 'HYDROSPHERE']:
concept = ConceptFactory(namespace=group_namespace, code=str(i))
for value in ['BIOSPHERE', 'HYDROSPHERE', 'WASTES']:
concept = GroupFactory(code=str(i))
concept.properties.create(
name='prefLabel', value=value, language_id='en',
version_added=version
version_added=version,
)
i += 1

# Make `pollution` a member of `WASTES`
pollution_concept = Term.objects.get_by_name('pollution')
wastes_group = Group.objects.get_by_name('WASTES')
pollution_concept.source_relations.create(
target=wastes_group,
version_added=version,
property_type=PropertyType.objects.get(name='group')
)
# Create themes
for value in ['Fake Theme']:
concept = ConceptFactory(namespace=theme_namespace, code=str(i))
for value in ['pollution', 'theme1', 'theme2']:
concept = ThemeFactory(code=str(i))
concept.properties.create(
name='prefLabel', value=value, language_id='en',
version_added=version
)
i += 1
pollution_theme = Theme.objects.get_by_name('pollution')
pollution_concept.source_relations.create(
target=pollution_theme,
version_added=version,
property_type=PropertyType.objects.get(name='theme')
)

def test_import_concepts_and_translations_together(self):
num_concepts_before = Concept.objects.count()
Expand Down Expand Up @@ -76,9 +93,23 @@ def test_import_concepts_and_translations_together(self):
# And multiple altlabels
self.assertEqual(
Property.objects.filter(name='altLabel').filter(
Q(value='altlabel1') | Q(value='altlabel2')
value__in=[
'Net zero emissions economy',
'net-zero greenhouse gas emissions economy',
'climate neutrality economy'
]
).count(),
3
)
# Multiple broader concepts, groups and themes are supported
brown_finance = Term.objects.get_by_name('brown finance')
self.assertEqual(
Relation.objects.filter(
source=brown_finance,
).count(),
2
8
# 2 Broader concepts, 2 Groups, 2 Themes
# + 1 Group and 1 Theme Inherited from pollution
)

def test_import_concepts_and_translations_separately(self):
Expand Down

0 comments on commit 677b02a

Please sign in to comment.