diff --git a/gemet/thesaurus/admin.py b/gemet/thesaurus/admin.py index a05889b..63aa1b8 100644 --- a/gemet/thesaurus/admin.py +++ b/gemet/thesaurus/admin.py @@ -5,7 +5,7 @@ class ConceptAdmin(admin.ModelAdmin): search_fields = ('code',) - list_display = ('code', 'namespace', 'status', 'version_added') + list_display = ('code', 'label', 'namespace', 'status', 'version_added') list_filter = ('version_added__identifier', 'status', 'namespace') @@ -71,4 +71,4 @@ class AsyncTaskAdmin(admin.ModelAdmin): admin.site.register(models.Version, VersionAdmin) admin.site.register(models.DefinitionSource, SourceAdmin) -admin.site.register(models.AsyncTask, AsyncTaskAdmin) \ No newline at end of file +admin.site.register(models.AsyncTask, AsyncTaskAdmin) diff --git a/gemet/thesaurus/edit_views.py b/gemet/thesaurus/edit_views.py index 2667eac..28d9ab6 100644 --- a/gemet/thesaurus/edit_views.py +++ b/gemet/thesaurus/edit_views.py @@ -15,6 +15,7 @@ from gemet.thesaurus import EDIT_URL_NAMES, FOREIGN_RELATION_TYPES from gemet.thesaurus import PENDING, PUBLISHED, DELETED, DELETED_PENDING from gemet.thesaurus import SOURCE_RELATION_TO_TARGET +from gemet.thesaurus import SEARCH_FIELDS from gemet.thesaurus import models from gemet.thesaurus.exports import create_export_files from gemet.thesaurus.forms import ConceptForm, PropertyForm, ForeignRelationForm @@ -198,7 +199,8 @@ def post(self, request, langcode, id, name): name=name, **form.cleaned_data ) - refresh_search_text(field.name, id, langcode, self.pending_version) + if field.name in SEARCH_FIELDS: + refresh_search_text(id, langcode, self.pending_version) data = {"value": field.value} return self._get_response(data, 'success', 200) @@ -351,7 +353,8 @@ def post(self, request, langcode, id, name): ) delete_url = reverse('delete_property', kwargs={'pk': field.pk}) - refresh_search_text(field.name, id, langcode, self.pending_version) + if field.name in SEARCH_FIELDS: + refresh_search_text(id, langcode, self.pending_version) data = { "value": field.value, "id": field.id, @@ -377,7 +380,8 @@ def post(self, request, pk): elif field.status == PENDING: field.delete() - refresh_search_text(field.name, field.concept_id, field.language_id) + if field.name in SEARCH_FIELDS: + refresh_search_text(field.concept_id, field.language_id) return self._get_response({}, 'success', 200) @@ -464,13 +468,15 @@ def form_valid(self, form): new_concept.save() # create prefLabel property for the new concept - models.Property.objects.create(status=PENDING, - version_added=self.pending_version, - concept=new_concept, - language=self.language, - name='prefLabel', - value=form.cleaned_data['name']) - refresh_search_text('prefLabel', new_concept.id, self.language.code) + models.Property.objects.create( + status=PENDING, + version_added=self.pending_version, + concept=new_concept, + language=self.language, + name='prefLabel', + value=form.cleaned_data['name'] + ) + refresh_search_text(new_concept.id, self.language.code) url_name = EDIT_URL_NAMES[namespace.heading] url = reverse(url_name, kwargs={'langcode': self.langcode, 'code': new_concept.code}) diff --git a/gemet/thesaurus/management/commands/import_spreadsheet.py b/gemet/thesaurus/management/commands/import_spreadsheet.py index 7c83894..57f536b 100644 --- a/gemet/thesaurus/management/commands/import_spreadsheet.py +++ b/gemet/thesaurus/management/commands/import_spreadsheet.py @@ -3,22 +3,67 @@ from django.core.management import CommandError from django.core.management.base import BaseCommand +from django.db import transaction +from django.utils import timezone -from gemet.thesaurus import DELETED_PENDING, PENDING, PUBLISHED +from gemet.thesaurus import PENDING, PUBLISHED from gemet.thesaurus import models from gemet.thesaurus.utils import get_new_code, get_search_text -from gemet.thesaurus.utils import split_text_into_terms -NAMESPACE = 'Concepts' -LANGCODE = 'en' + +def namespace_for(property_type_name): + heading_for = { + 'broader': 'Concepts', + 'group': 'Groups', + } + return models.Namespace.objects.get( + heading=heading_for[property_type_name] + ) + + +def row_dicts(sheet): + rows = sheet.rows + + mandatory_columns = { + "Term", "Definition", "Definition reference" + } + optional_columns = { + "Alt Label", "Abbreviation/Alt Label", "Synonym/Alt Label", + "Broader concept", "Broader URI", "Group", "Note" + } + optional_columns.add(sheet.title) + supported_columns = mandatory_columns.union(optional_columns) + column_names = [ + c.value.strip() for c in next(rows) if c.value and c.value.strip() + ] + + for column in mandatory_columns: + if column not in column_names: + raise CommandError(u'Column "{}" is mandatory.'.format(column)) + + for column in column_names: + if column not in supported_columns: + raise CommandError(u'Column "{}" is not supported.'.format(column)) + + for row in rows: + values = [(c.value or '').strip() for c in row[1:]] + if not ''.join(values).strip(): + # The sheet is over, there are only empty rows now. + return + yield dict(zip(column_names, values)) class Command(BaseCommand): + """ + TODO: Convert this to a helper function and create admin action to call it + """ + help = 'Import new concepts from Excel spreadsheet' def add_arguments(self, parser): parser.add_argument('excel_file') + @transaction.atomic def handle(self, *args, **options): try: wb = load_workbook(filename=options['excel_file']) @@ -27,202 +72,202 @@ def handle(self, *args, **options): except IOError: raise CommandError('The file provided does not exist.') - sheet = wb.active - self.language = models.Language.objects.get(code=LANGCODE) + # We assume all concepts being introduced are in the namespace + # "Concepts". Adding other types of concepts is not supported. + self.namespace_obj = models.Namespace.objects.get(heading='Concepts') + # Get the version with no name, used for pending concepts self.version = models.Version.under_work() - self.namespace = models.Namespace.objects.get(heading=NAMESPACE) + self.concepts = {} + concepts_sheetname = wb.sheetnames[0] + translation_sheetnames = wb.sheetnames[1:] + self.stdout.write('Creating concepts...') - self._create_concepts(sheet) + self._create_concepts(wb[concepts_sheetname]) self.stdout.write('Creating relations...') - self._create_relations(sheet) + self._create_relations(wb[concepts_sheetname]) + + if translation_sheetnames: + self.stdout.write('Creating translations...') + for sheetname in translation_sheetnames: + self.stdout.write(' {}...'.format(sheetname)) + self._add_translations(wb[sheetname]) def _create_concepts(self, sheet): - for row in sheet.iter_rows(max_col=3, min_row=2): - label, defin, source = [(cell.value or '').strip() for cell in row] + for i, row in enumerate(row_dicts(sheet)): + label = row.get("Term") # aka prefLabel + alt_labels = [row[key] for key in row.keys() if 'Alt Label' in key] + defin = row.get("Definition") + source = row.get("Definition source") if not label: - continue - properties = { + raise CommandError(u'Row {} has no "Term".'.format(i)) + + property_values = { 'prefLabel': label, 'definition': defin, 'source': source, } - is_new_concept = False - property = models.Property.objects.filter( + if alt_labels: + property_values['altLabels'] = alt_labels + + # A concept must always have at least an English property, so if + # there is no English property corresponding to that term in the + # DB, the concept must be new. + prop = models.Property.objects.filter( name='prefLabel', value__iexact=label, - language=self.language, + language='en', ).first() - if property: - concept = property.concept + if prop: + is_new_concept = False + concept = prop.concept msg = u'Concept {} exists. '.format(label) - if property.status in [PENDING, PUBLISHED]: - del properties['prefLabel'] + if prop.status in [PENDING, PUBLISHED]: + del property_values['prefLabel'] msg += 'Skipping prefLabel creation.' self.stdout.write(msg) else: is_new_concept = True - code = get_new_code(self.namespace) + code = get_new_code(self.namespace_obj) concept = models.Concept.objects.create( code=code, - namespace=self.namespace, + namespace=self.namespace_obj, version_added=self.version, status=PENDING, + date_entered=timezone.now(), ) self.stdout.write(u'Concept added: {}'.format(label)) self.concepts[label.lower()] = concept - for name, value in properties.iteritems(): - current_property = models.Property.objects.filter( - concept=concept, - language=self.language, - name=name, - status__in=[PENDING, PUBLISHED] - ).first() - if current_property: - if current_property.status == PENDING: - current_property.value = value - current_property.save() - else: - current_property.status = DELETED_PENDING - current_property.save() - if not (current_property and - current_property.status == PENDING): - models.Property.objects.create( - status=PENDING, - version_added=self.version, - concept=concept, - language=self.language, - name=name, - value=value, - ) + + concept.update_or_create_properties(property_values) + if is_new_concept: + # Create internal "searchText" property with the concatenated + # values from all other concept properties. search_text = get_search_text( - concept.id, self.language.code, PENDING, self.version) + concept.id, 'en', PENDING, self.version + ) if search_text: search_text.save() - def _create_theme_group_relations(self, source): + def _create_relations(self, sheet): + property_types = models.PropertyType.objects.filter( - name__in=['theme', 'group'] + name__in=['broader', 'group'] ) - property_type_broader = models.PropertyType.objects.get(name='broader') - for property_type in property_types: - relation = source.source_relations.filter( - property_type=property_type).exists() - if relation: - self.stdout.write( - 'Skipping {0} relation creation for concept {1}' - .format(property_type, source)) - continue - broader_relations = models.Relation.objects.filter( - property_type=property_type, - source__target_relations__source=source, - source__target_relations__property_type=property_type_broader - ) - if not broader_relations: - self.stdout.write( - 'Skipping {0} relation creation for concept {1}. ' - 'No broader.'.format(property_type, source)) - continue - for relation in broader_relations: - new_relation = models.Relation.objects.create( - source=source, - target=relation.target, - property_type=property_type, - version_added=self.version, - status=PENDING) - new_relation.create_reverse() - self.stdout.write('For concept {0} relation: {1} was added.' - .format(source, new_relation)) - def _create_relations(self, sheet): - def get_terms(row, idx1, idx2): - text = (row[idx1].value or '') + ';' + (row[idx2].value or '') - return split_text_into_terms(text) + for i, row in enumerate(row_dicts(sheet)): - related = models.PropertyType.objects.get(name='related') - broader = models.PropertyType.objects.get(name='broader') - narrower = models.PropertyType.objects.get(name='narrower') + source_label = row.get("Term") # aka prefLabel - for row in sheet.iter_rows(min_row=2): - label = (row[0].value or '').strip() + for property_type in property_types: - if not label: - continue + # Look for columns specifying relationships + if property_type.name == 'broader': + target_label = row.get("Broader concept") + elif property_type.name == 'group': + target_label = row.get("Group") - relations = { - related: get_terms(row, 3, 4), - broader: get_terms(row, 5, 6), - narrower: get_terms(row, 7, 8), - } + if not target_label: + # If it doesn't exist, there is no relation to be created + self.stdout.write( + ( + 'Row {} has neither "broader" nor "group" columns.' + ).format(i) + ) + continue + + source = self.concepts[source_label.lower()] + target = self._get_concept(target_label) + namespace = namespace_for(property_type.name) - source = self.concepts[label.lower()] - - for property_type, terms in relations.iteritems(): - for term in terms: - target = self._get_concept(term) - if not target: - code = get_new_code(self.namespace) - target = models.Concept.objects.create( - code=code, - namespace=self.namespace, - version_added=self.version, - status=PENDING, - ) - models.Property.objects.create( - status=PENDING, - version_added=self.version, - concept=target, - language=self.language, - name='prefLabel', - value=term, - ) - self.stdout.write('Inexistent concept: {}'.format(term)) - - relation = models.Relation.objects.filter( + if not target: + self.stdout.write( + 'Creating inexistent concept: {}'.format(target_label) + ) + code = get_new_code(self.namespace_obj) + target = models.Concept.objects.create( + code=code, + namespace=namespace, + version_added=self.version, + status=PENDING, + date_entered=timezone.now(), + ) + target.properties.create( + status=PENDING, + version_added=self.version, + language='en', + name='prefLabel', + value=target_label, + ) + + # target is broader of source + relation = models.Relation.objects.filter( + source=source, + target=target, + property_type=property_type, + ).first() + + if not relation: + relation = models.Relation.objects.create( source=source, target=target, property_type=property_type, - ).first() - - if not relation: - relation = models.Relation.objects.create( - source=source, - target=target, - property_type=property_type, - version_added=self.version, - status=PENDING, - ) - self.stdout.write('Relation created: {}' - .format(relation)) - - if not relation.reverse: - reverse_relation = relation.create_reverse() - self.stdout.write('Reverse relation created: {}' - .format(reverse_relation)) - self._create_theme_group_relations(source) + version_added=self.version, + status=PENDING, + ) + self.stdout.write('Relation created: {}'.format(relation)) + + if not relation.reverse: + reverse_relation = relation.create_reverse() + self.stdout.write( + 'Reverse relation created: {}'.format(reverse_relation) + ) + + def _add_translations(self, sheet): + for row in row_dicts(sheet): + + language = models.Language.objects.get(code=sheet.title.lower()) + + en_label = row.get('Term') + foreign_label = row.get(sheet.title) + definition = row.get('Definition') + + if not en_label: + raise CommandError(u'"Term" column cannot be blank.') + + property_values = { + 'prefLabel': foreign_label, + 'definition': definition, + } + + concept = models.Property.objects.get( + name='prefLabel', + value__iexact=en_label, + language=models.Language.objects.get(code='en'), + ).concept + + concept.update_or_create_properties( + property_values, language_id=language.code + ) def _get_concept(self, label): concept = self.concepts.get(label.lower()) if not concept: try: - concept = ( - models.Property.objects - .get( - name='prefLabel', - value__iexact=label, - language=self.language, - concept__namespace=self.namespace, - ) - .concept - ) + concept = models.Property.objects.get( + name='prefLabel', + value__iexact=label, + language='en', + concept__namespace=self.namespace_obj, + ).concept except models.Property.DoesNotExist: concept = None diff --git a/gemet/thesaurus/management/commands/import_translation.py b/gemet/thesaurus/management/commands/import_translation.py deleted file mode 100644 index 0772901..0000000 --- a/gemet/thesaurus/management/commands/import_translation.py +++ /dev/null @@ -1,94 +0,0 @@ -from openpyxl import load_workbook -from openpyxl.utils.exceptions import InvalidFileException - -from django.core.management import CommandError -from django.core.management.base import BaseCommand - -from gemet.thesaurus import DELETED_PENDING, PENDING, PUBLISHED -from gemet.thesaurus import models -from gemet.thesaurus.utils import get_new_code, get_search_text, refresh_search_text -from gemet.thesaurus.utils import split_text_into_terms - - -class Command(BaseCommand): - help = ( - "Import translations for existing concepts from Excel " - "(column A=English name, B=translated name, C=translated definition)" - ) - - def add_arguments(self, parser): - parser.add_argument('excel_file') - parser.add_argument('language_code') - - def handle(self, *args, **options): - try: - wb = load_workbook(filename=options['excel_file']) - except InvalidFileException: - raise CommandError('The file provided is not a valid excel file.') - except IOError: - raise CommandError('The file provided does not exist.') - - sheet = wb.active - langcode = options['language_code'] - try: - self.language = models.Language.objects.get(code=langcode) - except models.Language.DoesNotExist: - raise CommandError('Language "{}" not found.'.format(langcode)) - - self.version = models.Version.under_work() - self.namespace = models.Namespace.objects.get(heading='Concepts') - self.concepts = {} - - self.stdout.write('Adding translations...') - self._add_translations(sheet) - - def _add_translations(self, sheet): - for row in sheet.iter_rows(max_col=3, min_row=2): - en_label, label, definition = [(cell.value or '').strip() for cell in row] - - if not en_label: - # Skip empty rows - continue - properties = { - 'prefLabel': label, - 'definition': definition, - } - - property = models.Property.objects.filter( - name='prefLabel', - value__iexact=en_label, - language=models.Language.objects.get(code='en'), - ).first() - if not property: - # Concept not found - self.stderr.write(u'Skipping concept not found: {}'.format(en_label)) - continue - concept = property.concept - msg = u'Concept {} exists. '.format(en_label) - - for name, value in properties.iteritems(): - current_property = models.Property.objects.filter( - concept=concept, - language=self.language, - name=name, - status__in=[PENDING, PUBLISHED] - ).first() - if current_property: - if current_property.status == PENDING: - current_property.value = value - current_property.save() - else: - current_property.status = DELETED_PENDING - current_property.save() - if not (current_property and - current_property.status == PENDING): - models.Property.objects.create( - status=PENDING, - version_added=self.version, - concept=concept, - language=self.language, - name=name, - value=value, - ) - refresh_search_text('prefLabel', concept.id, self.language.code) - diff --git a/gemet/thesaurus/models.py b/gemet/thesaurus/models.py index 4e142ed..2d7a019 100644 --- a/gemet/thesaurus/models.py +++ b/gemet/thesaurus/models.py @@ -6,6 +6,7 @@ from gemet.thesaurus import PENDING, PUBLISHED, DELETED, DELETED_PENDING from gemet.thesaurus import NS_VIEW_MAPPING, RELATION_PAIRS +from gemet.thesaurus import SEARCH_FIELDS, SEARCH_SEPARATOR class Version(models.Model): @@ -72,6 +73,7 @@ def __unicode__(self): class Concept(VersionableModel): namespace = models.ForeignKey(Namespace) code = models.CharField(max_length=10) + # TODO: Rename to created_at/updated_at and use auto_now and auto_now_add date_entered = models.DateTimeField(blank=True, null=True) date_changed = models.DateTimeField(blank=True, null=True) @@ -96,8 +98,108 @@ def visible_foreign_relations(self): @property def name(self): + """ Relies on data being set properly on set_attributes """ return getattr(self, 'prefLabel', '') + @property + def label(self): + """ Calculates and return prefLabel value of the Concept in English """ + return self.properties.filter( + language='en', name='prefLabel', status__in=[0, 1] + ).first().value + + def update_or_create_properties( + self, property_values, language_id='en', version=None + ): + version = version or Version.under_work() + + # Soft delete matching published properties + self.properties.filter( + name__in=property_values.keys(), + language_id=language_id, + status=PUBLISHED + ).update(status=DELETED_PENDING) + + # For each property + for name, value in property_values.iteritems(): + if name == 'altLabel': + # altLabel key maps to multiple values + assert isinstance(value, list) + # Delete existing + self.properties.filter( + name='altLabel', + language_id=language_id, + status=PENDING + ).delete() + for alt_label in value: + # And create new ones + self.properties.create( + status=PENDING, + version_added=version, + language_id=language_id, + name=name, + value=alt_label, + ) + elif value: + # Update pending if exists + matches = self.properties.filter( + language_id=language_id, + name=name, + status=PENDING + ).update(value=value) + if not matches: + # If it doesn't exist, create it + self.properties.create( + status=PENDING, + version_added=version, + language_id=language_id, + name=name, + value=value, + ) + self.update_or_create_search_text(language_id, version) + + def update_or_create_search_text(self, language_code, version=None): + """ + Update or create Property of type searchText, an internal type of + property consisting of the concatenated values of all searchable + properties of a concept. + """ + version = version or Version.under_work() + + # Get values from searchable properties + search_prop_values = self.properties.filter( + language_id=language_code, + name__in=SEARCH_FIELDS, + status__in=[PUBLISHED, PENDING], + ).values_list('value', flat=True) + + # Concatenate them using internal format + if search_prop_values: + search_text = SEARCH_SEPARATOR.join(search_prop_values) + search_text = SEARCH_SEPARATOR + search_text + SEARCH_SEPARATOR + else: + search_text = '' + + # Look for existing searchText Property object + search_text_property = self.properties.filter( + language_id=language_code, + name='searchText', + status__in=[PUBLISHED, PENDING], + ).first() + + if search_text_property: + # If it exists, update it with the new calculated value + search_text_property.value = search_text + search_text_property.save() + else: + # If not, create it + search_text_property = self.properties.create( + language_id=language_code, + name='searchText', + status=PENDING, + version_added=version, + ) + def get_attributes(self, langcode, property_list): values = ['id', 'name', 'value'] values.extend(self.extra_values) diff --git a/gemet/thesaurus/templatetags/gemet_tags.py b/gemet/thesaurus/templatetags/gemet_tags.py index 4c59ff5..6897bdb 100644 --- a/gemet/thesaurus/templatetags/gemet_tags.py +++ b/gemet/thesaurus/templatetags/gemet_tags.py @@ -63,17 +63,14 @@ def get_concept_names(concept, status_values, langcode): def get_concept_name(concept_name, concept_id, status_values): if concept_name: return concept_name - concept = ( - Concept.objects.get(pk=concept_id) - .properties.filter( - language__code=DEFAULT_LANGCODE, - name='prefLabel', - status__in=status_values, - ).first() - ) - if concept: + prop = Concept.objects.get(pk=concept_id).properties.filter( + language__code=DEFAULT_LANGCODE, + name='prefLabel', + status__in=status_values, + ).first() + if prop: language = Language.objects.get(code=DEFAULT_LANGCODE).name.lower() - return mark_safe(concept.value + ' [' + language + ']') + return mark_safe(prop.value + ' [' + language + ']') return '' diff --git a/gemet/thesaurus/utils.py b/gemet/thesaurus/utils.py index 988173b..e1306da 100644 --- a/gemet/thesaurus/utils.py +++ b/gemet/thesaurus/utils.py @@ -191,16 +191,12 @@ def concept_has_unique_relation(concept, relation_type): def get_search_text(concept_id, language_code, status, version): - search_properties = ( - Property.objects - .filter( - concept_id=concept_id, - language_id=language_code, - name__in=SEARCH_FIELDS, - status__in=[PUBLISHED, PENDING], - ) - .values_list('value', flat=True) - ) + search_properties = Property.objects.filter( + concept_id=concept_id, + language_id=language_code, + name__in=SEARCH_FIELDS, + status__in=[PUBLISHED, PENDING], + ).values_list('value', flat=True) if not search_properties: return @@ -213,31 +209,23 @@ def get_search_text(concept_id, language_code, status, version): language_id=language_code, name='searchText', value=search_text, - is_resource=0, status=status, - version_added_id=version.id + version_added=version, ) -def refresh_search_text(proptype, concept_id, language_code, version=None): - if proptype not in SEARCH_FIELDS: - return - +def refresh_search_text(concept_id, language_code, version=None): version = version or Version.under_work() new_search = get_search_text(concept_id, language_code, PENDING, version) if not new_search: return - search_property = ( - Property.objects - .filter( - concept_id=concept_id, - language_id=language_code, - name='searchText', - status__in=[PUBLISHED, PENDING], - ) - .first() - ) + search_property = Property.objects.filter( + concept_id=concept_id, + language_id=language_code, + name='searchText', + status__in=[PUBLISHED, PENDING], + ).first() if not search_property: pass elif search_property.status == PENDING: