diff --git a/README.rst b/README.rst
index f4376fe..79a0524 100644
--- a/README.rst
+++ b/README.rst
@@ -13,10 +13,33 @@ GEMET
.. contents ::
Project Name
-------------
+############
The Project Name is GEMET - GEneral Multilingual Environmental Thesaurus
http://www.eionet.europa.eu/gemet
+Installing with Docker
+#########################
+
+Create settings files from .example files::
+
+ cp gemet/local_settings.py.example gemet/local_settings.py
+ cp gemet/local_test_settings.py.example gemet/local_test_settings.py
+
+Install Docker and docker-compose, then run::
+
+ docker-compose up -d
+
+Now you should be able to attach to the app container::
+
+ docker exec -it gemet.app bash
+
+And run the Django server for development::
+
+ python manage.py runserver 0:8888
+
+Installing without Docker
+#########################
+
Prerequisites - System packages
-------------------------------
@@ -120,7 +143,7 @@ be run as an unprivileged user in the product directory::
Build production
-----------------
+################
Setup production environment using an unprivileged user::
@@ -139,7 +162,7 @@ Configure supervisord and set the WSGI server port::
Build staging
--------------
+#############
Setup staging environment using an unprivileged user::
@@ -159,13 +182,13 @@ production, for example 8010)::
Configuration
--------------
+#############
Details about configurable settings can be found in ``settings.py``.
Data Import
------------
+###########
1. Considering you have a dump of the old database (``gemet.sql``), import it in a
**separate** database::
@@ -199,7 +222,7 @@ configuration file with the name of the database used for import
Other commands
---------------
+##############
1. Some romanian terms, definitions etc. are written with the wrong diacritical marks (cedillas instead of commas).
The following custom management command fixes those characters and prints the number of objects changed::
@@ -221,7 +244,7 @@ Run the command providing a valid excel file::
Documentation
--------------
+#############
The documentation has been created using `Sphinx`_. The source directories for the three sections of documentation can be found in the `docs`_ directory.
@@ -231,7 +254,7 @@ The documentation has been created using `Sphinx`_. The source directories for t
In order to get the HTML output, you should run the following command inside one of the documentation directories (``api``, ``new_api`` or ``overview``)::
make html
-
+
These static HTML files can be served via a web server (Apache, Nginx, etc).
Docs contents
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..1a576fe
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,48 @@
+version: '2'
+services:
+ async:
+ image: eeacms/gemet:latest
+ container_name: gemet.async
+ env_file:
+ - ./docker/db.env
+ - ./docker/app.env
+ volumes:
+ - gemet-uat-eionet-exports:/var/local/gemet/exports
+ command:
+ - qcluster
+ gemet:
+ image: eeacms/gemet:latest
+ container_name: gemet.app
+ ports:
+ - 8888:8888
+ env_file:
+ - ./docker/db.env
+ - ./docker/app.env
+ volumes:
+ - gemet-uat-eionet-static:/var/local/static
+ - gemet-uat-eionet-exports:/var/local/gemet/exports
+ - .:/var/local/gemet
+ entrypoint: ["/usr/bin/tail", "-f", "/dev/null"]
+ mysql:
+ image: mysql:latest
+ container_name: gemet.db
+ ports:
+ - 3306:3306
+ env_file:
+ - ./docker/db.env
+ volumes:
+ - gemet-uat-eionet-data:/var/lib/mysql
+ command:
+ - --character-set-server=utf8mb4
+ - --collation-server=utf8mb4_unicode_ci
+ - --default-authentication-plugin=mysql_native_password
+ redis:
+ image: redis
+ container_name: gemet.redis
+volumes:
+ gemet-uat-eionet-exports:
+ driver: local
+ gemet-uat-eionet-static:
+ driver: local
+ gemet-uat-eionet-data:
+ driver: local
diff --git a/docker/app.env b/docker/app.env
new file mode 100644
index 0000000..57e8fc7
--- /dev/null
+++ b/docker/app.env
@@ -0,0 +1,12 @@
+# Dummy values for local development. DO NOT use in production.
+AUTH_LDAP_SERVER_URI=
+DJANGO_LOG_LEVEL=INFO
+FORCE_SCRIPT_NAME=
+PLONE_URL=
+MYSQL_DATABASE=gemet
+MYSQL_HOST=mysql
+MYSQL_PASSWORD=gemet
+MYSQL_ROOT_PASSWORD=gemet
+MYSQL_USER=gemet
+SECRET_KEY=secret_key
+TZ=Europe/Copenhagen
diff --git a/docker/db.env b/docker/db.env
new file mode 100644
index 0000000..75fca61
--- /dev/null
+++ b/docker/db.env
@@ -0,0 +1,7 @@
+# Dummy values for local development. DO NOT use in production.
+MYSQL_DATABASE=gemet
+MYSQL_HOST=mysql
+MYSQL_PASSWORD=gemet
+MYSQL_ROOT_PASSWORD=gemet
+MYSQL_USER=gemet
+TZ=Europe/Copenhagen
diff --git a/gemet/local_settings.py.example b/gemet/local_settings.py.example
index 5ad7f70..a07f6ae 100644
--- a/gemet/local_settings.py.example
+++ b/gemet/local_settings.py.example
@@ -39,11 +39,11 @@ ALLOWED_HOSTS = ['*']
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
- 'NAME': 'db_name',
- 'USER': 'username',
- 'PASSWORD': 'password',
- 'HOST': 'db_host',
- 'PORT': 'db_port',
+ 'NAME': 'gemet',
+ 'USER': 'gemet',
+ 'PASSWORD': 'gemet',
+ 'HOST': 'mysql',
+ 'PORT': 3306,
'OPTIONS': {
'sql_mode': 'traditional',
}
@@ -88,4 +88,3 @@ AUTH_LDAP_USER_ATTR_MAP = {
"last_name": "sn",
"email": "mail"
}
-
diff --git a/gemet/settings.py b/gemet/settings.py
index 93449bc..552b8ee 100644
--- a/gemet/settings.py
+++ b/gemet/settings.py
@@ -50,6 +50,7 @@
'django.template.context_processors.debug',
'django.contrib.auth.context_processors.auth',
'django.template.context_processors.request',
+ 'django.contrib.messages.context_processors.messages',
'gemet.layout.layout_context_processor',
'gemet.thesaurus.context_processors.globals',
],
@@ -80,6 +81,10 @@
STATIC_ROOT = os.path.join(BASE_DIR, '..', 'static/')
STATIC_URL = '/static/'
+MEDIA_ROOT = os.path.join(BASE_DIR, '..', 'media/')
+MEDIA_URL = '/media/'
+
+
# Exports / Downloads
EXPORTS_ROOT = os.path.join(BASE_DIR, 'exports/')
@@ -94,6 +99,7 @@
LOCAL_INSTALLED_APPS = ()
+LOCAL_MIDDLEWARE_CLASSES = ()
try:
from local_settings import *
INSTALLED_APPS += LOCAL_INSTALLED_APPS
diff --git a/gemet/thesaurus/admin.py b/gemet/thesaurus/admin.py
index a05889b..0934d69 100644
--- a/gemet/thesaurus/admin.py
+++ b/gemet/thesaurus/admin.py
@@ -1,11 +1,12 @@
from django.contrib import admin
+from django.utils.html import mark_safe
from gemet.thesaurus import models
class ConceptAdmin(admin.ModelAdmin):
search_fields = ('code',)
- list_display = ('code', 'namespace', 'status', 'version_added')
+ list_display = ('code', 'label', 'namespace', 'status', 'version_added')
list_filter = ('version_added__identifier', 'status', 'namespace')
@@ -46,17 +47,55 @@ class AuthorizedUserAdmin(admin.ModelAdmin):
class VersionAdmin(admin.ModelAdmin):
list_display = ('id', 'identifier', 'publication_date', 'is_current')
+
class SourceAdmin(admin.ModelAdmin):
search_fields = ('abbr', 'url',)
list_display = ('abbr', 'title', 'url')
list_filter = ()
+
class AsyncTaskAdmin(admin.ModelAdmin):
search_fields = ()
list_display = ('date', 'user', 'version', 'status')
list_filter = ()
+class ImportAdmin(admin.ModelAdmin):
+ search_fields = ()
+ readonly_fields = (
+ 'id', 'created_at', 'updated_at', 'started_at', 'failed_at',
+ 'succeeded_at', 'logs'
+ )
+ list_display = (
+ 'id', 'spreadsheet', 'admin_status', 'created_at', 'started_at',
+ 'failed_at', 'succeeded_at', 'action'
+ )
+ list_filter = ()
+
+ class Media:
+ js = ('thesaurus/js/start_import.js',)
+
+ def action(self, obj):
+ if obj.status == 'In progress':
+ return mark_safe('N/A')
+ return mark_safe(
+ (
+ ''
+ ).format(obj.pk)
+ )
+
+ action.short_description = 'Action'
+
+ def admin_status(self, obj):
+ status = obj.status
+ if status == 'In progress':
+ status += ' (refresh to update)'
+ return status
+
+ admin_status.short_description = 'Status'
+
+
admin.site.register(models.Namespace)
admin.site.register(models.Concept, ConceptAdmin)
admin.site.register(models.Property, PropertiesAdmin)
@@ -71,4 +110,5 @@ class AsyncTaskAdmin(admin.ModelAdmin):
admin.site.register(models.Version, VersionAdmin)
admin.site.register(models.DefinitionSource, SourceAdmin)
-admin.site.register(models.AsyncTask, AsyncTaskAdmin)
\ No newline at end of file
+admin.site.register(models.AsyncTask, AsyncTaskAdmin)
+admin.site.register(models.Import, ImportAdmin)
diff --git a/gemet/thesaurus/edit_views.py b/gemet/thesaurus/edit_views.py
index 2667eac..28d9ab6 100644
--- a/gemet/thesaurus/edit_views.py
+++ b/gemet/thesaurus/edit_views.py
@@ -15,6 +15,7 @@
from gemet.thesaurus import EDIT_URL_NAMES, FOREIGN_RELATION_TYPES
from gemet.thesaurus import PENDING, PUBLISHED, DELETED, DELETED_PENDING
from gemet.thesaurus import SOURCE_RELATION_TO_TARGET
+from gemet.thesaurus import SEARCH_FIELDS
from gemet.thesaurus import models
from gemet.thesaurus.exports import create_export_files
from gemet.thesaurus.forms import ConceptForm, PropertyForm, ForeignRelationForm
@@ -198,7 +199,8 @@ def post(self, request, langcode, id, name):
name=name,
**form.cleaned_data
)
- refresh_search_text(field.name, id, langcode, self.pending_version)
+ if field.name in SEARCH_FIELDS:
+ refresh_search_text(id, langcode, self.pending_version)
data = {"value": field.value}
return self._get_response(data, 'success', 200)
@@ -351,7 +353,8 @@ def post(self, request, langcode, id, name):
)
delete_url = reverse('delete_property', kwargs={'pk': field.pk})
- refresh_search_text(field.name, id, langcode, self.pending_version)
+ if field.name in SEARCH_FIELDS:
+ refresh_search_text(id, langcode, self.pending_version)
data = {
"value": field.value,
"id": field.id,
@@ -377,7 +380,8 @@ def post(self, request, pk):
elif field.status == PENDING:
field.delete()
- refresh_search_text(field.name, field.concept_id, field.language_id)
+ if field.name in SEARCH_FIELDS:
+ refresh_search_text(field.concept_id, field.language_id)
return self._get_response({}, 'success', 200)
@@ -464,13 +468,15 @@ def form_valid(self, form):
new_concept.save()
# create prefLabel property for the new concept
- models.Property.objects.create(status=PENDING,
- version_added=self.pending_version,
- concept=new_concept,
- language=self.language,
- name='prefLabel',
- value=form.cleaned_data['name'])
- refresh_search_text('prefLabel', new_concept.id, self.language.code)
+ models.Property.objects.create(
+ status=PENDING,
+ version_added=self.pending_version,
+ concept=new_concept,
+ language=self.language,
+ name='prefLabel',
+ value=form.cleaned_data['name']
+ )
+ refresh_search_text(new_concept.id, self.language.code)
url_name = EDIT_URL_NAMES[namespace.heading]
url = reverse(url_name, kwargs={'langcode': self.langcode,
'code': new_concept.code})
diff --git a/gemet/thesaurus/import_spreadsheet.py b/gemet/thesaurus/import_spreadsheet.py
new file mode 100644
index 0000000..7380c66
--- /dev/null
+++ b/gemet/thesaurus/import_spreadsheet.py
@@ -0,0 +1,311 @@
+from openpyxl import load_workbook
+from openpyxl.utils.exceptions import InvalidFileException
+
+from django.db import transaction
+from django.utils import timezone
+
+from gemet.thesaurus import PENDING, PUBLISHED
+from gemet.thesaurus.models import (
+ Namespace, Version, Concept, Property, PropertyType, Relation, Language
+)
+from gemet.thesaurus.utils import get_new_code, get_search_text
+
+
+class ImportError(Exception):
+    """Raised when a spreadsheet cannot be imported; shadows the builtin."""
+
+
+def namespace_for(property_type_name):
+    """Return the Namespace whose heading matches a relation type name.
+
+    'broader' maps to heading 'Concepts' and 'group' to 'Groups'; any other
+    name raises KeyError.
+    """
+    headings = {'broader': 'Concepts', 'group': 'Groups'}
+    return Namespace.objects.get(heading=headings[property_type_name])
+
+
+def row_dicts(sheet):
+    """Yield a {column name: stripped text} dict for each data row of sheet.
+
+    Raises ImportError (on first iteration) for missing or unknown columns.
+    """
+    def text(cell):
+        # Cells may hold None, numbers or dates; only strings have strip().
+        return u'' if cell.value is None else u'{}'.format(cell.value).strip()
+
+    rows = iter(sheet.rows)
+    mandatory = {"Term", "Definition", "Definition reference"}
+    supported = mandatory | {
+        "Alt Label", "Abbreviation/Alt Label", "Synonym/Alt Label",
+        "Broader concept", "Broader URI", "Group", "Note", sheet.title
+    }
+    # Remember each header's position so values stay aligned with their
+    # column names even when some header cells are blank.
+    columns = [(i, text(c)) for i, c in enumerate(next(rows, ())) if text(c)]
+    names = [name for _, name in columns]
+    for column in sorted(mandatory.difference(names)):
+        raise ImportError(u'Column "{}" is mandatory.'.format(column))
+    for column in names:
+        if column not in supported:
+            raise ImportError(u'Column "{}" is not supported.'.format(column))
+
+    for row in rows:
+        record = {name: text(row[i]) for i, name in columns if i < len(row)}
+        if not any(record.values()):
+            return  # The sheet is over, there are only empty rows now.
+        yield record
+
+
+class Importer(object):
+
+ def __init__(self, import_obj):
+ self.import_obj = import_obj
+
+ @transaction.atomic
+ def import_file(self):
+ """ Imports data from file and returns string with results """
+ self.concept_ns = Namespace.objects.get(heading='Concepts')
+ self.group_ns = Namespace.objects.get(heading='Groups')
+ # Number of regular concepts before
+ num_reg_concepts_bef = Concept.objects.filter(
+ namespace=self.concept_ns
+ ).count()
+ # Number of group concepts before
+ num_groups_bef = Concept.objects.filter(namespace=self.group_ns).count()
+
+ try:
+ print("Opening file...")
+ wb = load_workbook(filename=self.import_obj.spreadsheet.path)
+ except InvalidFileException:
+ raise ImportError('The file provided is not a valid excel file.')
+ except IOError:
+ raise ImportError('The file provided does not exist.')
+
+ # Get the version with no name, used for pending concepts
+ self.version = Version.under_work()
+ # Keep a cache with a reference to all created concepts
+ self.concepts = {}
+
+ results = ""
+
+ # The 'EN' sheet must have the original English concepts
+ if 'EN' in wb.sheetnames:
+ print('Creating concepts...')
+ self._create_concepts(wb['EN'])
+
+ print('Creating relations...')
+ self._create_relations(wb['EN'])
+
+ num_reg_concepts_after = Concept.objects.filter(
+ namespace=self.concept_ns
+ ).count()
+ num_groups_after = Concept.objects.filter(
+ namespace=self.group_ns
+ ).count()
+
+ results = (
+ "Created {} regular concepts and {} group concepts."
+ ).format(
+ num_reg_concepts_after - num_reg_concepts_bef,
+ num_groups_after - num_groups_bef,
+ )
+
+ # All other sheets must have translations
+ translation_sheetnames = [sn for sn in wb.sheetnames if sn != 'EN']
+
+ if translation_sheetnames:
+ print('Creating translations...')
+ for sheetname in translation_sheetnames:
+ print(' {}...'.format(sheetname))
+ self._add_translations(wb[sheetname])
+
+ if results:
+ results += '\n\n'
+ results += (
+ "Created translations for the following {} languages: {}."
+ ).format(
+ len(translation_sheetnames),
+ ', '.join(translation_sheetnames)
+ )
+
+ return results
+
+    def _create_concepts(self, sheet):
+        """Create or update a concept for each row of the English sheet.
+
+        Every row must have a "Term" (the English prefLabel), otherwise an
+        ImportError is raised. Concepts are cached in self.concepts, keyed
+        by lower-cased label, for the relation pass.
+        """
+        # Data starts on spreadsheet row 2; row 1 holds the column names.
+        for row_number, row in enumerate(row_dicts(sheet), 2):
+            label = row.get("Term")  # aka prefLabel
+            if not label:
+                raise ImportError(u'Row {} has no "Term".'.format(row_number))
+
+            alt_labels = [row[key] for key in row if 'Alt Label' in key]
+            property_values = {
+                'prefLabel': label,
+                'definition': row.get("Definition"),
+                # The column is "Definition reference"; "Definition source" is
+                # not a supported column, so looking it up always gave None.
+                'source': row.get("Definition reference"),
+            }
+            if alt_labels:
+                property_values['altLabels'] = alt_labels
+
+            # A concept must always have at least an English property, so if
+            # there is no English property corresponding to that term in the
+            # DB, the concept must be new.
+            prop = Property.objects.filter(
+                name='prefLabel',
+                value__iexact=label,
+                language_id='en',
+            ).first()
+            is_new_concept = prop is None
+            if is_new_concept:
+                concept = Concept.objects.create(
+                    code=get_new_code(self.concept_ns),
+                    namespace=self.concept_ns,
+                    version_added=self.version,
+                    status=PENDING,
+                    date_entered=timezone.now(),
+                )
+                print(u'Concept added: {}'.format(label))
+            else:
+                concept = prop.concept
+                msg = u'Concept {} exists. '.format(label)
+                if prop.status in [PENDING, PUBLISHED]:
+                    del property_values['prefLabel']
+                    msg += 'Skipping prefLabel creation.'
+                print(msg)
+
+            self.concepts[label.lower()] = concept
+            concept.update_or_create_properties(property_values)
+
+            if is_new_concept:
+                # Create internal "searchText" property with the concatenated
+                # values from all other concept properties.
+                search_text = get_search_text(
+                    concept.id, 'en', PENDING, self.version
+                )
+                if search_text:
+                    search_text.save()
+
+ def _create_relations(self, sheet):
+
+ property_types = PropertyType.objects.filter(
+ name__in=['broader', 'group']
+ )
+
+ for i, row in enumerate(row_dicts(sheet)):
+
+ source_label = row.get("Term") # aka prefLabel
+
+ for property_type in property_types:
+
+ # Look for columns specifying relationships
+ if property_type.name == 'broader':
+ target_label = row.get("Broader concept")
+ elif property_type.name == 'group':
+ target_label = row.get("Group")
+
+ if not target_label:
+ # If it doesn't exist, there is no relation to be created
+ print(
+ (
+ 'Row {} has neither "broader" nor "group" columns.'
+ ).format(i)
+ )
+ continue
+
+ source = self.concepts[source_label.lower()]
+ target = self._get_concept(target_label)
+ namespace = namespace_for(property_type.name)
+
+ if not target:
+ print(
+ 'Creating inexistent concept: {}'.format(target_label)
+ )
+ code = get_new_code(self.concept_ns)
+ target = Concept.objects.create(
+ code=code,
+ namespace=namespace,
+ version_added=self.version,
+ status=PENDING,
+ date_entered=timezone.now(),
+ )
+ target.properties.create(
+ status=PENDING,
+ version_added=self.version,
+ language_id='en',
+ name='prefLabel',
+ value=target_label,
+ )
+
+ # target is broader of source
+ relation = Relation.objects.filter(
+ source=source,
+ target=target,
+ property_type=property_type,
+ ).first()
+
+ if not relation:
+ relation = Relation.objects.create(
+ source=source,
+ target=target,
+ property_type=property_type,
+ version_added=self.version,
+ status=PENDING,
+ )
+ print('Relation created: {}'.format(relation))
+
+ if not relation.reverse:
+ reverse_relation = relation.create_reverse()
+ print(
+ 'Reverse relation created: {}'.format(reverse_relation)
+ )
+
+    def _add_translations(self, sheet):
+        """Add translated prefLabel/definition values from a language sheet.
+
+        The sheet title is used as the language code; the "Term" column holds
+        the English prefLabel that identifies the concept to translate.
+        """
+        # Loop-invariant: look the sheet's language up once, not per row.
+        language = Language.objects.get(code=sheet.title.lower())
+        for row in row_dicts(sheet):
+            en_label = row.get('Term')
+            if not en_label:
+                raise ImportError(u'"Term" column cannot be blank.')
+
+            # language_id avoids re-fetching the English Language per row.
+            concept = Property.objects.get(
+                name='prefLabel',
+                value__iexact=en_label,
+                language_id='en',
+            ).concept
+            concept.update_or_create_properties(
+                {
+                    'prefLabel': row.get(sheet.title),
+                    'definition': row.get('Definition'),
+                },
+                language_id=language.code,
+            )
+
+    def _get_concept(self, label):
+        """Return the concept with the given English prefLabel, or None.
+
+        Concepts handled earlier in this import come from the self.concepts
+        cache; otherwise the Concepts namespace is searched in the database.
+        """
+        concept = self.concepts.get(label.lower())
+        if concept:
+            return concept
+        # .first(), as in _create_concepts, tolerates several matching rows.
+        prop = Property.objects.filter(
+            name='prefLabel', value__iexact=label, language_id='en',
+            concept__namespace=self.concept_ns,
+        ).first()
+        return prop.concept if prop else None
diff --git a/gemet/thesaurus/management/commands/check_spreadsheet.py b/gemet/thesaurus/management/commands/check_spreadsheet.py
deleted file mode 100644
index 89b42f1..0000000
--- a/gemet/thesaurus/management/commands/check_spreadsheet.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from openpyxl import load_workbook
-from openpyxl.utils.exceptions import InvalidFileException
-
-from django.core.management import CommandError
-from django.core.management.base import BaseCommand
-
-from gemet.thesaurus.models import Property
-from gemet.thesaurus.utils import split_text_into_terms
-
-
-LABELS = {
- 'RT (GEMET)': True,
- 'RT (new)': False,
- 'BT (GEMET)': True,
- 'BT (new)': False,
- 'NT (GEMET)': True,
- 'NT (new)': False,
-}
-
-
-class Command(BaseCommand):
- help = 'Check if spreadsheet terms are consistent.'
-
- def add_arguments(self, parser):
- parser.add_argument('excel_file')
-
- def check_term_existence(self, term, term_type, excel_cell_value,
- new_terms):
- message = ' at cell {}: "{}"'.format(excel_cell_value, term)
- term_in_database = Property.objects.filter(value=term).exists()
- if term_type:
- if not term_in_database:
- message += ' not found'
- message += ' in database, but found in spreadsheet. [WARNING]' \
- if term in new_terms else '. [ERROR]'
- self.stdout.write(message)
- else:
- if term not in new_terms:
- message += ' not found'
- message += ' in spreadsheet, but found in database. [WARNING]' \
- if term_in_database else '. [ERROR]'
- self.stdout.write(message)
-
- def handle(self, *args, **options):
- try:
- wb = load_workbook(filename=options['excel_file'])
- except InvalidFileException:
- raise CommandError('The file provided is not a valid excel file.')
- except IOError:
- raise CommandError('The file provided does not exist.')
-
- sheet = wb.active
- new_terms = [x.value.strip().lower() for x, in
- sheet.iter_rows(min_col=1, max_col=1, min_row=2)
- if x.value is not None]
-
- for label_cell, in sheet.iter_cols(min_row=1, max_row=1, min_col=1):
- if label_cell.value not in LABELS:
- continue
-
- term_type = LABELS.get(label_cell.value)
- for cell, in sheet.iter_rows(min_col=label_cell.col_idx,
- max_col=label_cell.col_idx, min_row=2):
- if not cell.value:
- continue
- correct_terms = split_text_into_terms(cell.value)
- for term in correct_terms:
- self.check_term_existence(term, term_type,
- cell.coordinate, new_terms)
diff --git a/gemet/thesaurus/management/commands/import.py b/gemet/thesaurus/management/commands/import.py
deleted file mode 100644
index 40288bf..0000000
--- a/gemet/thesaurus/management/commands/import.py
+++ /dev/null
@@ -1,162 +0,0 @@
-from django.core.management.base import BaseCommand
-from django.db import connections
-from gemet.thesaurus import PUBLISHED
-
-from gemet.thesaurus.models import (
- Concept,
- Namespace,
- Property,
- Language,
- PropertyType,
- Relation,
- ForeignRelation,
- DefinitionSource,
- Version,
-)
-
-
-def dictfetchall(cursor, query_str):
- """Returns all rows from a cursor as a dict"""
-
- cursor.execute(query_str)
- column_names = [col[0] for col in cursor.description]
- return [dict(zip(column_names, row)) for row in cursor.fetchall()]
-
-
-class Command(BaseCommand):
- help = 'Import a set of terms into the database'
-
- def handle(self, *args, **options):
- ns_ids = Namespace.objects.values_list('id', flat=True)
- ns_str = ', '.join([str(id) for id in ns_ids])
-
- langcodes = (
- list(Language.objects.values_list('code', flat=True)) + ['zh'])
- langcodes_str = ', '.join(["'{0}'".format(code) for code in langcodes])
-
- cursor = connections['import'].cursor()
- version_id = Version.objects.get(is_current=True).id
-
- query_str = (
- "SELECT ns AS namespace_id, "
- "id_concept AS code, "
- "datent AS date_entered, "
- "datchg as date_changed "
- "FROM concept "
- "WHERE ns IN ({0})".format(ns_str)
- )
- rows = dictfetchall(cursor, query_str)
- for row in rows:
- row['version_added_id'] = version_id
- row['status'] = PUBLISHED
- self.import_rows(rows, Concept)
- self.warn_ignored_rows(cursor, 'concept', len(rows))
- self.stdout.write('\n')
-
- query_str = (
- "SELECT concat(ns, id_concept) AS concept_id, "
- "langcode AS language_id, "
- "name, "
- "value, "
- "is_resource "
- "FROM property "
- "WHERE ns IN ({0}) "
- "AND langcode IN ({1})"
- .format(ns_str, langcodes_str)
- )
- rows = dictfetchall(cursor, query_str)
-
- concept_ids = {'{0}{1}'.format(c.namespace.id, c.code): c.id
- for c in Concept.objects.all()}
- for row in rows:
- row['concept_id'] = concept_ids[row['concept_id']]
- row['version_added_id'] = version_id
- row['status'] = PUBLISHED
- row['is_resource'] = row['is_resource'] or 0
- if row['language_id'] == 'zh':
- row['language_id'] = 'zh-CN'
-
- self.import_rows(rows, Property)
- self.warn_ignored_rows(cursor, 'property', len(rows))
- self.stdout.write('\n')
-
- query_str = (
- "SELECT concat(source_ns, id_concept) AS source_id, "
- "concat(target_ns, id_relation) AS target_id, "
- "id_type AS property_type_id "
- "FROM relation "
- "WHERE source_ns IN ({0}) "
- "AND target_ns IN ({0}) "
- .format(ns_str)
- )
- rows = dictfetchall(cursor, query_str)
-
- property_ids = {p.name: p.id for p in PropertyType.objects.all()}
-
- def update_values(row):
- try:
- row['source_id'] = concept_ids[row['source_id']]
- row['target_id'] = concept_ids[row['target_id']]
- row['property_type_id'] = property_ids[row['property_type_id']]
- row['version_added_id'] = version_id
- row['status'] = PUBLISHED
- except KeyError:
- return False
- return True
-
- rows = filter(update_values, rows)
-
- self.import_rows(rows, Relation)
- self.warn_ignored_rows(cursor, 'relation', len(rows))
- self.stdout.write('\n')
-
- query_str = (
- "SELECT concat(source_ns, id_concept) AS concept_id, "
- "relation_uri AS uri, "
- "id_type AS property_type_id, "
- "label, "
- "show_in_html "
- "FROM foreign_relation "
- "WHERE source_ns IN ({0}) "
- .format(ns_str)
- )
- rows = dictfetchall(cursor, query_str)
-
- for row in rows:
- row['concept_id'] = concept_ids[row['concept_id']]
- row['property_type_id'] = property_ids[row['property_type_id']]
- row['version_added_id'] = version_id
- row['status'] = PUBLISHED
-
- self.import_rows(rows, ForeignRelation)
- self.warn_ignored_rows(cursor, 'foreign_relation', len(rows))
- self.stdout.write('\n')
-
- query_str = "SELECT * FROM definition_sources;"
- rows = dictfetchall(cursor, query_str)
-
- self.import_rows(rows, DefinitionSource)
- self.stdout.write('\n')
-
- def import_rows(self, rows, model_cls):
- if rows:
- table_name = model_cls._meta.db_table
-
- self.stdout.write('Truncating `{0}` ...'.format(table_name))
- model_cls.objects.all().delete()
-
- cursor = connections['default'].cursor()
- reset_index = "ALTER TABLE {0} AUTO_INCREMENT=1".format(table_name)
- cursor.execute(reset_index)
-
- self.stdout.write('Inserting {0} new rows ...'.format(len(rows)))
- new_rows = [model_cls(**row) for row in rows]
- model_cls.objects.bulk_create(new_rows, batch_size=10000)
- else:
- self.stderr.write('0 rows found in the import table. Aborting ...')
-
- def warn_ignored_rows(self, cursor, table_name, inserted_rows_cnt):
- cursor.execute("SELECT count(*) FROM {0}".format(table_name))
- total_rows_cnt = cursor.fetchone()[0]
- ignored_rows_cnt = total_rows_cnt - inserted_rows_cnt
- self.stdout.write('{0} rows ignored.'.format(ignored_rows_cnt))
diff --git a/gemet/thesaurus/management/commands/import_spreadsheet.py b/gemet/thesaurus/management/commands/import_spreadsheet.py
deleted file mode 100644
index 7c83894..0000000
--- a/gemet/thesaurus/management/commands/import_spreadsheet.py
+++ /dev/null
@@ -1,229 +0,0 @@
-from openpyxl import load_workbook
-from openpyxl.utils.exceptions import InvalidFileException
-
-from django.core.management import CommandError
-from django.core.management.base import BaseCommand
-
-from gemet.thesaurus import DELETED_PENDING, PENDING, PUBLISHED
-from gemet.thesaurus import models
-from gemet.thesaurus.utils import get_new_code, get_search_text
-from gemet.thesaurus.utils import split_text_into_terms
-
-NAMESPACE = 'Concepts'
-LANGCODE = 'en'
-
-
-class Command(BaseCommand):
- help = 'Import new concepts from Excel spreadsheet'
-
- def add_arguments(self, parser):
- parser.add_argument('excel_file')
-
- def handle(self, *args, **options):
- try:
- wb = load_workbook(filename=options['excel_file'])
- except InvalidFileException:
- raise CommandError('The file provided is not a valid excel file.')
- except IOError:
- raise CommandError('The file provided does not exist.')
-
- sheet = wb.active
- self.language = models.Language.objects.get(code=LANGCODE)
- self.version = models.Version.under_work()
- self.namespace = models.Namespace.objects.get(heading=NAMESPACE)
- self.concepts = {}
-
- self.stdout.write('Creating concepts...')
- self._create_concepts(sheet)
- self.stdout.write('Creating relations...')
- self._create_relations(sheet)
-
- def _create_concepts(self, sheet):
- for row in sheet.iter_rows(max_col=3, min_row=2):
- label, defin, source = [(cell.value or '').strip() for cell in row]
-
- if not label:
- continue
- properties = {
- 'prefLabel': label,
- 'definition': defin,
- 'source': source,
- }
- is_new_concept = False
-
- property = models.Property.objects.filter(
- name='prefLabel',
- value__iexact=label,
- language=self.language,
- ).first()
-
- if property:
- concept = property.concept
- msg = u'Concept {} exists. '.format(label)
- if property.status in [PENDING, PUBLISHED]:
- del properties['prefLabel']
- msg += 'Skipping prefLabel creation.'
- self.stdout.write(msg)
- else:
- is_new_concept = True
- code = get_new_code(self.namespace)
-
- concept = models.Concept.objects.create(
- code=code,
- namespace=self.namespace,
- version_added=self.version,
- status=PENDING,
- )
- self.stdout.write(u'Concept added: {}'.format(label))
-
- self.concepts[label.lower()] = concept
- for name, value in properties.iteritems():
- current_property = models.Property.objects.filter(
- concept=concept,
- language=self.language,
- name=name,
- status__in=[PENDING, PUBLISHED]
- ).first()
- if current_property:
- if current_property.status == PENDING:
- current_property.value = value
- current_property.save()
- else:
- current_property.status = DELETED_PENDING
- current_property.save()
- if not (current_property and
- current_property.status == PENDING):
- models.Property.objects.create(
- status=PENDING,
- version_added=self.version,
- concept=concept,
- language=self.language,
- name=name,
- value=value,
- )
- if is_new_concept:
- search_text = get_search_text(
- concept.id, self.language.code, PENDING, self.version)
- if search_text:
- search_text.save()
-
- def _create_theme_group_relations(self, source):
- property_types = models.PropertyType.objects.filter(
- name__in=['theme', 'group']
- )
- property_type_broader = models.PropertyType.objects.get(name='broader')
- for property_type in property_types:
- relation = source.source_relations.filter(
- property_type=property_type).exists()
- if relation:
- self.stdout.write(
- 'Skipping {0} relation creation for concept {1}'
- .format(property_type, source))
- continue
- broader_relations = models.Relation.objects.filter(
- property_type=property_type,
- source__target_relations__source=source,
- source__target_relations__property_type=property_type_broader
- )
- if not broader_relations:
- self.stdout.write(
- 'Skipping {0} relation creation for concept {1}. '
- 'No broader.'.format(property_type, source))
- continue
- for relation in broader_relations:
- new_relation = models.Relation.objects.create(
- source=source,
- target=relation.target,
- property_type=property_type,
- version_added=self.version,
- status=PENDING)
- new_relation.create_reverse()
- self.stdout.write('For concept {0} relation: {1} was added.'
- .format(source, new_relation))
-
- def _create_relations(self, sheet):
- def get_terms(row, idx1, idx2):
- text = (row[idx1].value or '') + ';' + (row[idx2].value or '')
- return split_text_into_terms(text)
-
- related = models.PropertyType.objects.get(name='related')
- broader = models.PropertyType.objects.get(name='broader')
- narrower = models.PropertyType.objects.get(name='narrower')
-
- for row in sheet.iter_rows(min_row=2):
- label = (row[0].value or '').strip()
-
- if not label:
- continue
-
- relations = {
- related: get_terms(row, 3, 4),
- broader: get_terms(row, 5, 6),
- narrower: get_terms(row, 7, 8),
- }
-
- source = self.concepts[label.lower()]
-
- for property_type, terms in relations.iteritems():
- for term in terms:
- target = self._get_concept(term)
- if not target:
- code = get_new_code(self.namespace)
- target = models.Concept.objects.create(
- code=code,
- namespace=self.namespace,
- version_added=self.version,
- status=PENDING,
- )
- models.Property.objects.create(
- status=PENDING,
- version_added=self.version,
- concept=target,
- language=self.language,
- name='prefLabel',
- value=term,
- )
- self.stdout.write('Inexistent concept: {}'.format(term))
-
- relation = models.Relation.objects.filter(
- source=source,
- target=target,
- property_type=property_type,
- ).first()
-
- if not relation:
- relation = models.Relation.objects.create(
- source=source,
- target=target,
- property_type=property_type,
- version_added=self.version,
- status=PENDING,
- )
- self.stdout.write('Relation created: {}'
- .format(relation))
-
- if not relation.reverse:
- reverse_relation = relation.create_reverse()
- self.stdout.write('Reverse relation created: {}'
- .format(reverse_relation))
- self._create_theme_group_relations(source)
-
- def _get_concept(self, label):
- concept = self.concepts.get(label.lower())
-
- if not concept:
- try:
- concept = (
- models.Property.objects
- .get(
- name='prefLabel',
- value__iexact=label,
- language=self.language,
- concept__namespace=self.namespace,
- )
- .concept
- )
- except models.Property.DoesNotExist:
- concept = None
-
- return concept
diff --git a/gemet/thesaurus/management/commands/import_translation.py b/gemet/thesaurus/management/commands/import_translation.py
deleted file mode 100644
index 0772901..0000000
--- a/gemet/thesaurus/management/commands/import_translation.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from openpyxl import load_workbook
-from openpyxl.utils.exceptions import InvalidFileException
-
-from django.core.management import CommandError
-from django.core.management.base import BaseCommand
-
-from gemet.thesaurus import DELETED_PENDING, PENDING, PUBLISHED
-from gemet.thesaurus import models
-from gemet.thesaurus.utils import get_new_code, get_search_text, refresh_search_text
-from gemet.thesaurus.utils import split_text_into_terms
-
-
-class Command(BaseCommand):
- help = (
- "Import translations for existing concepts from Excel "
- "(column A=English name, B=translated name, C=translated definition)"
- )
-
- def add_arguments(self, parser):
- parser.add_argument('excel_file')
- parser.add_argument('language_code')
-
- def handle(self, *args, **options):
- try:
- wb = load_workbook(filename=options['excel_file'])
- except InvalidFileException:
- raise CommandError('The file provided is not a valid excel file.')
- except IOError:
- raise CommandError('The file provided does not exist.')
-
- sheet = wb.active
- langcode = options['language_code']
- try:
- self.language = models.Language.objects.get(code=langcode)
- except models.Language.DoesNotExist:
- raise CommandError('Language "{}" not found.'.format(langcode))
-
- self.version = models.Version.under_work()
- self.namespace = models.Namespace.objects.get(heading='Concepts')
- self.concepts = {}
-
- self.stdout.write('Adding translations...')
- self._add_translations(sheet)
-
- def _add_translations(self, sheet):
- for row in sheet.iter_rows(max_col=3, min_row=2):
- en_label, label, definition = [(cell.value or '').strip() for cell in row]
-
- if not en_label:
- # Skip empty rows
- continue
- properties = {
- 'prefLabel': label,
- 'definition': definition,
- }
-
- property = models.Property.objects.filter(
- name='prefLabel',
- value__iexact=en_label,
- language=models.Language.objects.get(code='en'),
- ).first()
- if not property:
- # Concept not found
- self.stderr.write(u'Skipping concept not found: {}'.format(en_label))
- continue
- concept = property.concept
- msg = u'Concept {} exists. '.format(en_label)
-
- for name, value in properties.iteritems():
- current_property = models.Property.objects.filter(
- concept=concept,
- language=self.language,
- name=name,
- status__in=[PENDING, PUBLISHED]
- ).first()
- if current_property:
- if current_property.status == PENDING:
- current_property.value = value
- current_property.save()
- else:
- current_property.status = DELETED_PENDING
- current_property.save()
- if not (current_property and
- current_property.status == PENDING):
- models.Property.objects.create(
- status=PENDING,
- version_added=self.version,
- concept=concept,
- language=self.language,
- name=name,
- value=value,
- )
- refresh_search_text('prefLabel', concept.id, self.language.code)
-
diff --git a/gemet/thesaurus/migrations/0012_import.py b/gemet/thesaurus/migrations/0012_import.py
new file mode 100644
index 0000000..5d1283e
--- /dev/null
+++ b/gemet/thesaurus/migrations/0012_import.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.10.6 on 2021-01-06 12:42
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Creates the `Import` model: an uploaded spreadsheet (stored under
+ # imports/), created/updated timestamps, nullable started/failed/succeeded
+ # timestamps and a free-text `logs` field.
+
+ dependencies = [
+ ('thesaurus', '0011_auto_20170628_1931'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='Import',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('created_at', models.DateTimeField(auto_now_add=True)),
+ ('updated_at', models.DateTimeField(auto_now=True)),
+ ('spreadsheet', models.FileField(help_text=b'Details about the supported file format can be found here.', upload_to=b'imports/')),
+ ('started_at', models.DateTimeField(null=True)),
+ ('failed_at', models.DateTimeField(null=True)),
+ ('succeeded_at', models.DateTimeField(null=True)),
+ ('logs', models.TextField(blank=True)),
+ ],
+ options={
+ 'abstract': False,
+ },
+ ),
+ ]
diff --git a/gemet/thesaurus/models.py b/gemet/thesaurus/models.py
index 4e142ed..7a0d459 100644
--- a/gemet/thesaurus/models.py
+++ b/gemet/thesaurus/models.py
@@ -3,9 +3,11 @@
from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
+from django.utils.html import mark_safe
from gemet.thesaurus import PENDING, PUBLISHED, DELETED, DELETED_PENDING
from gemet.thesaurus import NS_VIEW_MAPPING, RELATION_PAIRS
+from gemet.thesaurus import SEARCH_FIELDS, SEARCH_SEPARATOR
class Version(models.Model):
@@ -72,6 +74,7 @@ def __unicode__(self):
class Concept(VersionableModel):
namespace = models.ForeignKey(Namespace)
code = models.CharField(max_length=10)
+ # TODO: Rename to created_at/updated_at and use auto_now and auto_now_add
date_entered = models.DateTimeField(blank=True, null=True)
date_changed = models.DateTimeField(blank=True, null=True)
@@ -96,8 +99,108 @@ def visible_foreign_relations(self):
@property
def name(self):
+ """ Relies on data being set properly on set_attributes """
return getattr(self, 'prefLabel', '')
+    @property
+    def label(self):
+        """
+        Return the English prefLabel value of the Concept.
+
+        Returns '' when the concept has no English prefLabel property with
+        status 0 or 1, instead of failing on a missing row.
+        """
+        # NOTE(review): 0 and 1 are presumably the PENDING / PUBLISHED
+        # statuses -- confirm and switch to the named constants.
+        pref_label = self.properties.filter(
+            language='en', name='prefLabel', status__in=[0, 1]
+        ).first()
+        # .first() yields None when nothing matches
+        if pref_label is None:
+            return ''
+        return pref_label.value
+
+    def update_or_create_properties(
+        self, property_values, language_id='en', version=None
+    ):
+        """
+        Store the given property values on this concept as pending changes.
+
+        `property_values` maps property names to values; the 'altLabel' key
+        must map to a list of values. Published properties with a matching
+        name and language are marked DELETED_PENDING, pending ones are
+        updated (or replaced, for altLabel) and missing ones are created.
+        The searchText property is refreshed at the end.
+        """
+        if not version:
+            version = Version.under_work()
+
+        in_language = self.properties.filter(language_id=language_id)
+        pending = in_language.filter(status=PENDING)
+        # Attributes shared by every property created below
+        new_property_kwargs = dict(
+            status=PENDING,
+            version_added=version,
+            language_id=language_id,
+        )
+
+        # Soft delete matching published properties
+        in_language.filter(
+            name__in=property_values.keys(),
+            status=PUBLISHED
+        ).update(status=DELETED_PENDING)
+
+        for prop_name, prop_value in property_values.iteritems():
+            if prop_name == 'altLabel':
+                # altLabel key maps to multiple values
+                assert isinstance(prop_value, list)
+                # Replace the pending altLabels wholesale
+                pending.filter(name='altLabel').delete()
+                for alt_label in prop_value:
+                    self.properties.create(
+                        name=prop_name, value=alt_label, **new_property_kwargs
+                    )
+                continue
+
+            if not prop_value:
+                # Empty values are neither updated nor created
+                continue
+
+            # Update the pending property if there is one ...
+            updated_rows = pending.filter(name=prop_name).update(
+                value=prop_value
+            )
+            if not updated_rows:
+                # ... otherwise create it
+                self.properties.create(
+                    name=prop_name, value=prop_value, **new_property_kwargs
+                )
+        self.update_or_create_search_text(language_id, version)
+
+    def update_or_create_search_text(self, language_code, version=None):
+        """
+        Update or create Property of type searchText, an internal type of
+        property consisting of the concatenated values of all searchable
+        properties of a concept.
+
+        `language_code` selects the language whose properties are
+        concatenated. `version` is only used when a new searchText property
+        has to be created and defaults to Version.under_work().
+        """
+        version = version or Version.under_work()
+
+        # Get values from searchable properties
+        search_prop_values = self.properties.filter(
+            language_id=language_code,
+            name__in=SEARCH_FIELDS,
+            status__in=[PUBLISHED, PENDING],
+        ).values_list('value', flat=True)
+
+        # Concatenate them using internal format
+        if search_prop_values:
+            search_text = SEARCH_SEPARATOR.join(search_prop_values)
+            search_text = SEARCH_SEPARATOR + search_text + SEARCH_SEPARATOR
+        else:
+            search_text = ''
+
+        # Look for existing searchText Property object
+        search_text_property = self.properties.filter(
+            language_id=language_code,
+            name='searchText',
+            status__in=[PUBLISHED, PENDING],
+        ).first()
+
+        if search_text_property:
+            # If it exists, update it with the new calculated value
+            search_text_property.value = search_text
+            search_text_property.save()
+        else:
+            # If not, create it. The calculated text has to be passed
+            # explicitly, otherwise the new row is stored without it.
+            self.properties.create(
+                language_id=language_code,
+                name='searchText',
+                value=search_text,
+                status=PENDING,
+                version_added=version,
+            )
+
def get_attributes(self, langcode, property_list):
values = ['id', 'name', 'value']
values.extend(self.extra_values)
@@ -363,6 +466,62 @@ class AsyncTask(models.Model):
)
+class TimeTrackedModel(models.Model):
+    """
+    Abstract base model adding creation and last-update timestamps.
+    """
+    created_at = models.DateTimeField(auto_now_add=True)
+    updated_at = models.DateTimeField(auto_now=True)
+
+    class Meta:
+        abstract = True
+
+
+class Import(TimeTrackedModel):
+    """
+    Keeps track of data imports via excel files.
+    """
+    spreadsheet = models.FileField(
+        upload_to='imports/',
+        help_text=mark_safe(
+            'Details about the supported file format can be found here.'
+        )
+    )
+    # Timestamps of the last run; `status` is derived from them
+    started_at = models.DateTimeField(null=True)
+    failed_at = models.DateTimeField(null=True)
+    succeeded_at = models.DateTimeField(null=True)
+    # Importer output on success, error message on failure
+    logs = models.TextField(blank=True)
+
+    @property
+    def status(self):
+        """ Human-readable state derived from the *_at timestamps """
+        if not self.started_at:
+            return u'Unstarted'
+        elif self.succeeded_at:
+            return u'Succeeded'
+        elif self.failed_at:
+            return u'Failed'
+        else:
+            return u'In progress'
+
+    def run(self):
+        """
+        Run the import synchronously and record its outcome.
+
+        Resets the state left by a previous run, marks the import as
+        started, then stores either the importer's logs and `succeeded_at`
+        or the error message and `failed_at`. Any exception raised by the
+        import is re-raised after being recorded.
+        """
+        from django.utils.encoding import force_text
+
+        self.logs = ''
+        self.failed_at = None
+        self.succeeded_at = None
+        self.started_at = timezone.now()
+        self.save()
+
+        try:
+            # Kept inside the try block so a failing import of the module
+            # is recorded like any other failure
+            from gemet.thesaurus.import_spreadsheet import Importer
+            importer = Importer(self)
+            self.logs = importer.import_file()
+            self.succeeded_at = timezone.now()
+            self.save()
+        except Exception as exc:
+            # str(exc) raises UnicodeEncodeError on Python 2 when the
+            # message holds non-ASCII text, which would hide the real error
+            self.logs = force_text(exc, errors='replace')
+            # Make sure a failure after succeeded_at was set is not
+            # reported as 'Succeeded'
+            self.succeeded_at = None
+            self.failed_at = timezone.now()
+            self.save()
+            raise
+
+
class ConceptManager(models.Manager):
def __init__(self, namespace):
diff --git a/gemet/thesaurus/static/thesaurus/js/start_import.js b/gemet/thesaurus/static/thesaurus/js/start_import.js
new file mode 100644
index 0000000..8b1b1b9
--- /dev/null
+++ b/gemet/thesaurus/static/thesaurus/js/start_import.js
@@ -0,0 +1,8 @@
+// Wires the "start import" buttons: clicking one requests
+// /import/<button id>/start/ and reloads the page afterwards.
+// Wrapped in a function so neither `$` nor `url` leaks into the global scope.
+(function ($) {
+    $(document).ready(function () {
+        $('input.start-import').click(function () {
+            var url = window.location.origin + "/import/" + this.id + "/start/";
+            // Reload only once the request has finished (successfully or
+            // not); reloading right away can cancel the request in flight.
+            $.ajax({ url: url }).always(function () {
+                location.reload();
+            });
+        });
+    });
+})(django.jQuery);
diff --git a/gemet/thesaurus/templatetags/gemet_tags.py b/gemet/thesaurus/templatetags/gemet_tags.py
index 4c59ff5..6897bdb 100644
--- a/gemet/thesaurus/templatetags/gemet_tags.py
+++ b/gemet/thesaurus/templatetags/gemet_tags.py
@@ -63,17 +63,14 @@ def get_concept_names(concept, status_values, langcode):
def get_concept_name(concept_name, concept_id, status_values):
if concept_name:
return concept_name
- concept = (
- Concept.objects.get(pk=concept_id)
- .properties.filter(
- language__code=DEFAULT_LANGCODE,
- name='prefLabel',
- status__in=status_values,
- ).first()
- )
- if concept:
+ prop = Concept.objects.get(pk=concept_id).properties.filter(
+ language__code=DEFAULT_LANGCODE,
+ name='prefLabel',
+ status__in=status_values,
+ ).first()
+ if prop:
language = Language.objects.get(code=DEFAULT_LANGCODE).name.lower()
- return mark_safe(concept.value + ' [' + language + ']')
+ return mark_safe(prop.value + ' [' + language + ']')
return ''
diff --git a/gemet/thesaurus/tests/files/concepts.xlsx b/gemet/thesaurus/tests/files/concepts.xlsx
new file mode 100644
index 0000000..55424e1
Binary files /dev/null and b/gemet/thesaurus/tests/files/concepts.xlsx differ
diff --git a/gemet/thesaurus/tests/files/only_en.xlsx b/gemet/thesaurus/tests/files/only_en.xlsx
new file mode 100644
index 0000000..91c6ef8
Binary files /dev/null and b/gemet/thesaurus/tests/files/only_en.xlsx differ
diff --git a/gemet/thesaurus/tests/files/only_translations.xlsx b/gemet/thesaurus/tests/files/only_translations.xlsx
new file mode 100644
index 0000000..5214c6e
Binary files /dev/null and b/gemet/thesaurus/tests/files/only_translations.xlsx differ
diff --git a/gemet/thesaurus/tests/test_edit_view.py b/gemet/thesaurus/tests/test_edit_view.py
index 20ec01e..5c0868b 100644
--- a/gemet/thesaurus/tests/test_edit_view.py
+++ b/gemet/thesaurus/tests/test_edit_view.py
@@ -398,16 +398,21 @@ def test_get_sets_form(self):
def test_post_correct_form(self):
url = reverse('concept_add', kwargs={'langcode': self.language.code})
- response = self.app.post(url, user=self.user,
- params={'name': 'test',
- 'namespace': self.namespace.id})
+ response = self.app.post(
+ url,
+ user=self.user,
+ params={'name': 'test', 'namespace': self.namespace.id}
+ )
self.assertEqual(302, response.status_code)
self.assertEqual(2, len(models.Concept.objects.all()))
self.assertEqual('201', models.Concept.objects.last().code)
- self.assertEqual('test',
- models.Property.objects.get(name='prefLabel',
- concept__code='201').value)
+ self.assertEqual(
+ 'test',
+ models.Property.objects.get(
+ name='prefLabel', concept__code='201'
+ ).value
+ )
def test_search_text_is_created(self):
url = reverse('concept_add', kwargs={'langcode': self.language.code})
diff --git a/gemet/thesaurus/tests/test_import.py b/gemet/thesaurus/tests/test_import.py
new file mode 100644
index 0000000..548a2bb
--- /dev/null
+++ b/gemet/thesaurus/tests/test_import.py
@@ -0,0 +1,70 @@
+from django.core.files import File
+from django.test import TestCase, Client
+
+from .factories import VersionFactory
+from gemet.thesaurus.models import Concept, Import, Property
+
+
+class ConceptImportView(TestCase):
+    """ Exercises the start_import view with the spreadsheets in tests/files """
+
+    # Create namespaces, languages, property types, and users
+    fixtures = ['data.json']
+
+    def setUp(self):
+        VersionFactory(identifier='')
+        self.client = Client()
+
+    def _start_import(self, filename):
+        """
+        Create an Import from tests/files/<filename>, trigger it through the
+        start view and return the response.
+        """
+        path = 'gemet/thesaurus/tests/files/' + filename
+        # xlsx files are binary; the handle is closed once the Import
+        # object has been created
+        with open(path, 'rb') as spreadsheet:
+            import_obj = Import.objects.create(spreadsheet=File(spreadsheet))
+        return self.client.get('/import/{}/start/'.format(import_obj.pk))
+
+    def _assert_imported_concepts(self):
+        """ Check the expected number of status 0 concepts per namespace """
+        for heading, expected in (('Concepts', 79), ('Groups', 2)):
+            self.assertEqual(
+                Concept.objects.filter(
+                    status=0, namespace__heading=heading
+                ).count(),
+                expected
+            )
+
+    def test_import_concepts_and_translations_together(self):
+        response = self._start_import('concepts.xlsx')
+        self.assertEqual(response.status_code, 200)
+        self._assert_imported_concepts()
+
+    def test_import_concepts_and_translations_separately(self):
+        # Import concepts in English first
+        response = self._start_import('only_en.xlsx')
+        self.assertEqual(response.status_code, 200)
+        self._assert_imported_concepts()
+        num_properties_before = Property.objects.count()
+        # Import a separate spreadsheet only with translations
+        response = self._start_import('only_translations.xlsx')
+        self.assertEqual(response.status_code, 200)
+        # The number of concepts is still the same
+        self.assertEqual(Concept.objects.filter(status=0).count(), 81)
+        # New properties were created with translations
+        self.assertGreater(Property.objects.count(), num_properties_before)
diff --git a/gemet/thesaurus/urls.py b/gemet/thesaurus/urls.py
index 79b87b8..2d0dc15 100644
--- a/gemet/thesaurus/urls.py
+++ b/gemet/thesaurus/urls.py
@@ -162,8 +162,16 @@
url(r'^(?P\w+)/(?P\d+)$',
views.concept_redirect,
name='concept_redirect'),
+    # The named group supplies the `import_id` argument of views.start_import
+    url(
+        r'^import/(?P<import_id>\d+)/start/$',
+        views.start_import,
+        name='start_import'
+    ),
url(r'^auth/login/$', auth_views.LoginView.as_view(), name='login'),
url(r'^auth/logout/$', auth_views.LogoutView.as_view(), name='logout'),
+
+ # Custom Admin pages
+ # url(r'^admin/import/$', views.AdminImportView.as_view(), name='import'),
]
if settings.DEBUG:
diff --git a/gemet/thesaurus/utils.py b/gemet/thesaurus/utils.py
index 988173b..d7d9597 100644
--- a/gemet/thesaurus/utils.py
+++ b/gemet/thesaurus/utils.py
@@ -7,6 +7,9 @@
from django_q.brokers import get_broker
from django_q.status import Stat
+from django.db import models
+from django.db.models.functions import Cast
+
from gemet.thesaurus import PENDING, PUBLISHED, DELETED_PENDING
from gemet.thesaurus import SEARCH_FIELDS, SEARCH_SEPARATOR
from gemet.thesaurus import EXACT_QUERY, END_WITH_QUERY, BEGIN_WITH_QUERY
@@ -151,14 +154,17 @@ def get_form_errors(errors):
def get_new_code(namespace):
- codes = (
- Concept.objects
- .filter(namespace=namespace)
- .exclude(code='')
- .values_list('code', flat=True)
+ # We cannot use an autoincrement integer field for `code` because some
+ # existing production values (i.e. for Inspire Themes) are not integers.
+ return unicode(
+ (
+ Concept.objects.filter(namespace=namespace).annotate(
+ int_code=Cast('code', models.IntegerField())
+ ).order_by('-int_code').values_list(
+ 'int_code', flat=True
+ ).first() or 0
+ ) + 1
)
- new_code = max(map(int, codes)) + 1
- return unicode(new_code)
def split_text_into_terms(raw_text):
@@ -191,16 +197,12 @@ def concept_has_unique_relation(concept, relation_type):
def get_search_text(concept_id, language_code, status, version):
- search_properties = (
- Property.objects
- .filter(
- concept_id=concept_id,
- language_id=language_code,
- name__in=SEARCH_FIELDS,
- status__in=[PUBLISHED, PENDING],
- )
- .values_list('value', flat=True)
- )
+ search_properties = Property.objects.filter(
+ concept_id=concept_id,
+ language_id=language_code,
+ name__in=SEARCH_FIELDS,
+ status__in=[PUBLISHED, PENDING],
+ ).values_list('value', flat=True)
if not search_properties:
return
@@ -213,31 +215,23 @@ def get_search_text(concept_id, language_code, status, version):
language_id=language_code,
name='searchText',
value=search_text,
- is_resource=0,
status=status,
- version_added_id=version.id
+ version_added=version,
)
-def refresh_search_text(proptype, concept_id, language_code, version=None):
- if proptype not in SEARCH_FIELDS:
- return
-
+def refresh_search_text(concept_id, language_code, version=None):
version = version or Version.under_work()
new_search = get_search_text(concept_id, language_code, PENDING, version)
if not new_search:
return
- search_property = (
- Property.objects
- .filter(
- concept_id=concept_id,
- language_id=language_code,
- name='searchText',
- status__in=[PUBLISHED, PENDING],
- )
- .first()
- )
+ search_property = Property.objects.filter(
+ concept_id=concept_id,
+ language_id=language_code,
+ name='searchText',
+ status__in=[PUBLISHED, PENDING],
+ ).first()
if not search_property:
pass
elif search_property.status == PENDING:
diff --git a/gemet/thesaurus/views.py b/gemet/thesaurus/views.py
index 9675221..2436074 100644
--- a/gemet/thesaurus/views.py
+++ b/gemet/thesaurus/views.py
@@ -5,7 +5,7 @@
from urllib import urlencode
from xmlrpclib import Fault
-from django.http import Http404, StreamingHttpResponse
+from django.http import Http404, HttpResponse, StreamingHttpResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.core.exceptions import ObjectDoesNotExist
from django.core.paginator import Paginator
@@ -18,9 +18,10 @@
from django.conf import settings
from django_q.tasks import result
-from gemet.thesaurus.models import Concept, DefinitionSource, Group, Language
-from gemet.thesaurus.models import InspireTheme, Namespace, Property, SuperGroup
-from gemet.thesaurus.models import Term, Theme, Version, AsyncTask
+from gemet.thesaurus.models import (
+ Concept, DefinitionSource, Group, Language, InspireTheme, Namespace,
+ Property, SuperGroup, Term, Theme, Version, AsyncTask, Import
+)
from gemet.thesaurus.collation_charts import unicode_character_map
from gemet.thesaurus.forms import SearchForm, ExportForm
from gemet.thesaurus.utils import search_queryset, exp_decrypt, is_rdf
@@ -821,3 +822,12 @@ def error500(request):
template = '500.html'
status_code = 500
return render(request, template, context, status=status_code)
+
+
+def start_import(request, import_id):
+    """
+    Run the Import with primary key `import_id` and return an empty response.
+
+    Responds with 404 when no such Import exists. Exceptions raised by the
+    import itself propagate to the caller.
+    """
+    # NOTE(review): the import runs inside the request, is triggered for any
+    # HTTP method and no permission check is visible here -- confirm this is
+    # intended.
+    data_import = get_object_or_404(Import, pk=import_id)
+    data_import.run()
+    return HttpResponse("")
diff --git a/gemet/urls.py b/gemet/urls.py
index 62673aa..2e2fddf 100644
--- a/gemet/urls.py
+++ b/gemet/urls.py
@@ -1,4 +1,6 @@
+from django.conf import settings
from django.conf.urls import include, url
+from django.conf.urls.static import static
from django.contrib import admin
admin.autodiscover()
@@ -7,7 +9,7 @@
urlpatterns = [
url(r'', include('gemet.thesaurus.urls')),
url(r'^admin/', include(admin.site.urls)),
-]
+] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
handler404 = 'gemet.thesaurus.views.error404'
handler500 = 'gemet.thesaurus.views.error500'
diff --git a/requirements.txt b/requirements.txt
index 9415bd8..57f43d4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ argparse==1.2.1
django-auth-ldap==1.2.10
django-q==0.8.0
docutils==0.11
-openpyxl==2.4.1
+openpyxl==2.6.4
redis==2.10.5
requests==2.3.0
wsgiref==0.1.2