Skip to content

Commit

Permalink
Merge pull request #84 from eea/2020-import
Browse files Browse the repository at this point in the history
2020 import
  • Loading branch information
dianaboiangiu committed Jan 14, 2021
2 parents 1b320a7 + 80b6f1d commit 6dc7750
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 90 deletions.
155 changes: 80 additions & 75 deletions gemet/thesaurus/import_spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ def import_file(self):
print('Creating concepts...')
self._create_concepts(wb['EN'])

# Fetch the property types needed to create relations
self.property_types = PropertyType.objects.filter(
name__in=['broader', 'group', 'theme']
)

print('Creating relations...')
self._create_relations(wb['EN'])

Expand Down Expand Up @@ -176,89 +181,89 @@ def _create_concepts(self, sheet):

def _create_relations(self, sheet):
    """Create the relations described by every row of ``sheet``.

    Each row yielded by ``row_dicts(sheet)`` is handed, together with its
    index, to ``_create_row_relations``; a progress line is printed first
    so long imports show where they are.
    """
    for i, row in enumerate(row_dicts(sheet)):
        print("Processing row {}...".format(i))

        self._create_row_relations(i, row)

def _create_row_relations(self, i, row):
    """Create the broader/group/theme relations for one spreadsheet row.

    ``i`` is the row index and is only used in printed and error messages.
    ``row`` maps column headers to cell values; its "Term" cell is the
    label of the source concept, looked up lower-cased in
    ``self.concepts``. The property types to process are read from
    ``self.property_types``.

    Raises ImportError when a label found in a relationship column does
    not match an existing English prefLabel in the property type's
    namespace.
    """
    # Substring that marks the columns belonging to each relation type.
    column_markers = {
        'broader': 'Broader concept',
        'group': 'Group',
        'theme': 'Theme',
    }

    source_label = row.get("Term")  # aka prefLabel
    source = self.concepts[source_label.lower()]

    # Bound up front so the summary message at the end cannot hit an
    # unbound name when no 'broader' property type is iterated.
    broader_labels = []

    for property_type in self.property_types:

        # Collect the non-empty cells of the columns for this relation type
        marker = column_markers[property_type.name]
        target_labels = [
            value for key, value in row.items()
            if marker in key and value
        ]
        if property_type.name == 'broader':
            broader_labels = target_labels

        if not target_labels:
            # If it doesn't exist, there is no relation to be created.
            # NOTE(review): this ends processing of the whole row, so any
            # remaining property types and the inheritance step below are
            # skipped; the loop this was extracted from used `continue`
            # here. Confirm which behaviour is intended.
            print(
                (
                    'Row {} has no relationship columns '
                    '(i.e. "Broader concept", "Group", or "Theme").'
                ).format(i)
            )
            return

        for target_label in target_labels:
            target = Concept.objects.filter(
                properties__name='prefLabel',
                properties__value=target_label,
                properties__language_id='en',
                namespace=property_type.namespace
            ).exclude(
                status=DELETED_PENDING,
                properties__status=DELETED_PENDING
            ).first()

            if not target:
                raise ImportError(
                    'Row {}: concept "{}" does not exist.'.format(
                        i, target_label
                    )
                )

            relation, created = Relation.objects.get_or_create(
                source=source,  # child
                target=target,  # parent
                property_type=property_type,
                defaults={
                    'version_added': self.version, 'status': PENDING
                }
            )

            if created:
                print('Relation created: {}'.format(relation))

            # NOTE(review): assumed to apply to pre-existing relations as
            # well as new ones (i.e. not nested under `if created`) --
            # confirm.
            if not relation.reverse:
                reverse_relation = relation.create_reverse()
                print(
                    'Reverse relation created: {}'.format(
                        reverse_relation
                    )
                )

    # NOTE(review): assumed to run once per row, after all property types
    # have been processed -- confirm.
    source.inherit_groups_and_themes_from_broader()
    print(
        'Inherited groups and themes from: {}'.format(
            ', '.join(broader_labels)
        )
    )

def _add_translations(self, sheet):
for row in row_dicts(sheet):
Expand Down
7 changes: 5 additions & 2 deletions gemet/thesaurus/static/thesaurus/js/start_import.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ $ = django.jQuery;
// Wire every "start import" button to the import endpoint of its object.
$(document).ready(function () {
  $('input.start-import').click(function () {
    // The button's id is used as the import id in the URL; the site root
    // is taken to be everything before "admin" in the current location.
    var url = window.location.href.split("admin")[0] + "import/" + this.id + "/start/";
    $.ajax({
      url: url,
      // Reload only once the server has answered; reloading right after
      // issuing the request could tear the page down before it is sent.
      success: function () {
        location.reload();
      }
    });
  });
});
15 changes: 6 additions & 9 deletions gemet/thesaurus/tests/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from django.test import TestCase, Client

from .factories import (
VersionFactory, TermFactory, GroupFactory, ThemeFactory, ConceptFactory,
Relation
VersionFactory, TermFactory, GroupFactory, ThemeFactory, Relation
)
from gemet.thesaurus.models import (
Concept, Term, Group, Theme, Import, Property, PropertyType, Namespace
Concept, Term, Group, Theme, Import, Property, PropertyType
)


Expand All @@ -20,10 +19,6 @@ def setUp(self):
version = VersionFactory(identifier='')
self.client = Client()

concept_namespace = Namespace.objects.get(heading='Concepts')
group_namespace = Namespace.objects.get(heading='Groups')
theme_namespace = Namespace.objects.get(heading='Themes')

i = 1
# Create broader concepts
for value in ['pollution', 'impact source']:
Expand Down Expand Up @@ -71,8 +66,9 @@ def test_import_concepts_and_translations_together(self):
spreadsheet=File(open('gemet/thesaurus/tests/files/concepts.xlsx'))
)
url = '/import/{}/start/'.format(import_obj.pk)
response = self.client.get(url)
response = self.client.get(url, {"synchronous": True})
self.assertEqual(response.status_code, 200)

# 45 new concepts were imported
num_concepts_after = Concept.objects.count()
self.assertEqual(num_concepts_after - num_concepts_before, 45)
Expand Down Expand Up @@ -119,8 +115,9 @@ def test_import_concepts_and_translations_separately(self):
spreadsheet=File(open('gemet/thesaurus/tests/files/only_en.xlsx'))
)
url = '/import/{}/start/'.format(import_obj.pk)
response = self.client.get(url)
response = self.client.get(url, {"synchronous": True})
self.assertEqual(response.status_code, 200)

# 45 new concepts were imported
num_concepts_after = Concept.objects.count()
self.assertEqual(num_concepts_after - num_concepts_before, 45)
Expand Down
3 changes: 0 additions & 3 deletions gemet/thesaurus/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,6 @@
),
url(r'^auth/login/$', auth_views.LoginView.as_view(), name='login'),
url(r'^auth/logout/$', auth_views.LogoutView.as_view(), name='logout'),

# Custom Admin pages
# url(r'^admin/import/$', views.AdminImportView.as_view(), name='import'),
]

if settings.DEBUG:
Expand Down
16 changes: 15 additions & 1 deletion gemet/thesaurus/views.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import re
import sys
import multiprocessing
from itertools import chain
from urllib import urlencode
from xmlrpclib import Fault

from django import db
from django.http import Http404, HttpResponse, StreamingHttpResponse
from django.shortcuts import render, get_object_or_404, redirect
from django.core.exceptions import ObjectDoesNotExist
Expand Down Expand Up @@ -824,10 +826,22 @@ def error500(request):
return render(request, template, context, status=status_code)


def run_import(data_import):
    """Run ``data_import`` by calling its ``run()`` method.

    Kept as a plain module-level function so it can be passed as the
    ``target`` of a ``multiprocessing.Process``.
    """
    data_import.run()


def start_import(request, import_id):
    """Start the import identified by ``import_id``.

    By default the import is launched in a separate process and the
    response is returned without waiting for it. When the ``synchronous``
    query parameter is present and truthy, the import runs in the request
    itself and the response is sent only after it has finished.

    Raises Http404 when no Import with that primary key exists.
    """
    try:
        data_import = Import.objects.get(pk=import_id)
    except Import.DoesNotExist:
        raise Http404("Import object does not exist")

    # Query-string values are strings, so "false", "0" and the like have
    # to be rejected explicitly rather than relying on truthiness.
    flag = request.GET.get("synchronous", "").strip().lower()
    synchronous = flag not in ("", "0", "false", "no", "off")

    if synchronous:
        data_import.run()
    else:
        process = multiprocessing.Process(
            target=run_import, args=(data_import,)
        )
        # Close this process's DB connections before the child starts --
        # presumably so the child opens its own instead of sharing them.
        db.connections.close_all()
        process.start()
    return HttpResponse("")

0 comments on commit 6dc7750

Please sign in to comment.