Skip to content
Browse files

Complete initial combine fields script

  • Loading branch information...
1 parent 6e00610 commit cdaad58e16d8fbb5e361986e1bb3bc463e577c5c @knabar knabar committed Feb 23, 2014
Showing with 76 additions and 21 deletions.
  1. +76 −21 rooibos/data/management/commands/combine_fields.py
View
97 rooibos/data/management/commands/combine_fields.py
@@ -1,43 +1,98 @@
from django.core.management.base import BaseCommand
-from rooibos.data.models import Field
+from django.db.models import Count
+from rooibos.data.models import Field, FieldSetField, FieldValue
from optparse import make_option
-import rooibos.contrib.djangologging.middleware # does not get loaded otherwise
-import logging
class Command(BaseCommand):
    """Find fields that describe the same thing and merge them.

    Fields are grouped by label, standard prefix and (unless
    ``--ignorevocabs`` is given) vocabulary.  Within each group the field
    whose ``name`` sorts first is kept and the others are replaced by it.
    Afterwards, every remaining field without a standard is replaced by an
    equivalent field that has one, when such a field with the same label
    (and vocabulary) exists.

    Without ``--execute`` the command only prints the replacements it would
    make; with ``--execute`` references are rewritten and the replaced
    fields are deleted.
    """

    help = 'Finds and combines equivalent fields'

    option_list = BaseCommand.option_list + (
        make_option(
            '--execute', action='store_true',
            help='Combine automatically detected fields'),
        make_option(
            '--ignorevocabs', action='store_true',
            help='Ignore vocabularies when comparing fields'),
    )

    @staticmethod
    def _grouping_key(field, usevocabs):
        """Return the key under which ``field`` is grouped with duplicates.

        A tuple is used rather than a joined string so that labels
        containing spaces, or values equal to a placeholder, can never make
        two different (label, prefix, vocabulary) combinations collide.
        """
        return (
            field.label,
            field.standard.prefix if field.standard else None,
            field.vocabulary.id if field.vocabulary and usevocabs else None,
        )

    def handle(self, *commands, **options):
        """Run the command.

        Recognised options: ``execute`` (apply the changes instead of only
        reporting them) and ``ignorevocabs`` (do not take the vocabulary
        into account when comparing fields).  Positional arguments are
        ignored.
        """
        execute = options.get('execute')
        usevocabs = not options.get('ignorevocabs')

        # ids of fields that have been (or, in a dry run, would be) removed
        deleted = []
        # id of a kept field -> ids of the equivalents it inherited from the
        # fields merged into it; lets a dry run see the same equivalences
        # the database would contain after a real run
        equivalents = {}

        def combine_fields(field, replace_with_field):
            """Replace ``field`` with ``replace_with_field``."""
            print("Replacing %s with %s" % (field, replace_with_field))
            deleted.append(field.id)
            inherited = field.equivalent.values_list('id', flat=True)
            equivalents.setdefault(
                replace_with_field.id, set()).update(inherited)
            if not execute:
                return
            # replace equivalence references
            for f in Field.objects.filter(equivalent=field):
                # the kept field may itself list the removed field as an
                # equivalent; do not make it equivalent to itself
                if f.id != replace_with_field.id:
                    f.equivalent.add(replace_with_field)
                f.equivalent.remove(field)
            # replace field references
            FieldSetField.objects.filter(field=field).update(
                field=replace_with_field)
            FieldValue.objects.filter(field=field).update(
                field=replace_with_field)
            field.delete()

        unique = {}
        for field in Field.objects.all():
            key = self._grouping_key(field, usevocabs)
            unique.setdefault(key, []).append(field)

        print("\nFound %s unique fields out of %s" % (
            len(unique), Field.objects.count()
        ))

        # for each unique field, determine replacements

        for fields in unique.values():
            if len(fields) < 2:
                continue
            sorted_fields = sorted(fields, key=lambda f: f.name)
            keep = sorted_fields[0]
            for remove in sorted_fields[1:]:
                combine_fields(remove, keep)

        # now go through remaining fields and replace the ones that are not
        # standard fields with their standard equivalents, if any

        for field in Field.objects.exclude(id__in=deleted):
            if field.standard:
                continue
            eq = equivalents.get(field.id, set()).union(
                field.equivalent.values_list('id', flat=True))
            eqfields = Field.objects.filter(id__in=eq).exclude(
                id__in=deleted).exclude(standard=None).filter(
                label=field.label)
            if usevocabs:
                eqfields = eqfields.filter(vocabulary=field.vocabulary)
            if eqfields:
                combine_fields(field, eqfields[0])

        if execute:
            remaining = Field.objects.exclude(id__in=deleted)
            # count in the database instead of loading every row
            print("\nRemaining fields after cleanup: %s" % remaining.count())

        # NOTE(review): the usage report is assumed to run in both dry-run
        # and execute mode -- confirm against the intended behaviour.
        print("\nFields currently in use:\n Values Field")
        usage = FieldValue.objects.values_list(
            'field__name', 'field__standard__prefix').annotate(
            dcount=Count('field')).order_by(
            'field__standard__prefix', 'field__name')
        for name, prefix, count in usage:
            print("%10d %s%s" % (count, prefix + "." if prefix else "", name))

0 comments on commit cdaad58

Please sign in to comment.
Something went wrong with that request. Please try again.