-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate data from popit-django to django-popolo
- Loading branch information
Showing
1 changed file
with
190 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import unicode_literals | ||
|
||
import re | ||
from urlparse import urlsplit, urlunsplit | ||
|
||
from django.db import migrations | ||
from django.contrib.contenttypes.management import update_contenttypes | ||
|
||
|
||
def is_proxy_url(url):
    """Return True if *url* is the EveryPolitician proxy host name.

    Despite the parameter name, the comparison is against a bare host
    name, so callers are expected to pass the network-location part of
    a URL (e.g. ``urlsplit(...).netloc``) rather than a full URL.
    """
    proxy_host = 'everypolitician-writeinpublic.herokuapp.com'
    return url == proxy_host
|
||
|
||
def update_source_url(original_url):
    """Translate a legacy source URL into the URL of its Popolo JSON.

    Three cases are handled, in this order:

    * the host is the EveryPolitician proxy: the path must consist of
      exactly ``<country>/<house>`` and the result is the matching
      ``ep-popolo-v1.0.json`` file in the everypolitician-data
      repository (any other number of path segments raises
      ``ValueError`` from the tuple unpacking);
    * one of the dot-separated host labels is ``popit`` and the path is
      empty, ``/`` or starts with ``/api``: the path is replaced with
      ``/api/v0.1/export.json`` and the query and fragment are dropped;
    * anything else: the URL is returned unchanged.
    """
    parts = urlsplit(original_url)
    host_labels = parts.netloc.split('.')
    api_like_path = re.match(r'/?$|^/api', parts.path)

    if is_proxy_url(parts.netloc):
        # One of the faked PopIt instances for EveryPolitician: point
        # at the upstream EveryPolitician JSON instead.
        country, house = parts.path.lstrip('/').split('/')
        return (
            'https://raw.githubusercontent.com'
            '/everypolitician/everypolitician-data/master/data/'
            '{country}/{house}/ep-popolo-v1.0.json'
        ).format(country=country, house=house)

    if 'popit' in host_labels and api_like_path:
        # Looks like a PopIt instance: link to its export.json, keeping
        # the scheme and host but discarding query string and fragment.
        scheme, netloc = parts[0], parts[1]
        return urlunsplit(
            (scheme, netloc, '/api/v0.1/export.json', '', ''))

    return original_url
|
||
|
||
def forwards(apps, schema_editor):
    """Copy popit-django data into the parallel django-popolo models.

    Steps, in order:

    1. make sure the content types for the ``popolo`` app exist;
    2. create a ``PopoloSource`` for every old ``ApiInstance``;
    3. create a django-popolo ``Person`` for every popit-django
       ``Person``, together with its Identifiers and a
       ``LinkToPopoloSource``;
    4. fill in the parallel ``popolo_person`` / ``popolo_source``
       fields on ``Contact``, ``WriteitInstancePopitInstanceRecord``,
       ``Answer`` and ``NoContactOM``, and create an
       ``InstanceMembership`` for every ``Membership``.

    ``apps`` is the historical app registry passed in by ``RunPython``;
    ``schema_editor`` is unused.
    """
    # Make sure the content types for django-popolo exist, with a
    # hacky workaround from: http://stackoverflow.com/a/35353170/223092
    popolo_app = apps.app_configs['popolo']
    popolo_app.models_module = popolo_app.models_module or True
    update_contenttypes(popolo_app, verbosity=1, interactive=False)
    ContentType = apps.get_model('contenttypes', 'ContentType')
    person_content_type = ContentType.objects.get(
        app_label='popolo', model='person')

    # Create a PopoloSource for each old ApiInstance:
    ApiInstance = apps.get_model('popit', 'ApiInstance')
    PopoloSource = apps.get_model('popolo_sources', 'PopoloSource')
    LinkToPopoloSource = apps.get_model('popolo_sources', 'LinkToPopoloSource')
    ai_to_ps = {}
    for ai in ApiInstance.objects.all():
        ai_to_ps[ai] = PopoloSource.objects.create(
            url=update_source_url(ai.url))

    # Now create a django-popolo Person for each old django-popit
    # Person:
    PopItPerson = apps.get_model('popit', 'Person')
    PopoloPerson = apps.get_model('popolo', 'Person')
    Identifier = apps.get_model('popolo', 'Identifier')

    def add_identifier(person, scheme, identifier):
        # Attach one (scheme, identifier) pair to a new popolo Person
        # through the generic relation on Identifier.
        Identifier.objects.create(
            scheme=scheme,
            identifier=identifier,
            object_id=person.id,
            content_type_id=person_content_type.id,
        )

    p_old_to_p_new = {}
    for popit_person in PopItPerson.objects.all():
        # Look up this person's own PopoloSource *before* it is needed
        # for the popolo_uri Identifier below. Previously the lookup
        # happened after that Identifier was created, so the URI was
        # built from whichever source an earlier loop iteration had
        # left behind.
        person_source = ai_to_ps[popit_person.api_instance]
        # The popit_ids from
        # everypolitician-writeinpublic.herokuapp.com sources have
        # 'person/' prepended, which they won't have when we switch to
        # using the upstream EveryPolitician Popolo JSON. So if it's
        # from such a source, remove that prefix.
        new_popolo_person_id = popit_person.popit_id
        if is_proxy_url(urlsplit(popit_person.popit_url).netloc):
            prefix = 'person/'
            if new_popolo_person_id.startswith(prefix):
                new_popolo_person_id = new_popolo_person_id[len(prefix):]
        # Create the new django-popolo Person object:
        new_person = PopoloPerson.objects.create(
            name=popit_person.name,
            summary=popit_person.summary,
            image=popit_person.image,
        )
        p_old_to_p_new[popit_person] = new_person
        # The popit_id Identifier is to preserve the old popit_id that
        # was on a popit-django Person. This will still be returned by
        # the API for people that have it, and will be looked up when
        # used in some URLs to avoid breaking old links.
        add_identifier(new_person, 'popit_id', popit_person.popit_id)
        # The popit_url Identifier is to preserve the old popit_url
        # that was on a popit-django Person. This should still be used
        # as the canonical identifier for people in the API if they
        # have it, but for newer people popolo_uri will be used.
        add_identifier(new_person, 'popit_url', popit_person.popit_url)
        # The popolo:person Identifier represents the 'id' of the
        # Person in the Popolo JSON source; it's required (among other
        # things) so that when updating from the Popolo JSON source,
        # we can tell whether a person already exists in the database
        # or not. (When fetching new people, they will have it added
        # as well.)
        add_identifier(new_person, 'popolo:person', new_popolo_person_id)
        # The popit_django_person_id Identifier preserves the old ID
        # of the popit-django Person object; this shouldn't be needed
        # anywhere, but it would be foolish not to preserve it in case
        # it's helpful for debugging, etc.
        add_identifier(
            new_person, 'popit_django_person_id', popit_person.id)
        # This is the new canonical URI for the source of information
        # about a person, which is used in API responses where there
        # is no legacy popit_url.
        # NOTE(review): this keeps using the unstripped popit_id, as
        # the original code did - confirm that is the intended id.
        add_identifier(
            new_person,
            'popolo_uri',
            person_source.url + '#person-' + popit_person.popit_id,
        )
        # Set the right PopoloSource for the person:
        LinkToPopoloSource.objects.create(
            popolo_source=person_source,
            deleted_from_source=False,
            object_id=new_person.id,
            content_type_id=person_content_type.id,
        )

    # Update the parallel popolo_person attribute on Contact
    Contact = apps.get_model('contactos', 'Contact')
    for c in Contact.objects.all():
        c.popolo_person = p_old_to_p_new[c.person]
        c.save()

    # Create the parallel InstanceMembership relation:
    Membership = apps.get_model('instance', 'Membership')
    InstanceMembership = apps.get_model('instance', 'InstanceMembership')
    for m in Membership.objects.all():
        InstanceMembership.objects.create(
            person=p_old_to_p_new[m.person],
            writeitinstance=m.writeitinstance,
        )

    # Update the parallel popolo_source field on
    # WriteitInstancePopitInstanceRecord.
    WriteitInstancePopitInstanceRecord = apps.get_model(
        'instance', 'WriteitInstancePopitInstanceRecord')
    for wiipir in WriteitInstancePopitInstanceRecord.objects.all():
        wiipir.popolo_source = ai_to_ps[wiipir.popitapiinstance]
        wiipir.save()

    # Update the parallel popolo_person attribute on Answer:
    Answer = apps.get_model('nuntium', 'Answer')
    for a in Answer.objects.all():
        a.popolo_person = p_old_to_p_new[a.person]
        a.save()

    # Update the parallel popolo_person field on NoContactOM:
    NoContactOM = apps.get_model('nuntium', 'NoContactOM')
    for ncom in NoContactOM.objects.all():
        ncom.popolo_person = p_old_to_p_new[ncom.person]
        ncom.save()
|
||
def backwards(apps, schema_editor):
    """Reverse step for the data migration: intentionally does nothing.

    Both arguments are ignored and nothing created by ``forwards`` is
    removed; the function exists so the migration can be unapplied
    without raising.
    """
|
||
|
||
class Migration(migrations.Migration):
    # Data-only migration: runs `forwards` when applied and the no-op
    # `backwards` when unapplied.

    # Migrations in the other apps that must already have been applied
    # before this one runs.
    dependencies = [
        ('popit', '0001_initial'),
        ('popolo_sources', '0001_initial'),
        ('instance', '0003_add_parallel_popolo_data'),
        ('popolo', '0002_update_models_from_upstream'),
        ('contactos', '0002_contact_popolo_person'),
        ('nuntium', '0003_add_parallel_popolo_data'),
    ]

    operations = [
        migrations.RunPython(code=forwards, reverse_code=backwards),
    ]