-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #431 from UCL/feature/search-diacriticals
Feature/search diacriticals
- Loading branch information
Showing
3 changed files
with
137 additions
and
1 deletion.
There are no files selected for viewing
98 changes: 98 additions & 0 deletions
98
src/rard/research/migrations/0070_updating_plain_renditions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Generated by Django 3.2 on 2024-03-08 12:27
|
||
import re | ||
import string | ||
import unicodedata | ||
from django.db import migrations | ||
from django.utils.html import strip_tags | ||
|
||
|
||
def strip_combining_and_make_plain(content):
    """Return the plain-text rendition of *content* with combining marks removed.

    The content is first reduced to plain text by ``make_plain_text`` and then
    NFD-normalised, which splits accented characters into a base character
    followed by combining marks.  Every combining mark is dropped.  The
    result is left in decomposed form (it is not recomposed afterwards).
    """
    decomposed = unicodedata.normalize("NFD", make_plain_text(content))
    # unicodedata.combining() returns 0 for base characters and a non-zero
    # combining class for marks, so keeping the zeros discards the marks.
    return "".join(
        character
        for character in decomposed
        if unicodedata.combining(character) == 0
    )
|
||
|
||
def make_plain_text(content):
    """Reduce an HTML fragment to bare, single-spaced text.

    Steps, in order:
      1. remove U+FEFF characters (the original author noted these appear
         around mentions);
      2. put a space between directly adjacent tags so neighbouring elements
         do not fuse, then strip all tags;
      3. replace character references of the form ``&name;`` with a space;
      4. delete every ASCII punctuation character (``string.punctuation``);
      5. replace a whitespace-delimited one- or two-digit number with a
         space (the original author attributes these to mentions);
      6. collapse runs of spaces into a single space.

    Leading and trailing whitespace is not trimmed.
    """
    # NOTE(review): the reference pattern "&\w+;" does not match numeric
    # references such as "&#160;"; their digits survive the punctuation
    # step.  Presumably this function mirrors the application's own
    # plain-text renderer, so confirm against that before changing it here.
    text = content.replace("\ufeff", "")
    text = text.replace("><", "> <")
    text = strip_tags(text)
    text = re.sub(r"&\w+;", " ", text)
    punctuation_remover = str.maketrans("", "", string.punctuation)
    text = text.translate(punctuation_remover)
    text = re.sub(r"\s\d{1,2}\s", " ", text)
    return re.sub(r" +", " ", text)
|
||
|
||
def update_antiquarians(apps, schema_editor, plaining_function):
    """Re-render ``plain_introduction`` for every Antiquarian that has one.

    Rows whose ``plain_introduction`` is empty are left alone.  For the
    rest, the text is recomputed from ``introduction.content``; a row is
    written back only when the recomputed text differs from what is stored,
    and only the ``plain_introduction`` column is saved.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.Antiquarian`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the introduction content and
            returns the plain text to store.
    """
    Antiquarian = apps.get_model("research", "Antiquarian")
    for ant in Antiquarian.objects.all():
        if not ant.plain_introduction:
            continue
        refreshed = plaining_function(ant.introduction.content)
        if refreshed == ant.plain_introduction:
            # Nothing to write; avoids a needless UPDATE of the whole row.
            continue
        ant.plain_introduction = refreshed
        # Restrict the write to the one column this migration owns.
        ant.save(update_fields=["plain_introduction"])
|
||
|
||
def update_works(apps, schema_editor, plaining_function):
    """Re-render ``plain_introduction`` for every Work that has one.

    Rows whose ``plain_introduction`` is empty are left alone.  For the
    rest, the text is recomputed from ``introduction.content``; a row is
    written back only when the recomputed text differs from what is stored,
    and only the ``plain_introduction`` column is saved.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.Work`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the introduction content and
            returns the plain text to store.
    """
    Work = apps.get_model("research", "Work")
    for wk in Work.objects.all():
        if not wk.plain_introduction:
            continue
        refreshed = plaining_function(wk.introduction.content)
        if refreshed == wk.plain_introduction:
            # Nothing to write; avoids a needless UPDATE of the whole row.
            continue
        wk.plain_introduction = refreshed
        # Restrict the write to the one column this migration owns.
        wk.save(update_fields=["plain_introduction"])
|
||
|
||
def update_fragments(apps, schema_editor, plaining_function):
    """Re-render ``plain_commentary`` for every Fragment that has one.

    Rows whose ``plain_commentary`` is empty are left alone.  For the rest,
    the text is recomputed from ``commentary.content``; a row is written
    back only when the recomputed text differs from what is stored, and
    only the ``plain_commentary`` column is saved.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.Fragment`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the commentary content and
            returns the plain text to store.
    """
    Fragment = apps.get_model("research", "Fragment")
    for frag in Fragment.objects.all():
        if not frag.plain_commentary:
            continue
        refreshed = plaining_function(frag.commentary.content)
        if refreshed == frag.plain_commentary:
            # Nothing to write; avoids a needless UPDATE of the whole row.
            continue
        frag.plain_commentary = refreshed
        # Restrict the write to the one column this migration owns.
        frag.save(update_fields=["plain_commentary"])
|
||
|
||
def update_anonymous_fragments(apps, schema_editor, plaining_function):
    """Re-render ``plain_commentary`` for every AnonymousFragment that has one.

    Rows whose ``plain_commentary`` is empty are left alone.  For the rest,
    the text is recomputed from ``commentary.content``; a row is written
    back only when the recomputed text differs from what is stored, and
    only the ``plain_commentary`` column is saved.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.AnonymousFragment`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the commentary content and
            returns the plain text to store.
    """
    AnonymousFragment = apps.get_model("research", "AnonymousFragment")
    for afrag in AnonymousFragment.objects.all():
        if not afrag.plain_commentary:
            continue
        refreshed = plaining_function(afrag.commentary.content)
        if refreshed == afrag.plain_commentary:
            # Nothing to write; avoids a needless UPDATE of the whole row.
            continue
        afrag.plain_commentary = refreshed
        # Restrict the write to the one column this migration owns.
        afrag.save(update_fields=["plain_commentary"])
|
||
|
||
def update_testimonia(apps, schema_editor, plaining_function):
    """Re-render ``plain_commentary`` for every Testimonium that has one.

    Rows whose ``plain_commentary`` is empty are left alone.  For the rest,
    the text is recomputed from ``commentary.content``; a row is written
    back only when the recomputed text differs from what is stored, and
    only the ``plain_commentary`` column is saved.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.Testimonium`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the commentary content and
            returns the plain text to store.
    """
    Testimonium = apps.get_model("research", "Testimonium")
    for tes in Testimonium.objects.all():
        if not tes.plain_commentary:
            continue
        refreshed = plaining_function(tes.commentary.content)
        if refreshed == tes.plain_commentary:
            # Nothing to write; avoids a needless UPDATE of the whole row.
            continue
        tes.plain_commentary = refreshed
        # Restrict the write to the one column this migration owns.
        tes.save(update_fields=["plain_commentary"])
|
||
|
||
def update_original_texts(apps, schema_editor, plaining_function):
    """Re-render the plain columns of every OriginalText.

    ``plain_content`` is recomputed from ``content`` and
    ``plain_translated_text`` from ``translated_text``.  A plain column that
    is currently empty is left alone.  A row is written back only when at
    least one column actually changed, and only the changed columns are
    saved.

    NOTE(review): neither ``update_plain_texts`` nor ``revert_plain_texts``
    in this file calls this function — confirm whether that is intentional.

    Args:
        apps: app registry supplied to the migration; used to look up the
            ``research.OriginalText`` model.
        schema_editor: not used by this function.
        plaining_function: callable that takes the source text and returns
            the plain text to store.
    """
    OriginalText = apps.get_model("research", "OriginalText")
    # (plain column, column it is derived from)
    column_pairs = (
        ("plain_content", "content"),
        ("plain_translated_text", "translated_text"),
    )
    for ot in OriginalText.objects.all():
        changed_columns = []
        for plain_name, source_name in column_pairs:
            stored = getattr(ot, plain_name)
            if not stored:
                continue
            refreshed = plaining_function(getattr(ot, source_name))
            if refreshed != stored:
                setattr(ot, plain_name, refreshed)
                changed_columns.append(plain_name)
        if changed_columns:
            # Write only what changed rather than rewriting the whole row.
            ot.save(update_fields=changed_columns)
|
||
|
||
def update_plain_texts(apps, schema_editor):
    """Forward step: re-render stored plain text with combining marks removed.

    Runs each per-model updater in turn, passing
    ``strip_combining_and_make_plain`` as the rendering function.
    """
    # NOTE(review): update_original_texts is defined in this file but is not
    # part of this sequence — confirm whether that is intentional.
    updaters = (
        update_antiquarians,
        update_works,
        update_fragments,
        update_anonymous_fragments,
        update_testimonia,
    )
    for run_update in updaters:
        run_update(apps, schema_editor, strip_combining_and_make_plain)
|
||
|
||
def revert_plain_texts(apps, schema_editor):
    """Reverse step: re-render stored plain text, keeping combining marks.

    Runs each per-model updater in turn, passing ``make_plain_text`` as the
    rendering function.
    """
    # NOTE(review): update_original_texts is defined in this file but is not
    # part of this sequence — confirm whether that is intentional.
    updaters = (
        update_antiquarians,
        update_works,
        update_fragments,
        update_anonymous_fragments,
        update_testimonia,
    )
    for run_update in updaters:
        run_update(apps, schema_editor, make_plain_text)
|
||
|
||
class Migration(migrations.Migration):
    """Data migration that re-renders the stored plain-text columns.

    Forwards it applies ``update_plain_texts``; backwards it applies
    ``revert_plain_texts``.
    """

    dependencies = [("research", "0069_anonymousfragment_anonymous_fragments")]

    operations = [
        migrations.RunPython(
            code=update_plain_texts,
            reverse_code=revert_plain_texts,
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters