In [1]:
%load_ext lab_black

In [2]:
import re
import json

from uuid import uuid4

from django.db import connections

from rich import print

# Manual Stuff

* Set `DELETE_WAGTAIL_IMAGES=False` to deactivate `post_delete_file_cleanup`; otherwise the original image files get deleted 🥶
* Start only postgres and then run: `dropdb homepage && createdb homepage && python manage.py migrate`
* Sometimes needed after the conversion: `python manage.py fixtree`
* Sometimes you also have to run `python manage.py sqlsequencereset cast` and execute the SQL it prints via pgcli

# Create New Empty DB

In [5]:
import os

from django.conf import settings
from django.core.management import call_command

from pathlib import Path

In [6]:
current_working_dir = Path.cwd()
os.chdir(settings.ROOT_DIR)
!dropdb homepage && createdb homepage
call_command("migrate")
os.chdir(current_working_dir)

Operations to perform:
  Apply all migrations: account, admin, auth, authtoken, cast, contenttypes, django_comments, filepond, fluent_comments, indieweb, sessions, sites, socialaccount, taggit, threadedcomments, users, wagtailadmin, wagtailcore, wagtaildocs, wagtailembeds, wagtailforms, wagtailimages, wagtailredirects, wagtailsearch, wagtailusers, watson
Running migrations:
  Applying contenttypes.0001_initial... OK
  Applying contenttypes.0002_remove_content_type_name... OK
  Applying auth.0001_initial... OK
  Applying auth.0002_alter_permission_name_max_length... OK
  Applying auth.0003_alter_user_email_max_length... OK
  Applying auth.0004_alter_user_username_opts... OK
  Applying auth.0005_alter_user_last_login_null... OK
  Applying auth.0006_require_contenttypes_0002... OK
  Applying auth.0007_alter_validators_add_error_messages... OK
  Applying auth.0008_alter_user_username_max_length... OK
  Applying users.0001_initial... OK
  Applying account.0001_initial... OK
  Applying accou

# Fetch Legacy Data from Database Restore

In [8]:
def dictfetchall(cursor):
    """Turn every row returned by ``cursor.fetchall()`` into a dict.

    The keys are the column names, i.e. the first element of each entry in
    ``cursor.description``. Rows keep the order in which the cursor
    returned them.
    """
    field_names = [meta[0] for meta in cursor.description]
    records = []
    for values in cursor.fetchall():
        records.append(dict(zip(field_names, values)))
    return records


class Legacy:
    """Snapshot of the legacy tables, loaded when the object is created.

    Each statement listed in ``__init__`` is executed on the database
    connection named ``db_name`` and its rows (as dicts) are stored on the
    attribute paired with it.
    """

    def __init__(self, db_name="legacy"):
        self.db_name = db_name
        # (attribute, statement) pairs, fetched in exactly this order
        statements = (
            ("users", "select * from users_user"),
            ("blogs", "select * from cast_blog"),
            ("posts", "select * from cast_post"),
            ("images", "select * from cast_image"),
            ("galleries", "select * from cast_gallery"),
            ("gallery_images", "select * from cast_gallery_images"),
            ("videos", "select * from cast_video"),
            ("audios", "select * from cast_audio"),
        )
        for attribute, stmt in statements:
            setattr(self, attribute, self.fetch_rows(stmt))

    def fetch_rows(self, stmt):
        """Run ``stmt`` on the ``self.db_name`` connection and return its rows as dicts."""
        connection = connections[self.db_name]
        with connection.cursor() as cursor:
            cursor.execute(stmt)
            return dictfetchall(cursor)


class Converter:
    """Copy the rows held by a ``Legacy`` snapshot into the new database.

    ``convert()`` runs the individual steps in order. ``blogs()`` and
    ``images()`` read ``self.user_lookup``, which ``convert()`` fills from
    ``users()`` before calling them.
    """

    def __init__(self, legacy):
        self.legacy = legacy
        self.blog_content_type = ContentType.objects.get(app_label="cast", model="blog")

    def users(self):
        """Save one ``User`` per legacy user row and return ``{pk: user}`` for all users."""
        for l_user in self.legacy.users:
            user = User(**l_user)
            user.save()
        return {user.pk: user for user in User.objects.all()}

    def blogs(self):
        """Create one ``Blog`` page per legacy blog row below the default root page.

        Returns a dict mapping the legacy blog id to the pk of the new page.
        Requires ``self.user_lookup`` to be set (see ``convert()``).
        """
        blog_legacy_to_wagtail = {}
        root = Page.objects.get(title="Welcome to your new Wagtail site!")
        for l_blog in self.legacy.blogs:
            # Work on a copy so the legacy row itself stays untouched.
            kwargs = l_blog.copy()
            del kwargs["user_id"]
            del kwargs["id"]
            kwargs["owner"] = self.user_lookup[l_blog["user_id"]]
            # Must go into kwargs: assigning it to l_blog (as before) never
            # reached Blog(...) and modified the legacy data instead.
            kwargs["content_type"] = self.blog_content_type
            blog = Blog(**kwargs)
            blog = root.add_child(instance=blog)
            blog_legacy_to_wagtail[l_blog["id"]] = blog.pk
        return blog_legacy_to_wagtail

    def images(self):
        """Save one ``Image`` per legacy image row, reusing the legacy id as pk.

        Prints the running index every 300 rows as a progress indicator.
        Requires ``self.user_lookup`` to be set (see ``convert()``).
        """
        for num, l_image in enumerate(self.legacy.images):
            image = Image(
                pk=l_image["id"],
                file=l_image["original"],
                uploaded_by_user=self.user_lookup[l_image["user_id"]],
                created_at=l_image["created"],
                width=l_image["original_width"],
                height=l_image["original_height"],
            )
            image.save()
            if num % 300 == 0:
                print(num)

    def galleries(self):
        """Save one ``Gallery`` per legacy gallery row, passing every column except ``user_id``."""
        for l_gallery in self.legacy.galleries:
            kwargs = {k: v for k, v in l_gallery.items() if k != "user_id"}
            gallery = Gallery(**kwargs)
            gallery.save()

    def gallery_image_links(self):
        """Insert the legacy gallery/image link rows into ``cast_gallery_images``.

        Uses raw SQL on the "default" connection and keeps the legacy ids.
        """
        links = [
            (gi["id"], gi["gallery_id"], gi["image_id"])
            for gi in self.legacy.gallery_images
        ]
        stmt = "insert into cast_gallery_images (id, gallery_id, image_id) values (%s, %s, %s)"
        with connections["default"].cursor() as cursor:
            cursor.executemany(stmt, links)

    def convert(self):
        """Run all steps: users, blogs, images, galleries, gallery/image links.

        Stores the results of ``users()`` and ``blogs()`` on
        ``self.user_lookup`` and ``self.blog_lookup``.
        """
        self.user_lookup = self.users()
        self.blog_lookup = self.blogs()
        self.images()
        self.galleries()
        self.gallery_image_links()

In [9]:
converter = Converter(Legacy())

In [10]:
converter.legacy.videos

[{'id': 4,
  'created': datetime.datetime(2017, 9, 25, 7, 54, 11, 347675, tzinfo=datetime.timezone.utc),
  'modified': datetime.datetime(2018, 11, 9, 8, 44, 23, 746351, tzinfo=datetime.timezone.utc),
  'original': 'blogs_videos/51C64BBB-D835-4D49-B853-3A16DBCD7D22.MOV',
  'poster': 'blogs_videos/poster/poster_kccd027q.jpg',
  'poster_seconds': 1.0,
  'user_id': 1},
 {'id': 5,
  'created': datetime.datetime(2017, 9, 26, 9, 7, 28, 1621, tzinfo=datetime.timezone.utc),
  'modified': datetime.datetime(2018, 11, 9, 8, 44, 23, 746431, tzinfo=datetime.timezone.utc),
  'original': 'blogs_videos/62EA7F29-EA50-4B83-BBCD-DA5647E20D57.MOV',
  'poster': 'blogs_videos/poster/poster_u5w_lmu1.jpg',
  'poster_seconds': 1.0,
  'user_id': 1},
 {'id': 7,
  'created': datetime.datetime(2017, 10, 17, 15, 40, 48, 658117, tzinfo=datetime.timezone.utc),
  'modified': datetime.datetime(2018, 11, 9, 8, 44, 23, 746495, tzinfo=datetime.timezone.utc),
  'original': 'blogs_videos/claas_wohnzimmer_2017-10-17_compresse

In [5]:
%%time
converter.convert()

CPU times: user 4.44 s, sys: 424 ms, total: 4.87 s
Wall time: 4.24 s


In [34]:
#post = Post.objects.first()
#print(post.body._raw_data)

# Migrate Posts

In [7]:
def is_tag(text):
    """Tell whether ``text`` starts with ``{%`` and ends with ``%}``."""
    if not text.startswith("{%"):
        return False
    return text.endswith("%}")


def tag_to_block(tag):
    """Translate a ``{% name id %}`` tag into a stream block dict.

    For every tag name except "gallery" the block value is the integer id.
    For "gallery" the value is a list with one "item" block per image of the
    ``Gallery`` with that pk, each carrying the image pk and a fresh uuid4
    string as "id".

    Raises ValueError if the tag does not consist of exactly two
    whitespace-separated parts or the second part is not an integer.
    """
    tag_name, raw_id = tag.strip("{%").strip("%}").split()
    tag_id = int(raw_id)
    if tag_name != "gallery":
        return {"type": tag_name, "value": tag_id}
    gallery = Gallery.objects.get(pk=tag_id)
    items = [
        {"type": "item", "value": image.pk, "id": str(uuid4())}
        for image in gallery.images.all()
    ]
    return {"type": tag_name, "value": items}


def content_to_streamfield(content):
    """Split legacy post ``content`` into stream blocks wrapped in one "overview" block.

    ``content`` is split on tags of the form ``{% name 123 %}``. Each tag is
    converted with ``tag_to_block``; every other non-empty piece of text
    becomes a "paragraph" block. Returns a one-element list holding the
    "overview" block whose value is the list of those blocks, in order.
    """
    just_tag = re.compile(r"({% \w+ \d+ %})")
    overview = []
    for part in just_tag.split(content):
        if not part:
            continue
        # Classify with the same pattern that was used for splitting. A mere
        # "starts with {% and ends with %}" check also accepts text such as
        # "{% a %} text {% b %}", which tag_to_block cannot parse.
        if just_tag.fullmatch(part):
            overview.append(tag_to_block(part))
        else:
            overview.append({"type": "paragraph", "value": part})
    return [{"type": "overview", "value": overview}]


def build_post_from_legacy(legacy):
    """Build a ``Post`` from one legacy post row without saving it.

    All columns of ``legacy`` except "content", "author_id", "blog_id" and
    "id" are passed to ``Post`` as keyword arguments, together with the
    content type for app "cast" / model "post". The legacy "content" is
    converted with ``content_to_streamfield`` and assigned to ``post.body``
    as a JSON string.
    """
    skipped = ("content", "author_id", "blog_id", "id")
    kwargs = {}
    for key, value in legacy.items():
        if key in skipped:
            continue
        kwargs[key] = value
    kwargs["content_type"] = ContentType.objects.get(app_label="cast", model="post")
    post = Post(**kwargs)
    post.body = json.dumps(content_to_streamfield(legacy["content"]))
    return post

In [10]:
# Migrate two hand-picked legacy posts (ids 332 and 333) as children of the
# page with slug "ephes_blog".
blog = Page.objects.get(slug="ephes_blog")
# Index the legacy post rows by id for direct lookup.
legacy_post_lookup = {p["id"]: p for p in converter.legacy.posts}
for p_id in (332, 333):
    l_post = legacy_post_lookup[p_id]
    post = build_post_from_legacy(l_post)
    # add_child returns the page; `post` is rebound to that return value.
    post = blog.add_child(instance=post)
# NOTE: `blog`, `l_post` and `post` stay bound after the loop (to the values
# of the last iteration, id 333).

In [18]:
[l_post] = [p for p in converter.legacy.posts if p["id"] == 333]

In [19]:
post = build_post_from_legacy(l_post)

In [20]:
print(post.body.raw_data)

In [21]:
post = blog.add_child(instance=post)

In [56]:
def is_tag(text):
    """Report whether ``text`` begins with ``{%`` and finishes with ``%}``.

    NOTE: re-defines ``is_tag`` from the "Migrate Posts" cell further up.
    """
    opening, closing = "{%", "%}"
    return text.startswith(opening) and text.endswith(closing)


def tag_to_block(tag):
    """Translate a ``{% name id %}`` tag into a stream block dict.

    NOTE: this cell re-defines ``tag_to_block``. When the notebook runs top
    to bottom it replaces the definition from the "Migrate Posts" section,
    so it has to treat gallery tags the same way. Otherwise galleries would
    silently become a bare integer id.

    For every tag name except "gallery" the block value is the integer id.
    For "gallery" the value is a list with one "item" block per image of the
    ``Gallery`` with that pk, each carrying the image pk and a fresh uuid4
    string as "id".

    Raises ValueError if the tag does not consist of exactly two
    whitespace-separated parts or the second part is not an integer.
    """
    tag_name, tag_id = tag.strip("{%").strip("%}").split()
    tag_id = int(tag_id)
    if tag_name == "gallery":
        image_blocks = [
            {"type": "item", "value": image.pk, "id": str(uuid4())}
            for image in Gallery.objects.get(pk=tag_id).images.all()
        ]
        return {"type": tag_name, "value": image_blocks}
    return {"type": tag_name, "value": tag_id}


def content_to_streamfield(content):
    """Split legacy post ``content`` into stream blocks wrapped in one "overview" block.

    NOTE: re-defines ``content_to_streamfield`` from the "Migrate Posts"
    cell further up.

    ``content`` is split on tags of the form ``{% name 123 %}``. Each tag is
    converted with ``tag_to_block``; every other non-empty piece of text
    becomes a "paragraph" block. Returns a one-element list holding the
    "overview" block whose value is the list of those blocks, in order.
    """
    just_tag = re.compile(r"({% \w+ \d+ %})")
    overview = []
    for part in just_tag.split(content):
        if not part:
            continue
        # Classify with the same pattern that was used for splitting. A mere
        # "starts with {% and ends with %}" check also accepts text such as
        # "{% a %} text {% b %}", which tag_to_block cannot parse.
        if just_tag.fullmatch(part):
            overview.append(tag_to_block(part))
        else:
            overview.append({"type": "paragraph", "value": part})
    return [{"type": "overview", "value": overview}]

In [57]:
content_to_streamfield(l_post["content"])

[{'type': 'overview',
  'value': [{'type': 'paragraph',
    'value': '<p>Short work week. Gave me time to indulge in heavy yak shaving. At first, I tried to just make&nbsp;<a href="https://github.com/ephes/django-cast">django-cast</a>&nbsp;(the develop branch) work for newer <a href="https://www.djangoproject.com/">Django</a> and <a href="https://wagtail.org/">Wagtail</a> versions. Then I realized I had to fix&nbsp;<a href="http://github.com/ephes/wagtail_srcset/issues/2">wagtail-srcset</a>&nbsp;first and replaced&nbsp;<a href="https://python-poetry.org/">poetry</a>&nbsp;with&nbsp;<a href="https://flit.pypa.io/en/stable/">flit</a>&nbsp;while I&#39;m at it. Then I got lost fixing some stuff in&nbsp;<a href="https://github.com/ephes/kptncook/issues/18">kptncook</a>&nbsp;before I remembered that I have to also update&nbsp;<a href="https://github.com/ephes/django-indieweb">django-indieweb</a>&nbsp;and&nbsp;<a href="https://github.com/ephes/django_fileresponse/tree/main/">django-filerespons

In [45]:
l_post["content"][-20:]

'3>\r\n{% image 1281 %}'

In [29]:
post = blog.add_child(instance=post)

In [14]:
post.visible_date

datetime.datetime(2022, 11, 7, 0, 0, tzinfo=datetime.timezone.utc)

In [17]:
post.delete()

# Get Templatetags from Content

In [39]:
just_tag = re.compile(r"({% \w+ \d+ %})")
just_tag.split("foo {% asdf 34 %} bar")

['foo ', '{% asdf 34 %}', ' bar']

In [38]:
#split_tags.split(legacy["content"])

In [44]:
parse_tag = re.compile(r"{% (\w+) (\d+) %}")
[(tag_name, tag_pk)] = parse_tag.findall("{% asdf 34 %}")
print(tag_name, tag_pk)

In [42]:
parse_tag.findall("asdf")

[]

In [21]:
# `tags_regex` and `legacy` are not defined anywhere in this notebook; use the
# `parse_tag` pattern and the legacy post row `l_post` from the cells above.
parse_tag.findall(l_post["content"])

[('image', '1281')]

In [51]:
tag_name, tag_id = "{% image 1281 %}".strip("{%").strip("%}").split()
tag_id = int(tag_id)
print(tag_name, tag_id)

In [20]:
Page.objects.last().pk

6

In [30]:
print(Page.objects.get(slug="gallerie-test").post.body.raw_data)

In [44]:
{
    "type": "gallery",
    "value": [
        {"type": "item", "value": 1281, "id": "88c02e62-77e9-44f2-a84e-c3d84453cb13"},
        {"type": "item", "value": 1280, "id": "eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af"},
    ],
}

{'type': 'gallery',
 'value': [{'type': 'item',
   'value': 1281,
   'id': '88c02e62-77e9-44f2-a84e-c3d84453cb13'},
  {'type': 'item',
   'value': 1280,
   'id': 'eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af'}]}

In [25]:
Page.objects.all()

<PageQuerySet [<Page: Root>, <Page: Welcome to your new Wagtail site!>, <Page: Claas sagt Hallo>, <Page: Ephes Blog>, <Page: Gallerie-Test>, <Page: sadf>]>

In [46]:
post.body.raw_data[0]["value"][-1]

{'type': 'gallery',
 'value': [{'type': 'item',
   'value': 1281,
   'id': '88c02e62-77e9-44f2-a84e-c3d84453cb13'},
  {'type': 'item',
   'value': 1280,
   'id': 'eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af'}]}

In [49]:
post.body.raw_data[0]["value"][-1] = {
    "type": "gallery",
    "value": [
        {"type": "item", "value": 1281, "id": "88c02e62-77e9-44f2-a84e-c3d84453cb13"},
        {"type": "item", "value": 1280, "id": "eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af"},
    ],
}

In [51]:
Post.objects.all()

<PageQuerySet [<Post: Gallerie-Test>]>

In [56]:
Page.objects.last()

<Page: sadf>

In [18]:
{
    "type": "gallery",
    "value": [
        {"type": "item", "value": 1281},
        {"type": "item", "value": 1280},
    ],
}

{'type': 'gallery',
 'value': [{'type': 'item', 'value': 1281}, {'type': 'item', 'value': 1280}]}

In [11]:
{
    "type": "gallery",
    "value": [
        {"type": "item", "value": 1281, "id": "88c02e62-77e9-44f2-a84e-c3d84453cb13"},
        {"type": "item", "value": 1280, "id": "eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af"},
    ],
}

{'type': 'gallery',
 'value': [{'type': 'item',
   'value': 1281,
   'id': '88c02e62-77e9-44f2-a84e-c3d84453cb13'},
  {'type': 'item',
   'value': 1280,
   'id': 'eb5d3c2e-6e47-434a-a18a-98e2e0c5d3af'}]}

In [23]:
from uuid import uuid4

In [26]:
str(uuid4())

'96bbeccf-1981-4c90-aa53-ddc703c90c8e'