# Insert data into Strapi CMS

In [4]:
import getpass
import json
import os

import httpx
import tqdm
from dotenv import load_dotenv
from markdownify import markdownify
from slugify import slugify

load_dotenv()

# Base URL of the Strapi instance. Every request in this notebook builds its
# URL as f"{strapi_address}{endpoint}", so normalise the value to end with
# exactly one slash even when STRAPI_ADDRESS is configured without it.
strapi_address = os.getenv("STRAPI_ADDRESS", "http://localhost:1337/").rstrip("/") + "/"

# Checkout of the legacy project that holds the JSON dumps and the media files.
# Can be overridden through the environment; always ends with "/".
project_dir = (
    os.getenv("ARTFILM_LEGACY_DIR", "/Users/dulguun/source/artfilm-legacy/").rstrip("/")
    + "/"
)
# project_dir already ends with "/", so no extra separator is added here.
media_dir = f"{project_dir}webapps/artfilm_static_media/media/"

# Log in through the local auth endpoint; the password is prompted for
# interactively and never stored in the notebook.
resp = httpx.post(
    f"{strapi_address}auth/local",
    data={"identifier": "strapi", "password": getpass.getpass()},
)
# Stop here with the HTTP error instead of a KeyError on "jwt" below.
resp.raise_for_status()
jwt_token = resp.json()["jwt"]

# Authorization header passed to every authenticated Strapi call.
headers = {"Authorization": f"Bearer {jwt_token}"}

## Filtering the collection you want from the big dump
e.g.:
```sh
cat data-slim2.json | jq 'map(select(.model == "film_journal.articlecategory"))' > data-articlecategories.json
```

## Strapi API utilities

In [5]:
from httpx import Response


def strapi_upload_image(file_path, *, headers) -> Response:
    """Upload one local file to the Strapi ``upload`` endpoint.

    The file is opened in binary mode and sent as the multipart field
    ``files`` with a 15 second timeout.

    Args:
        file_path: path of the file to upload.
        headers: HTTP headers to send (keyword-only).

    Returns:
        The raw httpx response; the status code is not checked here.
    """
    with open(file_path, "rb") as image_file:
        return httpx.post(
            f"{strapi_address}upload",
            headers=headers,
            files={"files": image_file},
            timeout=15,
        )


def strapi_create_entry(endpoint, data, *, headers) -> Response:
    """POST ``data`` as form fields to ``endpoint`` and return the raw response.

    Args:
        endpoint: path appended to ``strapi_address`` (e.g. ``"posts"``).
        data: mapping of field names to values for the new entry.
        headers: HTTP headers to send (keyword-only).
    """
    return httpx.post(
        f"{strapi_address}{endpoint}",
        headers=headers,
        data=data,
    )


def strapi_get_entries(endpoint, kv_pairs=()):
    """Fetch entries from ``endpoint``, optionally filtered by query parameters.

    Args:
        endpoint: path appended to ``strapi_address`` (e.g. ``"authors"``).
        kv_pairs: iterable of ``(key, value)`` pairs used as query parameters,
            e.g. ``[("legacy_id", 3)]`` or ``some_dict.items()``. A key may
            appear more than once. Defaults to no filters.

    Returns:
        The decoded JSON body of the response.
    """
    # Let httpx build the query string so keys and values are URL-encoded;
    # list() also accepts dict views and other one-shot iterables.
    resp = httpx.get(f"{strapi_address}{endpoint}", params=list(kv_pairs))
    return resp.json()


def strapi_get_entry(endpoint, identifier):
    """Fetch the single entry ``identifier`` of ``endpoint`` as decoded JSON."""
    entry_url = f"{strapi_address}{endpoint}/{identifier}"
    response = httpx.get(entry_url)
    return response.json()


def strapi_update_entry(endpoint, identifier, value, *, headers) -> Response:
    """PUT ``value`` as form fields to the entry ``identifier`` of ``endpoint``.

    Args:
        endpoint: path appended to ``strapi_address`` (e.g. ``"posts"``).
        identifier: id of the entry to update.
        value: mapping of field names to their new values.
        headers: HTTP headers to send (keyword-only).

    Returns:
        The raw httpx response; the status code is not checked here.
    """
    entry_url = f"{strapi_address}{endpoint}/{identifier}"
    return httpx.put(entry_url, headers=headers, data=value)

## Load data

In [6]:
# load data from dump
def _load_json(path):
    """Read one JSON dump file.

    The encoding is pinned so non-ASCII text in the dumps is decoded the same
    way on every platform instead of depending on the locale default.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


articles_path = f"{project_dir}data-articles.json"
articles = _load_json(articles_path)

# distinct legacy user ids that are referenced as an article author
author_ids = {a["fields"]["author"] for a in articles}

users_path = f"{project_dir}data-users.json"
users = _load_json(users_path)

avatars_path = f"{project_dir}data-avatars.json"
avatars = _load_json(avatars_path)

# article categories, located under project_dir like the other dumps
data_path = f"{project_dir}data-articlecategories.json"
cats = _load_json(data_path)

## Create Categories

In [7]:
def create_categories():
    """Create one Strapi ``categories`` entry per record of the ``cats`` dump.

    Each entry gets the legacy ``title`` and the legacy primary key as
    ``legacy_id``.

    Raises:
        AssertionError: if Strapi does not answer a create request with 200.
    """
    for cat in cats:
        data = {"title": cat["fields"]["title"], "legacy_id": cat["pk"]}
        r = strapi_create_entry("categories", data, headers=headers)
        # Surface a rejected request instead of silently dropping the category.
        if r.status_code != 200:
            print(r.content)
        assert r.status_code == 200

## Create Authors
- get author IDs from articles
- filter user data with author IDs
- create Authors with user data
  - upload author image
  - post author

In [4]:
def create_authors():
    """Create a Strapi ``authors`` entry for every id in ``author_ids``.

    Authors that already exist in Strapi (matched on ``legacy_id``) are
    skipped. When the user has a primary avatar it is uploaded first and
    linked as the author's ``picture``; otherwise the author is created
    without a picture.

    Raises:
        ValueError: if ``users`` does not contain exactly one record for an
            author id.
        AssertionError: if an upload or create request is not answered with 200.
    """
    for author_id in author_ids:
        # check if it's already created, then skip over it
        if strapi_get_entries("authors", [("legacy_id", author_id)]):
            continue

        # find user name; the unpacking fails unless exactly one user matches
        (user,) = [u for u in users if u["pk"] == author_id]
        username = user["fields"]["username"]
        # find avatar path
        user_avatars = [
            a
            for a in avatars
            if a["fields"]["user"] == author_id and a["fields"]["primary"]
        ]

        # prepare author data
        author_data = {
            "name": username,
            "legacy_id": user["pk"],
        }

        if not user_avatars:
            print("no avatar found for", author_id, username)
            # can set to a default image here
        else:
            avatar = user_avatars[0]
            # upload user image first
            print("uploading image of user", username)
            # if this doesn't work check 1. auth token 2. role permissions
            resp = strapi_upload_image(
                f"{media_dir}{avatar['fields']['avatar']}", headers=headers
            )
            assert resp.status_code == 200
            author_data["picture"] = resp.json()[0]["id"]

        print("creating author entry for", username)
        resp = strapi_create_entry(
            endpoint="authors", data=author_data, headers=headers
        )
        if resp.status_code != 200:
            print(resp.content)
        assert resp.status_code == 200

In [6]:
def set_default_author_images():
    """Give every author without a picture the uploaded ``youth.jpg`` image.

    Raises:
        AssertionError: if no uploaded file named ``youth.jpg`` is found, or
            if an update request is not answered with 200.
    """
    # set default image for people without images
    matches = strapi_get_entries("upload/files", [("name", "youth.jpg")])
    assert matches, "no uploaded file named youth.jpg found in Strapi"
    default_author_image = matches[0]

    authors = [a for a in strapi_get_entries("authors") if a["picture"] is None]
    for author in authors:
        # Send only the changed field, the same way set_published_status
        # updates posts, rather than echoing the whole fetched entry back.
        r = strapi_update_entry(
            "authors",
            author["id"],
            {"picture": default_author_image["id"]},
            headers=headers,
        )
        assert r.status_code == 200
        print(".", end="")
    print("Done.")

## Create Articles

- upload article cover image
- markdownify the body
- prepare article data
  - set author using legacy_id relation

In [7]:
def create_articles():
    """Create a Strapi ``posts`` entry for every article of the legacy dump.

    Articles that already exist in Strapi (matched on ``legacy_id``) are
    skipped and reported with an ``s``; every created post prints a ``.``.
    For each new post the author and categories are resolved through their
    ``legacy_id``, the cover image is uploaded, and the HTML body is converted
    with ``markdownify``.

    Raises:
        IndexError: if the article's author has no Strapi entry yet.
        AssertionError: if an upload or create request is not answered with 200.
    """
    for article_data in articles:
        # check if it's already created, then skip over it
        if strapi_get_entries("posts", [("legacy_id", article_data["pk"])]):
            print("s", end="")
            continue

        # begin create post
        article = article_data["fields"]

        # get author
        author = strapi_get_entries("authors", [("legacy_id", article["author"])])[0]

        # get categories; without any ids the filter would be empty and the
        # request would return categories that do not belong to this article
        cat_ids = []
        if article["category"]:
            cat_filter = [("legacy_id_in", cat_id) for cat_id in article["category"]]
            categories = strapi_get_entries("categories", cat_filter)
            cat_ids = [cat["id"] for cat in categories]

        # create cover image
        resp = strapi_upload_image(f"{media_dir}{article['image']}", headers=headers)
        assert resp.status_code == 200

        cover_image_id = resp.json()[0]["id"]

        entry_data = {
            "title": article["title"],
            "slug": slugify(str(article_data["pk"]) + article["title"]),
            "excerpt": article["summary"],
            # carry the legacy published flag over instead of publishing everything
            "status": "published" if article["published"] else "draft",
            "content": markdownify(article["body"]),
            "date": article["created_date"][:10],  # lazy date formatting
            "author": author["id"],
            "coverImage": cover_image_id,
            "categories": cat_ids,
            "legacy_id": article_data["pk"],
        }
        resp = strapi_create_entry(endpoint="posts", data=entry_data, headers=headers)
        print(".", end="")
        if resp.status_code != 200:
            print(resp.content)
        assert resp.status_code == 200
    print("\nDone.")

In [13]:
def set_published_status():
    """Align each post's Strapi ``status`` with the legacy ``published`` flag.

    For every article of the dump the matching post is looked up by
    ``legacy_id`` and its status is set to ``"published"`` or ``"draft"``.
    A ``.`` is printed per updated post.

    Raises:
        IndexError: if an article has no matching post in Strapi.
        AssertionError: if an update request is not answered with 200.
    """
    for legacy_article in articles:
        if legacy_article["fields"]["published"]:
            new_status = "published"
        else:
            new_status = "draft"

        matching_posts = strapi_get_entries(
            "posts", [("legacy_id", legacy_article["pk"])]
        )
        post_id = matching_posts[0]["id"]

        response = strapi_update_entry(
            "posts", post_id, {"status": new_status}, headers=headers
        )
        assert response.status_code == 200
        print(".", end="")
    print("Done.")


# Runs the fix-up when the cell executes: one lookup and one update request
# per article in the dump.
set_published_status()

..........................................................................................................................................................................................................................Done.


In [16]:
# Sanity check: Strapi must hold exactly as many published posts as the dump
# has articles flagged as published.
_published_filter = [("status", "published"), ("_limit", 1000)]
strapi_published_count = len(strapi_get_entries("posts", _published_filter))
dump_published_count = sum(1 for a in articles if a["fields"]["published"])
assert strapi_published_count == dump_published_count

## Add Movies and related types

First, add the related content types that movies reference:
- Countries
- Languages
- Categories

### Add Countries

In [10]:
def load_legacy_data(filename):
    """Load ``<project_dir>dumped-json-data/<filename>.json`` and return its content.

    Args:
        filename: base name of the dump, without the ``.json`` extension.

    The encoding is pinned so non-ASCII text is decoded the same way on every
    platform instead of depending on the locale default.
    """
    dump_path = f"{project_dir}dumped-json-data/{filename}.json"
    with open(dump_path, encoding="utf-8") as f:
        return json.load(f)


def add_countries():
    """Create a Strapi ``countries`` entry for each record of the ``countries`` dump.

    ``code`` is the record's ``iso_3166_1`` value, falling back to the legacy
    primary key when that value is empty. A ``.`` is printed per entry.

    Raises:
        AssertionError: if a create request is not answered with 200.
    """
    for legacy_country in load_legacy_data("countries"):
        fields = legacy_country["fields"]

        payload = {}
        payload["name"] = fields["name"]
        payload["code"] = fields["iso_3166_1"] or legacy_country["pk"]
        payload["legacy_id"] = legacy_country["pk"]

        response = strapi_create_entry("countries", payload, headers=headers)
        assert response.status_code == 200
        print(".", end="")
    print("Done.")

### Add Languages

In [22]:
def add_languages():
    """Create a Strapi ``languages`` entry for each record of the ``languages`` dump.

    ``code`` is the record's ``iso_639_1`` value, falling back to the legacy
    primary key when that value is empty. A ``.`` is printed per entry.

    Raises:
        AssertionError: if a create request is not answered with 200.
    """
    for legacy_language in load_legacy_data("languages"):
        fields = legacy_language["fields"]
        name = fields["name"]
        code = fields["iso_639_1"] or legacy_language["pk"]
        payload = {"name": name, "code": code, "legacy_id": legacy_language["pk"]}

        response = strapi_create_entry("languages", payload, headers=headers)
        assert response.status_code == 200
        print(".", end="")
    print("Done.")


# Runs the import when the cell executes: one create request per language.
add_languages()

..........................................................................................................................................................................................Done.


### Add Movie Categories

In [25]:
def add_movie_categories():
    """Create a Strapi ``movie-categories`` entry per record of the ``categories`` dump.

    Each entry gets the legacy ``name`` and the legacy primary key as
    ``legacy_id``. A ``.`` is printed per entry.

    Raises:
        AssertionError: if a create request is not answered with 200.
    """
    for legacy_category in load_legacy_data("categories"):
        payload = dict(
            name=legacy_category["fields"]["name"],
            legacy_id=legacy_category["pk"],
        )
        response = strapi_create_entry("movie-categories", payload, headers=headers)
        assert response.status_code == 200
        print(".", end="")
    print("Done.")


# Runs the import when the cell executes: one create request per movie category.
add_movie_categories()

..........................................Done.


### Add Movies

In [3]:
def create_legacy_id_mapping(collection):
    """create { legacy_id: id } mapping for lookup

    Args:
        collection: iterable of mappings that each have ``"legacy_id"`` and
            ``"id"`` keys.

    Returns:
        dict mapping ``int(legacy_id)`` to ``int(id)``. Items whose
        ``legacy_id`` is ``None`` are left out: they cannot be looked up by
        legacy id and would otherwise make ``int()`` raise.
    """
    return {
        int(item["legacy_id"]): int(item["id"])
        for item in collection
        if item["legacy_id"] is not None
    }


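# get related data: uncomment and run the lines below before calling create_movies,
# which reads `cats`, `langs` and `countries` as global {legacy_id: id} mappings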
# cats = create_legacy_id_mapping(strapi_get_entries('movie-categories', [('_limit', 1000)]))
# langs = create_legacy_id_mapping(strapi_get_entries('languages', [('_limit', 1000)]))
# countries = create_legacy_id_mapping(strapi_get_entries('countries', [('_limit', 1000)]))
# movies = load_legacy_data('movies')


def create_movies(movies):
    """Create a Strapi ``movies`` entry for every record in ``movies``.

    The movie image, when present, is uploaded first and linked to the entry.
    Related languages, countries and categories are resolved through the
    module-level ``langs``, ``countries`` and ``cats`` lookups, which are
    indexed by legacy id and must exist before this is called. A ``.`` is
    printed per created movie.

    Args:
        movies: legacy movie records, each with ``"pk"`` and ``"fields"``.

    Raises:
        AssertionError: if an upload or create request is not answered with
            200 (the response body of a failed create is printed first).
    """
    for legacy_movie in movies:
        # To resume an interrupted import, skip the records that are already
        # in Strapi here (e.g. `if legacy_movie["pk"] <= 4138: continue`).
        fields = legacy_movie["fields"]

        # Upload the image first so the entry can reference it.
        # Note from the original run: the Strapi upload has problems with
        # largish files (~10MB); consider resizing bigger images if uploads
        # time out.
        image_id = None
        if fields["image"]:
            upload = strapi_upload_image(
                f"{media_dir}{fields['image']}", headers=headers
            )
            assert upload.status_code == 200
            image_id = upload.json()[0]["id"]

        payload = {
            "title": fields["name"],
            "title_mongolian": fields["name_mongolian"],
            "title_original": fields["name_original"],
            "slug": slugify(fields["name"] + "-" + str(legacy_movie["pk"])),
            "year": fields["year"] or 0,
            "synopsis": fields["synopsis"],
            "silent": fields["silent"],
            "length_min": fields["length"] or 0,
            "trailer_url": fields["trailer"],
            "imdb_url": fields["imdb_url"],
            "legacy_id": legacy_movie["pk"],
            "languages": [langs[legacy_id] for legacy_id in fields["languages"]],
            "countries": [countries[legacy_id] for legacy_id in fields["country"]],
            "categories": [cats[legacy_id] for legacy_id in fields["category"]],
        }
        if image_id:
            payload["image"] = image_id

        created = strapi_create_entry("movies", payload, headers=headers)
        if created.status_code != 200:
            print(created.content)
        assert created.status_code == 200
        print(".", end="")
    print("Done.")

### Add Movie Crew & Casts

In [11]:
# Legacy dumps used by the crew & cast import: people, the film/artist/cast
# link records, and the cast (role) categories.
persons, movie_teams, roles = (
    load_legacy_data("persons"),
    load_legacy_data("casts"),
    load_legacy_data("castcategories"),
)


def add_persons(skip_up_to_pk=4286):
    """Create a Strapi ``people`` entry for each record of the ``persons`` dump.

    The person's image, when present, is uploaded first and linked to the
    entry. Empty ``bio``, ``birth_year``, ``death_year`` and ``quote`` values
    are sent as empty strings. A ``.`` is printed per created person.

    Args:
        skip_up_to_pk: records whose legacy ``pk`` is less than or equal to
            this value are skipped. The default is the resume point that was
            previously hard-coded; pass ``0`` to import every record,
            assuming legacy pks are positive.

    Raises:
        AssertionError: if an upload or create request is not answered with
            200 (the response body of a failed create is printed first).
    """
    for person in persons:
        # skipping already inserted persons
        if person["pk"] <= skip_up_to_pk:
            continue
        m = person["fields"]

        # upload image
        image_id = None
        if m["image"]:
            # The Strapi upload has problems with largish files (~10MB);
            # consider resizing bigger images if uploads time out.
            r = strapi_upload_image(f"{media_dir}{m['image']}", headers=headers)
            assert r.status_code == 200
            image_id = r.json()[0]["id"]

        # prepare data
        data = {
            "name": m["name"],
            "name_mongolian": m["name_mongolian"],
            "bio": m["bio"] or "",
            "birth_year": m["birth_year"] or "",
            "death_year": m["death_year"] or "",
            "quote": m["quote"] or "",
            "legacy_id": person["pk"],
        }
        if image_id:
            data["image"] = image_id
        r = strapi_create_entry("people", data, headers=headers)
        if r.status_code != 200:
            print(r.content)
        assert r.status_code == 200
        print(".", end="")
    print("Done.")


def add_roles():
    """Create a Strapi ``role-categories`` entry for each record in ``roles``.

    ``name``, ``slug`` and ``weight`` are copied from the legacy record and
    its primary key is stored as ``legacy_id``.

    Raises:
        AssertionError: if a create request is not answered with 200 (the
            response body is printed first).
    """
    for legacy_role in roles:
        fields = legacy_role["fields"]
        payload = {key: fields[key] for key in ("name", "slug", "weight")}
        payload["legacy_id"] = legacy_role["pk"]

        response = strapi_create_entry("role-categories", payload, headers=headers)
        if response.status_code != 200:
            print(response.content)
        assert response.status_code == 200
    print("Done.")

In [24]:
# we need to make a mapping of legacy_id: id for all cast relation objects
# don't want to get all the details, so let's use graphql here

#!pip install graphqlclient
from graphqlclient import GraphQLClient

client = GraphQLClient(f"{strapi_address}graphql")
client.inject_token(f"Bearer {jwt_token}")

# the limit will work after increasing Strapi GraphQL plugin amountLimit
variables = {"limit": 10000}

# Only id and legacy_id are requested for the three collections that the
# cast relation rows point at.
casts_lookup_query = """
query getDataForCasts($limit: Int) {
  movies (limit: $limit) {
    id
    legacy_id
  }
  
  people (limit: $limit) {
    id
    legacy_id
  }
  
  roleCategories (limit: $limit) {
    id
    legacy_id
  }
}
"""
# execute() returns the response as a JSON string; decode it right away
result = json.loads(client.execute(casts_lookup_query, variables))

# {legacy_id: id} lookups, one per collection
lookup_data = result["data"]
movies_map = create_legacy_id_mapping(lookup_data["movies"])
people_map = create_legacy_id_mapping(lookup_data["people"])
role_categories_map = create_legacy_id_mapping(lookup_data["roleCategories"])

In [26]:
from tqdm import tqdm


def add_movie_crew_and_casts():
    """Create a Strapi ``movie-crew-and-casts`` entry per record of ``movie_teams``.

    The legacy ``film``, ``cast`` and ``artist`` ids of each record are
    translated to Strapi ids through ``movies_map``, ``role_categories_map``
    and ``people_map``. Progress is shown with ``tqdm``.

    Raises:
        KeyError: if a legacy id is missing from its lookup mapping.
        AssertionError: if a create request is not answered with 200 (the
            response body is printed first).
    """
    for role in tqdm(movie_teams):
        fields = role["fields"]
        data = {
            "movie": movies_map[fields["film"]],
            # the lookup is defined as `role_categories_map`; the previous
            # `roleCategories_map` spelling is not defined in this notebook
            "role_category": role_categories_map[fields["cast"]],
            "person": people_map[fields["artist"]],
        }
        r = strapi_create_entry("movie-crew-and-casts", data, headers=headers)
        if r.status_code != 200:
            print(r.content)
        assert r.status_code == 200


# Runs the import when the cell executes: one create request per cast record
# (the recorded run below took about 69 minutes for 20122 records).
add_movie_crew_and_casts()

100%|██████████| 20122/20122 [1:08:51<00:00,  4.87it/s]
