In [1]:
from genson import SchemaBuilder
from itertools import chain
import joblib

In [2]:
# Load the saved responses from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at a cache you produced yourself.
responses = joblib.load('anime_responses.pkl')
# Flatten the 'data' list of every response into one list of records.
response_list = list(chain.from_iterable(page['data'] for page in responses))

In [3]:
# Infer one JSON schema that covers every record in response_list.
builder = SchemaBuilder()
# Seed the builder with an empty object schema before feeding it records.
builder.add_schema({"type": "object", "properties": {}})
for anime in response_list:
    builder.add_object(anime)
# Last expression of the cell, so the merged schema is shown as cell output.
builder.to_schema()

In [65]:
import pandas as pd

def format_anime_base(anime):
    """Flatten the scalar attributes of one anime record into a one-row frame.

    Args:
        anime: dict describing a single anime. Every top-level key read here
            ('mal_id', 'url', 'images', 'aired', 'approved', ..., 'year')
            must be present, otherwise a KeyError is raised.

    Returns:
        pd.DataFrame with a single row: 'anime_id' (from 'mal_id'), 'url',
        six image URL columns, 'aired_from' / 'aired_to', and the remaining
        attributes copied under their original names.
    """
    row = {'anime_id': anime['mal_id'], 'url': anime['url']}

    # One column per (format, size) variant; a missing format or size gives None.
    for fmt in ('jpg', 'webp'):
        variants = anime['images'].get(fmt, dict())
        row[f'image_{fmt}'] = variants.get('image_url')
        row[f'image_{fmt}_small'] = variants.get('small_image_url')
        row[f'image_{fmt}_large'] = variants.get('large_image_url')

    for field in ('approved', 'type', 'source', 'episodes', 'status', 'airing'):
        row[field] = anime[field]

    # The nested 'aired' dict is split into two flat columns.
    aired = anime['aired']
    row['aired_from'] = aired.get('from')
    row['aired_to'] = aired.get('to')

    for field in ('duration', 'rating', 'score', 'scored_by',
                  'rank', 'popularity', 'season', 'year'):
        row[field] = anime[field]

    return pd.DataFrame([row])


def format_X(anime, key):
    """Turn one of an anime's entity lists into a frame keyed by '<key>_id'.

    Args:
        anime: dict for a single anime; must contain the plural key f'{key}s'.
        key: singular entity name, e.g. 'producer' or 'genre'.

    Returns:
        pd.DataFrame built from anime[f'{key}s'] with its 'mal_id' column
        renamed to f'{key}_id'. An empty list gives an empty, column-less frame.
    """
    entities = pd.DataFrame(anime[f'{key}s'])
    return entities.rename(columns={'mal_id': f'{key}_id'})


def format_anime_X(anime, key):
    """Build the anime-to-entity link rows for one anime.

    Args:
        anime: dict for a single anime; must contain 'mal_id' and the plural
            key f'{key}s' (a list of dicts, each normally carrying 'mal_id').
        key: singular entity name, e.g. 'producer' or 'genre'.

    Returns:
        pd.DataFrame with columns ['anime_id', f'{key}_id'], one row per entry
        of anime[f'{key}s']. An empty list gives a zero-row frame that still
        has both columns.
    """
    # Requesting the 'mal_id' column from the constructor guarantees it exists
    # even for an empty list. The previous approach concatenated onto an empty
    # placeholder frame, which pandas has deprecated and which makes the id
    # dtype depend on the pandas version (integer vs. object).
    links = pd.DataFrame(anime[f'{key}s'], columns=['mal_id'])
    return links\
        .assign(anime_id = anime['mal_id'])\
        [['anime_id', 'mal_id']]\
        .rename(columns={'mal_id': f'{key}_id'})



def format_anime_titles(anime):
    """List every title of one anime as (anime_id, title, type) rows.

    Args:
        anime: dict for a single anime; must contain 'mal_id' and 'titles'
            (a list of dicts with 'title' and 'type' entries).

    Returns:
        pd.DataFrame with columns ['anime_id', 'title', 'type'], one row per
        entry of anime['titles']. An empty list gives a zero-row frame with
        the same columns.
    """
    # Naming the columns in the constructor keeps the selection below valid
    # when 'titles' is empty; without it the column lookup raises KeyError.
    return pd\
        .DataFrame(anime['titles'], columns=['title', 'type'])\
        .assign(anime_id = anime['mal_id'])\
        [['anime_id', 'title', 'type']]


def format_anime(anime, i):
    """Produce every per-table frame for a single anime record.

    Args:
        anime: dict for a single anime, passed on to the format_* helpers.
        i: running index of the record; every 100th value (0 included) is
            printed as a progress marker.

    Returns:
        dict of DataFrames keyed 'df_anime', 'df_<entity>', 'df_anime_<entity>'
        for each of the six entity kinds, and 'df_anime_titles'.
    """
    if i % 100 == 0:
        print(i)

    entity_keys = ('producer', 'licensor', 'studio', 'genre', 'theme', 'demographic')

    frames = {'df_anime': format_anime_base(anime)}
    # Entity frames first, then the anime<->entity link frames.
    for entity in entity_keys:
        frames[f'df_{entity}'] = format_X(anime, entity)
    for entity in entity_keys:
        frames[f'df_anime_{entity}'] = format_anime_X(anime, entity)
    frames['df_anime_titles'] = format_anime_titles(anime)
    return frames

In [66]:
# Format every record; the enumerate index drives format_anime's progress print.
animesf = [
    format_anime(record, position)
    for position, record in enumerate(response_list)
]

In [79]:
def _stack_frames(frame_key):
    """Concatenate, across all formatted anime, the frames stored under frame_key."""
    return pd.concat([formatted[frame_key] for formatted in animesf])


# Main table: one row per anime.
df_anime = _stack_frames('df_anime')

# Entity tables: the same entity shows up under many anime, so drop exact
# duplicate rows.
df_producer = _stack_frames('df_producer').drop_duplicates()
df_licensor = _stack_frames('df_licensor').drop_duplicates()
df_studio = _stack_frames('df_studio').drop_duplicates()
df_genre = _stack_frames('df_genre').drop_duplicates()
df_theme = _stack_frames('df_theme').drop_duplicates()
df_demographic = _stack_frames('df_demographic').drop_duplicates()

# Link tables and titles: stacked as-is.
df_anime_producer = _stack_frames('df_anime_producer')
df_anime_licensor = _stack_frames('df_anime_licensor')
df_anime_studio = _stack_frames('df_anime_studio')
df_anime_genre = _stack_frames('df_anime_genre')
df_anime_theme = _stack_frames('df_anime_theme')
df_anime_demographic = _stack_frames('df_anime_demographic')
df_anime_titles = _stack_frames('df_anime_titles')

In [80]:
import sqlite3 as sq

# Append every frame to its table in the SQLite file.
# NOTE: if_exists='append' adds rows on every execution, so re-running this
# cell against an existing mydb.sqlite stores the data again.
conn = sq.connect('mydb.sqlite')
try:
    for table_name, frame in [
        ('anime',             df_anime),
        ('producer',          df_producer),
        ('licensor',          df_licensor),
        ('studio',            df_studio),
        ('genre',             df_genre),
        ('theme',             df_theme),
        ('demographic',       df_demographic),
        ('anime_producer',    df_anime_producer),
        ('anime_licensor',    df_anime_licensor),
        ('anime_studio',      df_anime_studio),
        ('anime_genre',       df_anime_genre),
        ('anime_theme',       df_anime_theme),
        ('anime_demographic', df_anime_demographic),
        ('anime_titles',      df_anime_titles),
    ]:
        frame.to_sql(table_name, conn, if_exists='append', index=False)
    conn.commit()
finally:
    # Close the connection even when one of the writes raises, so the
    # database file is not left held open by the kernel.
    conn.close()


In [83]:
# Read back the table definitions SQLite recorded for the database.
con = sq.connect('mydb.sqlite')
try:
    cursor = con.cursor()
    cursor.execute("SELECT * FROM sqlite_master WHERE type='table';")
    # No column names are passed, so the frame's columns are labelled 0..4.
    df_types = pd.DataFrame(cursor.fetchall())
finally:
    # All rows are already fetched; close so the file handle is not leaked.
    con.close()


In [86]:
# Column 4 is the last field of each sqlite_master row; per SQLite's schema
# table layout (type, name, tbl_name, rootpage, sql) that is the CREATE
# statement text of each table.
df_types[4].values

In [None]:
-- Explicit SQLite schema for the main anime table: one row per anime, with
-- the same columns that format_anime_base produces.
-- NOTE(review): this cell contains SQL, not Python; run it through sqlite3
-- (e.g. conn.executescript) rather than executing the cell directly.
CREATE TABLE anime (
    anime_id INTEGER PRIMARY KEY,
    url TEXT,
    image_jpg TEXT,
    image_jpg_small TEXT,
    image_jpg_large TEXT,
    image_webp TEXT,
    image_webp_small TEXT,
    image_webp_large TEXT,
    approved INTEGER,
    type TEXT,
    source TEXT,
    episodes INTEGER,
    status TEXT,
    airing INTEGER,
    aired_from TEXT,
    aired_to TEXT,
    duration TEXT,
    rating TEXT,
    -- REAL rather than TEXT so scores compare and sort numerically; with TEXT
    -- affinity '10.0' would sort before '9.0'.
    -- Assumes score is a decimal number -- confirm against the inferred schema.
    score REAL,
    scored_by INTEGER,
    rank INTEGER,
    popularity INTEGER,
    season TEXT,
    year INTEGER
);

-- Lookup tables, one per entity kind. All six share the same shape: the
-- entity's id as primary key (format_X renames mal_id to <entity>_id) plus
-- its type, name and url.
CREATE TABLE producer (
    producer_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

CREATE TABLE licensor (
    licensor_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

CREATE TABLE studio (
    studio_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

CREATE TABLE genre (
    genre_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

CREATE TABLE theme (
    theme_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

CREATE TABLE demographic (
    demographic_id INTEGER PRIMARY KEY,
    type TEXT,
    name TEXT,
    url TEXT
);

-- Link tables: one row per (anime, entity) pair, both columns referencing
-- their parent tables. No primary key is declared, so duplicate pairs are
-- not rejected.
-- The last constraint in each statement must not be followed by a comma;
-- SQLite reports a syntax error on ",)".
CREATE TABLE anime_producer (
    anime_id INTEGER,
    producer_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(producer_id) REFERENCES producer(producer_id)
);

CREATE TABLE anime_licensor (
    anime_id INTEGER,
    licensor_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(licensor_id) REFERENCES licensor(licensor_id)
);

CREATE TABLE anime_studio (
    anime_id INTEGER,
    studio_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(studio_id) REFERENCES studio(studio_id)
);

CREATE TABLE anime_genre (
    anime_id INTEGER,
    genre_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(genre_id) REFERENCES genre(genre_id)
);

CREATE TABLE anime_theme (
    anime_id INTEGER,
    theme_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(theme_id) REFERENCES theme(theme_id)
);

CREATE TABLE anime_demographic (
    anime_id INTEGER,
    demographic_id INTEGER,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id),
    FOREIGN KEY(demographic_id) REFERENCES demographic(demographic_id)
);

-- Every title of an anime, tagged with the title's type.
CREATE TABLE anime_titles (
    anime_id INTEGER,
    title TEXT,
    type TEXT,
    FOREIGN KEY(anime_id) REFERENCES anime(anime_id)
);

CREATE
