In [2]:
import joblib
import pandas as pd
import sqlite3 as sq

from itertools import chain

from src.db import (
    execute_fetchall,
    get_schema,
    get_schemas,
    get_schemas,
    drop_table,
    get_tables,
    count_tables,
    get_table,
    update_db
)

from src.format import (
    format_anime_base,
    format_X,
    format_anime_X,
    format_anime_title,
    format_anime
)

In [6]:
# Path of the SQLite database file; every src.db helper call in this notebook
# receives it through the `db` keyword.
db = 'animedb.sqlite'

In [7]:
# Display whatever src.db.get_schemas reports for the database
# (presumably the schema of each table -- confirm in src/db).
get_schemas(db = db)

In [8]:
# Fetch the table list once; it is reused at the end of the notebook to
# repeat the same count after the update.
tables = get_tables(db = db)
# Baseline counts before any write (presumably rows per table -- see
# src.db.count_tables).
count_tables(tables, db = db)

In [3]:
# Load the saved API responses. joblib.load unpickles the file, so only use it
# on files produced by a trusted run of this project.
responses = joblib.load('anime_responses.pkl')

# Every response holds its entries under 'data'; flatten them into one list.
response_list = list(chain.from_iterable(page['data'] for page in responses))

In [19]:
# Format every fetched entry and stack the results into a single frame.
new_frames = [format_anime_base(entry) for entry in response_list]

# All rows get the index value -1; a later cell filters on `index < 0` to pick
# these freshly fetched rows back out after merging with the stored table.
df_anime_new = (
    pd.concat(new_frames)
    .assign(index=-1)
    .set_index('index')
)

In [20]:
# Current contents of the 'anime' table (presumably returned as a DataFrame,
# since it is passed to pd.concat in the next step).
df_anime_old = get_table('anime', db = db)

In [39]:
# Columns that decide whether a fetched row is already represented in the
# stored table.
# NOTE(review): the original list named 'aired_from' twice; the repeat was
# redundant and is removed. If another column was intended, add it here.
dedupe_columns = ['anime_id', 'status', 'aired_from', 'aired_to']

# Stored rows come first, so with keep='first' a fetched row that matches an
# earlier row on every dedupe column is dropped and only new or changed
# entries survive from the fetched batch.
df_anime_keep = (
    pd.concat([df_anime_old, df_anime_new])
    .drop_duplicates(subset=dedupe_columns, keep='first')
)

# Fetched rows were given index -1, so a negative index selects the survivors.
# Assumes rows loaded from the database never have a negative index -- TODO
# confirm against src.db.get_table.
anime_ids = df_anime_keep.loc[df_anime_keep.index < 0, 'anime_id']
anime_ids

In [55]:
# Build the id lookup once: testing membership against the raw array rescans
# every id for each response, while a set lookup is constant time.
keep_ids = set(anime_ids.tolist())

# Keep only the raw responses whose 'mal_id' is among the ids selected above.
response_list_keep = [r for r in response_list if r['mal_id'] in keep_ids]
len(response_list_keep)

In [56]:
# Save the filtered responses to disk with joblib, under a separate file name
# from the full response dump loaded earlier.
joblib.dump(response_list_keep, 'anime_responses_keep.pkl')

In [47]:
# Run the full formatter on each kept response, passing its position in the
# list as the second argument. Each result is indexed by table name below
# (e.g. 'df_anime', 'df_producer').
animesf = [
    format_anime(response, position)
    for position, response in enumerate(response_list_keep)
]

In [48]:
def _stack(key, dedupe=False):
    """Concatenate the `key` frame of every entry in `animesf`.

    When `dedupe` is true, exact duplicate rows are dropped from the result.
    pd.concat raises ValueError if `animesf` is empty.
    """
    stacked = pd.concat([entry[key] for entry in animesf])
    return stacked.drop_duplicates() if dedupe else stacked


# Main table: one stacked frame, no de-duplication.
df_anime = _stack('df_anime')

# Lookup tables: the same row can come from many anime, so duplicates are
# dropped.
df_producer = _stack('df_producer', dedupe=True)
df_licensor = _stack('df_licensor', dedupe=True)
df_studio = _stack('df_studio', dedupe=True)
df_genre = _stack('df_genre', dedupe=True)
df_theme = _stack('df_theme', dedupe=True)
df_demographic = _stack('df_demographic', dedupe=True)

# Link tables and titles: stacked as-is.
df_anime_producer = _stack('df_anime_producer')
df_anime_licensor = _stack('df_anime_licensor')
df_anime_studio = _stack('df_anime_studio')
df_anime_genre = _stack('df_anime_genre')
df_anime_theme = _stack('df_anime_theme')
df_anime_demographic = _stack('df_anime_demographic')
df_anime_title = _stack('df_anime_title')

In [50]:
# (frame, table name, key columns) for every table to write, in the order the
# writes are issued: the anime table, then the lookup tables, then the link
# tables and titles.
table_updates = [
    (df_anime, 'anime', ['anime_id']),
    (df_producer, 'producer', ['producer_id']),
    (df_licensor, 'licensor', ['licensor_id']),
    (df_studio, 'studio', ['studio_id']),
    (df_genre, 'genre', ['genre_id']),
    (df_theme, 'theme', ['theme_id']),
    (df_demographic, 'demographic', ['demographic_id']),
    (df_anime_producer, 'anime_producer', ['anime_id', 'producer_id']),
    (df_anime_licensor, 'anime_licensor', ['anime_id', 'licensor_id']),
    (df_anime_studio, 'anime_studio', ['anime_id', 'studio_id']),
    (df_anime_genre, 'anime_genre', ['anime_id', 'genre_id']),
    (df_anime_theme, 'anime_theme', ['anime_id', 'theme_id']),
    (df_anime_demographic, 'anime_demographic', ['anime_id', 'demographic_id']),
    (df_anime_title, 'anime_title', ['anime_id', 'title']),
]

last_update_result = None
for frame, table_name, key_columns in table_updates:
    last_update_result = update_db(frame, table_name, key_columns, db=db)

# Keep the final call's return value as the cell's last expression, so the
# cell shows the same output as calling update_db directly on the last line.
last_update_result

In [54]:
# Repeat the table counts after the update so they can be compared with the
# counts displayed before any write.
count_tables(tables, db = db)