In [3]:
import joblib
import pandas as pd
import sqlite3 as sq
import json

from functools import reduce
from operator import add
from itertools import chain
from toolz import reduceby

from src.db import (
    get_table,
)

In [4]:
def to_nested_dict(df, key1, key2, columns_keep=None):
    """Group the rows of ``df`` by ``key1`` and describe each group via ``key2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``key1`` and ``key2`` columns (and every column
        named in ``columns_keep``).
    key1 : str
        Column whose distinct values become the keys of the returned dict.
    key2 : str
        Column identifying the rows inside each ``key1`` group.
    columns_keep : sequence of str, optional
        Columns to report for every ``key2`` entry. ``None`` or an empty
        sequence (the default) selects the flat-list output instead.

    Returns
    -------
    dict
        * With ``columns_keep``: ``{k1: {k2: {column: value, ...}}}``.
        * Without: ``{k1: [k2, ...]}`` -- ``k2`` values in row order, keys in
          order of first appearance.

    Notes
    -----
    In the ``columns_keep`` form each group is indexed by ``key2`` before
    ``to_dict(orient='index')`` is called, so pandas will reject a group in
    which ``key2`` is not unique.
    """
    # `None` replaces the former mutable `[]` default; an explicit empty
    # sequence still selects the flat-list form, as before.
    if columns_keep is not None and len(columns_keep):
        return {
            k1: (
                df[df[key1] == k1]
                .drop(key1, axis=1)
                .set_index(key2)
                [columns_keep]
                .to_dict(orient='index')
            )
            for k1 in df[key1].unique()
        }

    # Append in place: the previous fold rebuilt the list for every row
    # (`acc + [value]`), which is quadratic in the size of each group.
    grouped = {}
    for record in df[[key1, key2]].to_dict(orient='records'):
        grouped.setdefault(record[key1], []).append(record[key2])
    return grouped

In [5]:
# Database identifier handed to every `get_table(...)` call in the loading cell.
db = 'animedb.sqlite'

In [16]:
# Columns of the `anime` table that are carried through the notebook.
anime_columns = [
    'anime_id', 'url',
    'image_jpg', 'image_jpg_small', 'image_jpg_large',
    'image_webp', 'image_webp_small', 'image_webp_large',
    'type', 'source', 'episodes', 'status',
    'aired_from', 'aired_to', 'duration', 'rating',
    'score', 'scored_by', 'rank', 'popularity',
]
# Columns of the `voiceactor` table that are carried through the notebook.
voiceactor_columns = ['voiceactor_id', 'name', 'url', 'image_url', 'favorites']

# Entity tables.
df_anime = get_table('anime', db)[anime_columns]
df_anime_title = get_table('anime_title', db)
df_character = get_table('character', db)
df_voiceactor = get_table('voiceactor', db)[voiceactor_columns]
df_staff = get_table('staff', db)

# Link tables between the entities.
df_anime_character = get_table('anime_character', db)
df_character_voiceactor = get_table('character_voiceactor', db)
df_anime_staff = get_table('anime_staff', db)


In [7]:
# One row per character: the largest `favorites` value among its anime links.
top_favorites = (
    df_anime_character
    .sort_values('favorites', ascending=False)
    .drop_duplicates(subset='character_id')
    [['character_id', 'favorites']]
)

# One row per character: the `role` that comes first in ascending sort order.
first_role = (
    df_anime_character
    .sort_values('role')
    .drop_duplicates(subset='character_id')
    [['character_id', 'role']]
)

# Attach both columns to the character table and order by `favorites`
# (ascending). Both merges use the default join, so characters without any
# anime link are dropped.
df_character = (
    df_character
    .merge(top_favorites, on='character_id')
    .merge(first_role, on='character_id')
    .sort_values('favorites')
)

In [8]:
# Anime: drop entries rated 'Rx - Hentai', require more than 10,000 scorers,
# order by number of scorers (descending) and replace missing values with ''.
not_hentai = ~df_anime['rating'].isin(['Rx - Hentai'])
enough_scorers = df_anime['scored_by'] > 10_000
df_anime = (
    df_anime[not_hentai & enough_scorers]
    .sort_values('scored_by', ascending=False)
    .fillna('')
)
kept_anime_ids = df_anime['anime_id']

# Titles of the kept anime; the 'Default' title is joined onto each anime row.
df_anime_title = df_anime_title[df_anime_title['anime_id'].isin(kept_anime_ids)].fillna('')
default_titles = df_anime_title[df_anime_title['type'] == 'Default'][['anime_id', 'title']]
df_anime_f = df_anime.merge(default_titles)

# Link tables restricted to the kept anime.
df_anime_character = (
    df_anime_character[df_anime_character['anime_id'].isin(kept_anime_ids)]
    .sort_values('favorites', ascending=False)
    .fillna('')
)
df_anime_staff = df_anime_staff[df_anime_staff['anime_id'].isin(kept_anime_ids)].fillna('')

# Everything reachable from the kept anime: characters, then their voice
# actors, then staff. The character/voice-actor link table is narrowed before
# the voice-actor table that is filtered against it.
kept_character_ids = df_anime_character['character_id']
df_character_voiceactor = (
    df_character_voiceactor[df_character_voiceactor['character_id'].isin(kept_character_ids)]
    .fillna('')
)
df_character = df_character[df_character['character_id'].isin(kept_character_ids)].fillna('')
df_voiceactor = (
    df_voiceactor[df_voiceactor['voiceactor_id'].isin(df_character_voiceactor['voiceactor_id'])]
    .fillna('')
)
df_staff = df_staff[df_staff['staff_id'].isin(df_anime_staff['staff_id'])].fillna('')

# Staff: keep only credits whose position starts with 'Theme Song', and the
# staff members that still have such a credit.
is_theme_song = df_anime_staff['position'].str.startswith('Theme Song')
df_anime_staff = df_anime_staff[is_theme_song]
df_staff = df_staff[df_staff['staff_id'].isin(df_anime_staff['staff_id'].unique())]

In [9]:
# Preview the first anime ids that survived the filters.
df_anime_f['anime_id'].head()

In [8]:
# Custom filters
# Narrow every table to the anime listed in TEST_ANIMES.
TEST_ANIMES = [16498, 5081]
df_anime_f              = df_anime_f[df_anime_f['anime_id'].isin(TEST_ANIMES)]
df_anime_character      = df_anime_character[df_anime_character['anime_id'].isin(TEST_ANIMES)]
df_anime_staff          = df_anime_staff[df_anime_staff['anime_id'].isin(TEST_ANIMES)]
df_character            = df_character[df_character['character_id'].isin(df_anime_character['character_id'])]
# The character/voice-actor links must be narrowed BEFORE the voice-actor
# table is filtered against them; doing it afterwards leaves voice actors of
# unrelated anime in `df_voiceactor`.
df_character_voiceactor = df_character_voiceactor[df_character_voiceactor['character_id'].isin(df_anime_character['character_id'])]
df_voiceactor           = df_voiceactor[df_voiceactor['voiceactor_id'].isin(df_character_voiceactor['voiceactor_id'])]
df_staff                = df_staff[df_staff['staff_id'].isin(df_anime_staff['staff_id'])]

In [9]:
# (character_id, favorites // 100) pairs, one row per character.
weight_rows = (
    df_character
    .set_index('character_id')['favorites']
    .floordiv(100)
    .reset_index()
    .values
)

# Each character id is repeated once per full hundred favorites, so a
# character with fewer than 100 favorites does not appear at all.
# NOTE(review): presumably a pool for favorites-weighted random picks - confirm.
character_weights = list(chain.from_iterable(
    (int(character_id) for _ in range(repeats))
    for character_id, repeats in weight_rows
))

In [10]:
# De-duplicated (anime, staff) pairs, shared by both staff lookups below.
anime_staff_pairs = df_anime_staff[['anime_id', 'staff_id']].drop_duplicates()

final_dict = {
    # Entity records keyed by id; the id column is also kept inside each record.
    'anime': df_anime_f.set_index('anime_id', drop=False).to_dict(orient='index'),
    'character': df_character.set_index('character_id', drop=False).to_dict(orient='index'),
    'voiceactor': df_voiceactor.set_index('voiceactor_id', drop=False).to_dict(orient='index'),
    'staff': df_staff.set_index('staff_id', drop=False).to_dict(orient='index'),
    # Adjacency lists, one per direction of each relation.
    'anime_character': to_nested_dict(df_anime_character, 'anime_id', 'character_id'),
    'character_anime': to_nested_dict(df_anime_character, 'character_id', 'anime_id'),
    'anime_staff': to_nested_dict(anime_staff_pairs, 'anime_id', 'staff_id'),
    'staff_anime': to_nested_dict(anime_staff_pairs, 'staff_id', 'anime_id'),
    'character_voiceactor': to_nested_dict(df_character_voiceactor, 'character_id', 'voiceactor_id'),
    'voiceactor_character': to_nested_dict(df_character_voiceactor, 'voiceactor_id', 'character_id'),
    # Character ids repeated according to their favorites count.
    'character_weights': character_weights,
}

In [37]:
# Serialise the assembled lookup tables as JSON; the target file is
# overwritten if it already exists.
with open('animu_erdos/src/animedbtest.json', 'w') as out_file:
    json.dump(final_dict, out_file)

In [38]:
# NOTE(review): this cell used to hold a bare undefined name, which halted
# "Run All" at this point with a NameError. Presumably that was a deliberate
# barrier in front of the cells below - confirm. The stop is kept, but is now
# explicit and self-describing.
raise RuntimeError('Execution stopped on purpose: run the cells below manually if needed.')

In [None]:
# Serialise the title table as a JSON array of records and preview the first
# 1000 characters of the resulting string.
s=df_anime_title.to_json(orient='records')
s[:1000]

In [None]:
# Rebuild `df_anime_f` with the 'Default' title joined on, keeping only a
# reduced set of columns.
default_titles = df_anime_title.loc[
    df_anime_title['type'] == 'Default',
    ['anime_id', 'title'],
]
reduced_columns = ['anime_id', 'title', 'image_webp', 'score', 'scored_by', 'popularity']
df_anime_f = df_anime.merge(default_titles)[reduced_columns]

In [None]:
# Add a type-prefixed node id ('A_', 'C_', 'V_', 'S_') to every entity table.
# `.assign` returns a new frame instead of writing into the existing one:
# several of these frames come from boolean indexing in earlier cells, where
# item assignment triggers pandas' SettingWithCopyWarning. Rebinding the name
# also keeps the cell safe to re-run.
df_anime      = df_anime.assign(id='A_' + df_anime['anime_id'].astype(str))
df_character  = df_character.assign(id='C_' + df_character['character_id'].astype(str))
df_voiceactor = df_voiceactor.assign(id='V_' + df_voiceactor['voiceactor_id'].astype(str))
df_staff      = df_staff.assign(id='S_' + df_staff['staff_id'].astype(str))

In [None]:
import networkx as nx

G = nx.Graph()

# One node per anime / character / voice actor / staff member. The prefix
# keeps ids from different tables apart.
anime_nodes = 'A_' + df_anime['anime_id'].astype(str)
character_nodes = 'C_' + df_character['character_id'].astype(str)
voiceactor_nodes = 'V_' + df_voiceactor['voiceactor_id'].astype(str)
staff_nodes = 'S_' + df_staff['staff_id'].astype(str)

all_nodes = pd.concat([anime_nodes, character_nodes, voiceactor_nodes, staff_nodes])
G.add_nodes_from(all_nodes.values)

In [None]:
# (link table, source prefix, source column, target prefix, target column),
# processed in this order: anime-character, character-voiceactor, anime-staff.
edge_specs = [
    (df_anime_character,      'A_', 'anime_id',     'C_', 'character_id'),
    (df_character_voiceactor, 'C_', 'character_id', 'V_', 'voiceactor_id'),
    (df_anime_staff,          'A_', 'anime_id',     'S_', 'staff_id'),
]

for link_table, source_prefix, source_col, target_prefix, target_col in edge_specs:
    # Two-column frame of prefixed node ids; each row becomes one edge.
    endpoints = pd.concat(
        [
            source_prefix + link_table[source_col].astype(str),
            target_prefix + link_table[target_col].astype(str),
        ],
        axis=1,
    )
    G.add_edges_from(endpoints.values)


In [None]:
def df_to_node_attribute(df, exclude_cols, dict_constants):
    """Turn a frame into a ``{node_id: {attribute: value}}`` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'id'`` column; its values become the outer keys.
    exclude_cols : str or list of str
        Column label(s) removed before the attributes are collected.
    dict_constants : dict
        Extra ``{name: value}`` attributes added to every node via
        ``DataFrame.assign`` (an existing column of the same name is
        overwritten).

    Returns
    -------
    dict
        One inner dict per ``'id'`` value, holding the remaining columns plus
        the constants. ``df`` itself is left unchanged.
    """
    without_excluded = df.drop(columns=exclude_cols)
    keyed_by_id = without_excluded.set_index('id')
    with_constants = keyed_by_id.assign(**dict_constants)
    return with_constants.to_dict(orient='index')

In [None]:
# node_attributes = {
#     **df_to_node_attribute(df_anime,      'anime_id',      dict(nodetype='anime')),
#     **df_to_node_attribute(df_character,  'character_id',  dict(nodetype='character')),
#     **df_to_node_attribute(df_voiceactor, 'voiceactor_id', dict(nodetype='voiceactor')),
#     **df_to_node_attribute(df_staff,      'staff_id',      dict(nodetype='staff')),
# }

In [None]:
# nx.set_node_attributes(G, node_attributes)

In [None]:
# Export the graph in GraphML format.
nx.write_graphml(G, 'web/animegraph.graphml')

In [None]:
# Export the graph in GEXF format.
nx.write_gexf(G, 'web/animegraph.gexf')

In [None]:
import json

# Dump the graph in Cytoscape's JSON layout; the target file is overwritten
# if it already exists.
with open('web/cytograph.json', 'w') as out_file:
    cytoscape_payload = nx.cytoscape_data(G)
    json.dump(cytoscape_payload, out_file)

In [None]:
# `request_api_get` is a project helper from src.api; its behaviour is not
# visible in this notebook.
from src.api import request_api_get

# Top-level `await`: only valid where the cell runner supports it (e.g. IPython autoawait).
# NOTE(review): presumably fetches the anime list of user `basemp44` - confirm against src.api.
r = await request_api_get('users/basemp44/animelist')

In [None]:
# Display the value returned by the request in the previous cell.
r