In [21]:
import gzip
import joblib
import pandas as pd
import sqlite3 as sq
import json

from functools import reduce
from operator import add
from itertools import chain
from toolz import reduceby

from src.db import (
    get_table,
)

In [22]:
def to_nested_dict(df, key1, key2, columns_keep=None):
    """Group the rows of ``df`` by ``key1`` into a plain dictionary.

    The shape of the result depends on ``columns_keep``:

    * non-empty ``columns_keep``: ``{k1: {k2: {column: value, ...}}}``,
      one inner record per ``key2`` value, restricted to ``columns_keep``.
    * ``None`` or empty ``columns_keep``: ``{k1: [k2, k2, ...]}``, where each
      list holds the ``key2`` values in the row order of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the ``key1`` and ``key2`` columns.
    key1 : str
        Column whose values become the outer dictionary keys.
    key2 : str
        Column whose values become the inner keys (or the list items).
    columns_keep : list of str, optional
        Columns to keep in the innermost records. Defaults to ``None``
        (rather than a shared mutable ``[]``), which selects the list form.

    Returns
    -------
    dict
        Outer keys appear in order of first occurrence in ``df``.
    """
    if columns_keep is not None and len(columns_keep):
        nested = {}
        for k1 in df[key1].unique():
            # to_dict(orient='index') needs key2 to be unique within a group.
            group = df[df[key1] == k1].drop(columns=key1).set_index(key2)
            nested[k1] = group[columns_keep].to_dict(orient='index')
        return nested

    # Append in place: building a new list per row (d1 + [x]) is quadratic
    # in the group size, and a plain dict needs no third-party helper.
    grouped = {}
    for record in df[[key1, key2]].to_dict(orient='records'):
        grouped.setdefault(record[key1], []).append(record[key2])
    return grouped


def fillna_default(df):
    """Fill missing values in place with a per-dtype default.

    ``object`` columns get the empty string and ``float64`` columns get ``0``.
    Columns of any other dtype are left untouched. The frame passed in is
    modified and the same object is returned.
    """
    for dtype_name, filler in (('object', ''), ('float64', 0)):
        typed = df.select_dtypes(dtype_name)
        df[typed.columns] = typed.fillna(filler)
    return df


def get_nas_types(df):
    """Summarise the columns of ``df`` that contain missing values.

    Returns a frame indexed by column name with two columns: ``nas`` (number
    of missing values) and ``type`` (the column dtype). Only columns with at
    least one missing value are listed, ordered by ``type``.
    """
    na_counts = df.isna().sum().rename('nas')
    col_types = df.dtypes.rename('type')
    summary = pd.concat([na_counts, col_types], axis=1)
    has_missing = summary['nas'] > 0
    return summary[has_missing].sort_values('type')


def compress_data(data, filename):
    """Serialise ``data`` as gzip-compressed, UTF-8 encoded JSON.

    Parameters
    ----------
    data : object
        Any value accepted by ``json.dumps``.
    filename : str or path-like
        Destination file; it is created or overwritten.

    Raises
    ------
    TypeError
        If ``data`` is not JSON serialisable (raised by ``json.dumps``).
    OSError
        If the destination cannot be opened for writing.
    """
    # Serialise and compress fully in memory, so that a serialisation error
    # cannot leave a truncated file behind.
    json_data = json.dumps(data, indent=2)
    compressed = gzip.compress(json_data.encode('utf-8'))
    # The context manager guarantees the handle is flushed and closed; the
    # previous version opened the file and never closed it.
    with open(filename, 'wb') as f:
        f.write(compressed)

In [23]:
# Database identifier passed as the second argument to every get_table call below.
db = "animedb.sqlite"

In [24]:
# --- Entity tables -----------------------------------------------------------
# The third argument is a list of column names -- presumably the subset of
# columns to load; confirm against src.db.get_table.
df_anime = get_table(
    'anime', db,
    ['anime_id', 'image_webp', 'status', 'scored_by', 'rank', 'rating'],
)
df_anime_title = get_table('anime_title', db)
df_character = get_table(
    'character', db,
    ['character_id', 'name', 'images_webp_image_url'],
)
df_voiceactor = get_table(
    'voiceactor', db,
    ['voiceactor_id', 'name', 'image_url', 'favorites'],
)
df_staff = get_table(
    'staff', db,
    ['staff_id', 'image_url', 'name'],
)

# --- Link tables (no column list is passed) ----------------------------------
df_anime_character = get_table('anime_character', db)
df_character_voiceactor = get_table('character_voiceactor', db)
df_anime_staff = get_table('anime_staff', db)

In [25]:
# Filtered anime table: exclude the 'Rx - Hentai' rating, require at least one
# score, attach the 'Default' title and order by number of scorers (then title).
df_anime_f = (
    df_anime[
        ~df_anime['rating'].isin(['Rx - Hentai'])
        & (df_anime['scored_by'] > 0)
    ]
    .merge(df_anime_title[df_anime_title['type'] == 'Default'])
    .sort_values(['scored_by', 'title'], ascending=[False, True])
    .drop(columns=['type', 'rating'])
    .assign(
        # Drops the first 30 characters of every image URL -- presumably a
        # fixed host/path prefix; TODO confirm against the stored URLs.
        image_webp=lambda d: d['image_webp'].str[30:],
        # Missing counts/ranks become 0 so both columns can be plain ints.
        scored_by=lambda d: d['scored_by'].fillna(0).astype(int),
        rank=lambda d: d['rank'].fillna(0).astype(int),
    )
)

In [26]:
# Spot-check: show the filtered row(s) for a single anime id.
df_anime_f.loc[df_anime_f['anime_id'] == 32768]

In [27]:
# Characters joined to the filtered anime they appear in.
# NOTE(review): 'favorites' is sorted on below but is not among the columns
# requested for df_character, so it presumably comes from the anime_character
# table -- confirm.
df_character_anime_f = (
    df_character
    .merge(df_anime_character)
    .merge(df_anime_f[['anime_id', 'scored_by', 'title']])
    .drop(columns='role')
    # Same 30-character prefix trim that is applied to the anime image URLs.
    .assign(images_webp_image_url=lambda d: d['images_webp_image_url'].str[30:])
)

# One row per character, most-favorited first (ties broken by name).
df_character_f = (
    df_character_anime_f
    .sort_values(['favorites', 'name'], ascending=[False, True])
    .drop(columns=['anime_id', 'scored_by', 'title'])
    .drop_duplicates(subset='character_id')
)

# anime -> characters, most-favorited character first within each anime.
df_anime_character_ff = (
    df_character_anime_f
    .sort_values(['anime_id', 'favorites', 'name'], ascending=[True, False, True])
    [['anime_id', 'character_id']]
)

# character -> anime, most-scored anime first within each character.
df_character_anime_ff = (
    df_character_anime_f
    .sort_values(['character_id', 'scored_by', 'title'], ascending=[True, False, True])
    [['character_id', 'anime_id']]
)

df_character_f

In [28]:
# Voice actors joined to the characters they voice. The character's own
# 'favorites' and 'name' are carried along as 'favorites_c' / 'name_c' so they
# do not clash with the voice actor's columns of the same name.
df_voiceactor_character_f = (
    df_voiceactor
    .merge(df_character_voiceactor)
    .merge(
        df_character_f[['character_id', 'favorites', 'name']]
        .rename(columns={'favorites': 'favorites_c', 'name': 'name_c'})
    )
    # Same 30-character prefix trim that is applied to the other image URLs.
    .assign(image_url=lambda d: d['image_url'].str[30:])
)

# One row per voice actor, most-favorited first (ties broken by name).
df_voiceactor_f = (
    df_voiceactor_character_f
    .sort_values(['favorites', 'name'], ascending=[False, True])
    .drop(columns=['character_id', 'favorites_c', 'name_c'])
    .drop_duplicates(subset='voiceactor_id')
)

# character -> voice actors, ordered by the voice actor's favorites.
df_character_voiceactor_ff = (
    df_voiceactor_character_f
    .sort_values(['character_id', 'favorites', 'name'], ascending=[True, False, True])
    [['character_id', 'voiceactor_id']]
)

# voice actor -> characters, ordered by the character's favorites.
df_voiceactor_character_ff = (
    df_voiceactor_character_f
    .sort_values(['voiceactor_id', 'favorites_c', 'name_c'], ascending=[True, False, True])
    [['voiceactor_id', 'character_id']]
)

In [29]:
# Staff joined to the filtered anime they worked on.
df_staff_anime_f = (
    df_staff
    .merge(df_anime_staff)
    .merge(df_anime_f[['anime_id', 'scored_by', 'title']])
    # Same 30-character prefix trim that is applied to the other image URLs.
    .assign(image_url=lambda d: d['image_url'].str[30:])
)

# One row per staff member; the row kept is the one from their most-scored anime.
df_staff_f = (
    df_staff_anime_f
    .sort_values(['scored_by', 'name'], ascending=[False, True])
    .drop(columns=['anime_id', 'position', 'scored_by', 'title'])
    .drop_duplicates(subset='staff_id')
)

# anime -> staff, alphabetical by staff name within each anime.
df_anime_staff_ff = (
    df_staff_anime_f
    .sort_values(['anime_id', 'name'], ascending=[True, True])
    [['anime_id', 'staff_id']]
)

# staff -> anime, most-scored anime first within each staff member.
df_staff_anime_ff = (
    df_staff_anime_f
    .sort_values(['staff_id', 'scored_by', 'title'], ascending=[True, False, True])
    [['staff_id', 'anime_id']]
)

In [30]:
# Flat list of character ids in which each id is repeated once per full
# hundred favorites (favorites // 100); characters below 100 do not appear.
character_weights = [
    int(char_id)
    for char_id, n_copies in (
        df_character_f.set_index('character_id')['favorites'] // 100
    ).items()
    for _ in range(n_copies)
]

In [31]:
def to_orient_dict(df, index):
    """Return ``{index value: {column: value}}`` for every row of ``df``.

    The ``index`` column is kept inside each row record as well
    (``drop=False``), so the key is repeated in the value.
    """
    keyed = df.set_index(index, drop=False)
    return keyed.to_dict(orient='index')


def to_orient_list(df, index):
    """Return ``{index value: [row values]}`` for every row of ``df``.

    Same keys as ``to_orient_dict``, but each row is reduced to the list of
    its values in column order (column names are dropped).
    """
    rows_by_key = to_orient_dict(df, index)
    return {key: list(row.values()) for key, row in rows_by_key.items()}

In [32]:
def make_dict_data(
    df_anime_f,
    df_character_f,
    df_voiceactor_f,
    df_staff_f,
    df_anime_character_ff,
    df_character_anime_ff,
    df_anime_staff_ff,
    df_staff_anime_ff,
    df_character_voiceactor_ff,
    df_voiceactor_character_ff,
    character_weights,
    format_single,
    format_multiple
):
    """Assemble the export payload from the prepared frames.

    Entity frames are converted with ``format_single(frame, id_column)`` and
    link frames with ``format_multiple(frame, from_column, to_column)``;
    ``character_weights`` is passed through unchanged.

    Returns a dict with the keys ``anime``, ``character``, ``voiceactor``,
    ``staff``, one entry per link direction, and ``character_weights``.
    """
    return {
        # Entities, keyed by their id column.
        'anime': format_single(df_anime_f, 'anime_id'),
        'character': format_single(df_character_f, 'character_id'),
        'voiceactor': format_single(df_voiceactor_f, 'voiceactor_id'),
        'staff': format_single(df_staff_f, 'staff_id'),
        # Links, one entry per direction.
        'anime_character': format_multiple(df_anime_character_ff, 'anime_id', 'character_id'),
        'character_anime': format_multiple(df_character_anime_ff, 'character_id', 'anime_id'),
        'anime_staff': format_multiple(df_anime_staff_ff, 'anime_id', 'staff_id'),
        'staff_anime': format_multiple(df_staff_anime_ff, 'staff_id', 'anime_id'),
        'character_voiceactor': format_multiple(df_character_voiceactor_ff, 'character_id', 'voiceactor_id'),
        'voiceactor_character': format_multiple(df_voiceactor_character_ff, 'voiceactor_id', 'character_id'),
        'character_weights': character_weights,
    }

In [34]:
# Payload variant in which every entity row is a {column: value} record.
output_orient_dict = make_dict_data(
    df_anime_f=df_anime_f,
    df_character_f=df_character_f,
    df_voiceactor_f=df_voiceactor_f,
    df_staff_f=df_staff_f,
    df_anime_character_ff=df_anime_character_ff,
    df_character_anime_ff=df_character_anime_ff,
    df_anime_staff_ff=df_anime_staff_ff,
    df_staff_anime_ff=df_staff_anime_ff,
    df_character_voiceactor_ff=df_character_voiceactor_ff,
    df_voiceactor_character_ff=df_voiceactor_character_ff,
    character_weights=character_weights,
    format_single=to_orient_dict,
    format_multiple=to_nested_dict,
)

In [None]:
# Payload variant in which every entity row is a bare list of values
# (column names dropped); link tables use the same format as the dict variant.
output_orient_list = make_dict_data(
    df_anime_f=df_anime_f,
    df_character_f=df_character_f,
    df_voiceactor_f=df_voiceactor_f,
    df_staff_f=df_staff_f,
    df_anime_character_ff=df_anime_character_ff,
    df_character_anime_ff=df_character_anime_ff,
    df_anime_staff_ff=df_anime_staff_ff,
    df_staff_anime_ff=df_staff_anime_ff,
    df_character_voiceactor_ff=df_character_voiceactor_ff,
    df_voiceactor_character_ff=df_voiceactor_character_ff,
    character_weights=character_weights,
    format_single=to_orient_list,
    format_multiple=to_nested_dict,
)

In [None]:
# Persist both payload variants as uncompressed JSON.
# Record-style payload (one {column: value} dict per entity).
with open('animu_erdos/src/animedbdict.json', 'w') as fh:
    json.dump(output_orient_dict, fh)

# List-style payload (one value list per entity).
with open('animu_erdos/src/animedblist.json', 'w') as fh:
    json.dump(output_orient_list, fh)

In [None]:
# NOTE(review): `adsfasfasfa` is not defined anywhere in the visible notebook,
# so evaluating this cell raises NameError. Presumably a deliberate barrier
# that stops "Run All" before the exploratory cells below -- confirm, then
# either delete those cells or replace this with an explicit, explained stop.
adsfasfasfa

In [None]:
# Serialise the title table as JSON records and preview the first 1000 characters.
s = df_anime_title.to_json(orient='records')
s[0:1000]

In [None]:
# Scratch: rebuilds df_anime_f (overwriting the filtered frame built earlier)
# from the unfiltered anime table plus its 'Default' title.
# NOTE(review): 'score' and 'popularity' are not among the columns requested
# for df_anime in the loading cell, so this selection presumably raises
# KeyError unless get_table returns extra columns -- confirm.
df_anime_f = df_anime.merge(
    df_anime_title.loc[
        df_anime_title['type'] == 'Default',
        ['anime_id', 'title'],
    ]
)[
    ['anime_id', 'title', 'image_webp', 'score', 'scored_by', 'popularity']
]

In [None]:
# Add a graph node id to each entity table: a one-letter type prefix followed
# by the numeric id. This writes a new 'id' column into the loaded frames.
df_anime['id'] = 'A_' + df_anime['anime_id'].astype(str)
df_character['id'] = 'C_' + df_character['character_id'].astype(str)
df_voiceactor['id'] = 'V_' + df_voiceactor['voiceactor_id'].astype(str)
df_staff['id'] = 'S_' + df_staff['staff_id'].astype(str)

In [None]:
import networkx as nx

# Undirected graph whose nodes are all anime, characters, voice actors and
# staff, identified by a type-prefixed id ('A_', 'C_', 'V_', 'S_').
G = nx.Graph()

G.add_nodes_from(
    pd.concat(
        [
            'A_' + df_anime['anime_id'].astype(str),
            'C_' + df_character['character_id'].astype(str),
            'V_' + df_voiceactor['voiceactor_id'].astype(str),
            'S_' + df_staff['staff_id'].astype(str),
        ]
    ).values
)

In [None]:
# Each link table contributes one edge per row; the two id columns are
# prefixed the same way as the node ids so that the endpoints match.

# anime -- character
G.add_edges_from(
    pd.concat(
        [
            'A_' + df_anime_character['anime_id'].astype(str),
            'C_' + df_anime_character['character_id'].astype(str),
        ],
        axis=1,
    ).to_numpy()
)

# character -- voice actor
G.add_edges_from(
    pd.concat(
        [
            'C_' + df_character_voiceactor['character_id'].astype(str),
            'V_' + df_character_voiceactor['voiceactor_id'].astype(str),
        ],
        axis=1,
    ).to_numpy()
)

# anime -- staff
G.add_edges_from(
    pd.concat(
        [
            'A_' + df_anime_staff['anime_id'].astype(str),
            'S_' + df_anime_staff['staff_id'].astype(str),
        ],
        axis=1,
    ).to_numpy()
)


In [None]:
def df_to_node_attribute(df, exclude_cols, dict_constants):
    """Turn a frame into a ``{node id: {attribute: value}}`` mapping.

    ``exclude_cols`` are dropped, the ``'id'`` column becomes the key, and
    every entry of ``dict_constants`` is added as a constant attribute on
    each node (overriding a column of the same name).
    """
    attributes = df.drop(columns=exclude_cols).set_index('id')
    attributes = attributes.assign(**dict_constants)
    return attributes.to_dict(orient='index')

In [None]:
# node_attributes = {
#     **df_to_node_attribute(df_anime,      'anime_id',      dict(nodetype='anime')),
#     **df_to_node_attribute(df_character,  'character_id',  dict(nodetype='character')),
#     **df_to_node_attribute(df_voiceactor, 'voiceactor_id', dict(nodetype='voiceactor')),
#     **df_to_node_attribute(df_staff,      'staff_id',      dict(nodetype='staff')),
# }

In [None]:
# nx.set_node_attributes(G, node_attributes)

In [None]:
# Export the graph in GraphML format.
nx.write_graphml(G, "web/animegraph.graphml")

In [None]:
# Export the graph in GEXF format.
nx.write_gexf(G, "web/animegraph.gexf")

In [None]:
# Export the graph as Cytoscape-style JSON.
# `json` is already imported in the notebook's first cell, so the redundant
# re-import that used to sit here has been removed.
with open('web/cytograph.json', 'w') as f:
    json.dump(nx.cytoscape_data(G), f)

In [None]:
# Project-local helper; within the visible notebook it is used only in this cell.
from src.api import request_api_get

# NOTE(review): this is a top-level `await`, which only works where the
# notebook kernel supports awaiting at cell level; in a plain Python script it
# is a SyntaxError. The user name is hard-coded in the request path.
r = await request_api_get('users/basemp44/animelist')

In [None]:
# Display the value returned by the request_api_get call in the previous cell.
r