In [None]:
import colorsys
import csv
import json
import math
import os
import pickle
import time
from datetime import date

import networkx as nx
import requests
import seaborn as sns
from IPython.display import clear_output
from networkx.algorithms import community

class Series:
    """Plain record for one anime/manga entry, keyed by its numeric id.

    Only ``series_id`` is set here; the crawl and layout cells attach every
    other attribute (name, recs, genres, xpos, ...) dynamically.
    """

    def __init__(self, series_id):
        # Identifier used as the key in series_list and as the graph node id
        self.series_id = series_id

In [None]:
# Crawl target: True -> anime, False -> manga
is_anime = True

# Map parameters
# Recommendations with fewer votes than `threshold` are dropped by the crawl.
# NOTE(review): the anime value is 100 although the note says 3 is ideal — confirm
# this is intentional before running the full crawl.
threshold = 100 if is_anime else 2  # 3 ideal for anime, 2 ideal for manga

# Api parameters
api_attempts = 10   # attempts per series before the crawl stops
request_delay = 1   # seconds slept after each request
retry_delay = 60    # seconds slept after a failed attempt

# Initialization of the crawl state shared by the following cells
queue, edge_list = [], []
series_list, genre_list = {}, {}
is_first_run = True
current_id = -1

# Set to True to also write the Gephi .csv files
export_csvs = False

In [None]:
# Get recommendation data, takes ~2-3 hours
#
# Breadth-first crawl: pop an id from `queue`, fetch its details and its
# recommendations, record it in `series_list`, and enqueue every recommended id
# that has not been seen yet. Re-running this cell after a failure puts the id
# that was in progress back at the front of the queue.

request_timeout = 30  # seconds; without a timeout a stalled connection blocks forever

if is_anime:
    api_address = "https://api.jikan.moe/v4/anime/"
else:
    api_address = "https://api.jikan.moe/v4/manga/"

if is_first_run:
    queue.append(1) # Bebop(anime)/Monster(manga)
    is_first_run = False
else:
    queue.insert(0, current_id) # Run if connection cuts out

while len(queue) > 0:
    current_id = queue.pop(0)
    series = Series(current_id)

    # Get data
    endpoint = api_address + str(series.series_id) + "/"
    rec_endpoint = api_address + str(series.series_id) + "/recommendations"

    # Initialised up front so the check below is defined even if api_attempts is 0
    api_success = False
    for attempt in range(api_attempts):
        try:
            series_data_json = requests.get(endpoint, timeout=request_timeout).json()
            series_data = series_data_json["data"]
            time.sleep(request_delay) # Wait for cached requests
            rec_data_json = requests.get(rec_endpoint, timeout=request_timeout).json()
            rec_data = rec_data_json["data"]
            time.sleep(request_delay)
        except Exception as err:
            # `Exception` (not a bare except) so that a kernel interrupt still stops
            # the cell instead of being swallowed and retried.
            print("Retrying...", repr(err))
            time.sleep(retry_delay)
            continue
        api_success = True
        break

    if not api_success:
        # Every attempt failed: stop here; current_id is re-queued on the next run
        break

    # Stuff on the map
    series.name = series_data['title']
    series.img_url = series_data['images']['jpg']['image_url']

    # Stuff to include in search
    series.eng_name = series_data['title_english']
    series.jp_name = series_data['title_japanese']
    series.alt_names = series_data['title_synonyms']

    # Stuff to filter for
    series.type = series_data['type']
    series.score = series_data['score']
    series.members = series_data['members']
    series.rank = series_data['rank']
    series.popularity = series_data['popularity']
    series.favorites = series_data['favorites']

    # Extra stuff
    series.url = series_data['url']
    # series.synopsis = series_data['synopsis']

    # Anime/manga specific fields
    if is_anime:
        series.season = series_data['season']
        series.year = series_data['year']
        series.episodes = series_data['episodes']
        series.start_year = series_data['aired']['prop']['from']['year']
    else:
        series.chapters = series_data['chapters']
        series.volumes = series_data['volumes']
        series.start_year = series_data['published']['prop']['from']['year']

    # Genres: merge the four genre-like lists into one id -> name mapping and
    # register any id not seen before in the global genre_list
    series.genres = {}
    all_genres = (series_data['genres'] + series_data['explicit_genres']
                  + series_data['themes'] + series_data['demographics'])
    for genre in all_genres:
        genre_id = genre['mal_id']
        genre_name = genre['name']
        series.genres[genre_id] = genre_name
        if genre_id not in genre_list:
            genre_list[genre_id] = genre_name

    # Recommendations: keep only those with at least `threshold` votes
    series.recs = {}
    for rec in rec_data:
        rec_id = rec['entry']['mal_id']
        rec_count = rec['votes']
        if rec_count < threshold:
            continue
        series.recs[rec_id] = rec_count

    # Add anime to visited dictionary, new anime to queue, new edges to edge list
    series_list[series.series_id] = series

    for rec_id in series.recs:
        # An edge is only recorded towards series that have not been visited yet,
        # so each pair contributes at most one edge
        if rec_id not in series_list:
            if rec_id not in queue:
                queue.append(rec_id)
            edge_list.append([series.series_id, rec_id, series.recs[rec_id]])

    # wait=True replaces the previous line only once the new one is ready (no flicker)
    clear_output(wait=True)
    print(series.name + ' | ' + str(len(queue)))

# Date stamped into the exported metadata
data_date = date.today()

In [None]:
# Persist the crawl results so the later cells can run without re-crawling.
# Anime and manga use different file name prefixes inside dumps/.
dump_prefix = 'dumps/dump_' if is_anime else 'dumps/dump_manga_'

for dump_name, dump_obj in (('series', series_list),
                            ('edges', edge_list),
                            ('genres', genre_list)):
    # `with` guarantees the file is flushed and closed, even if pickling fails
    with open(dump_prefix + dump_name + '.p', 'wb') as dump_file:
        pickle.dump(dump_obj, dump_file)

In [None]:
# Reload previously dumped crawl results (anime or manga, depending on is_anime).
# NOTE(review): pickle.load can execute arbitrary code; only load dump files that
# were produced by this notebook.
dump_prefix = 'dumps/dump_' if is_anime else 'dumps/dump_manga_'

with open(dump_prefix + 'series.p', 'rb') as dump_file:
    series_list = pickle.load(dump_file)
with open(dump_prefix + 'edges.p', 'rb') as dump_file:
    edge_list = pickle.load(dump_file)
with open(dump_prefix + 'genres.p', 'rb') as dump_file:
    genre_list = pickle.load(dump_file)

# data_date is only assigned at the end of the crawl cell and is not stored in
# the dumps, so in a fresh kernel the JSON export would fail with a NameError.
# Fall back to the series dump's modification time as an approximation of the
# crawl date; a value set by a crawl in this session is left untouched.
if 'data_date' not in globals():
    data_date = date.fromtimestamp(os.path.getmtime(dump_prefix + 'series.p'))

In [None]:
# Quick sanity check of how much data is loaded
print(f"Data: {len(series_list)} series, {len(edge_list)} edges, {len(genre_list)} genres")

In [None]:
# Build the recommendation graph, split it into communities and lay it out

# One seed shared by community detection and layout so reruns are repeatable
seed = 69

# Undirected graph: nodes are series ids, edges carry the recommendation votes as weight
G = nx.Graph()
G.add_nodes_from(series_list)
G.add_weighted_edges_from(edge_list)

# Community detection with the Louvain method
communities = community.louvain_communities(G, seed=seed)
print(f"Generated {len(communities)} communities")

# Positions: start from a circular arrangement, then refine with the spring layout
pos = nx.circular_layout(G)
positions = nx.spring_layout(
    G,
    k=1,
    iterations=5000,
    scale=10000,
    pos=pos,
    seed=seed,
)

In [None]:
def _recenter_positions(pos_by_id, origin):
    """Return a copy of pos_by_id with `origin` ([x, y]) subtracted from every point."""
    return {k: [v[0] - origin[0], v[1] - origin[1]] for (k, v) in pos_by_id.items()}


def _rotate_positions(pos_by_id, angle):
    """Return a copy of pos_by_id with every point rotated by `angle` radians about the origin."""
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    return {k: [v[0]*cos_a - v[1]*sin_a, v[0]*sin_a + v[1]*cos_a]
            for (k, v) in pos_by_id.items()}


def _flip_y(pos_by_id):
    """Return a copy of pos_by_id mirrored across the x axis."""
    return {k: [v[0], -v[1]] for (k, v) in pos_by_id.items()}


# Generate colors: one evenly spaced hue per community, fixed lightness/saturation
num_communities = len(communities)
hls_tuples = [(i*1.0/num_communities, 0.8, 0.8) for i in range(num_communities)]
rgb_tuples = [colorsys.hls_to_rgb(*hls) for hls in hls_tuples]
hex_colors = ['#%02x%02x%02x' % tuple(int(c*255) for c in rgb) for rgb in rgb_tuples]
sns.palplot(hex_colors)
print(hex_colors)

# Set community and color
for idx, node_ids in enumerate(communities):
    for node_id in node_ids:
        series_list[node_id].community = idx
        series_list[node_id].color = hex_colors[idx]


# Generate modified positions
# To keep the map fairly consistent, going to have Code Geass in top left, K-On in bottom right, and Haikyuu in top right
code_geass = 1575
k_on = 5680
haikyuu = 20583

modified_positions = positions
anchors_present = all(anchor in positions for anchor in (code_geass, k_on, haikyuu))

if is_anime and anchors_present:
    # shift the graph so that Code Geass sits at the origin
    modified_positions = _recenter_positions(modified_positions, modified_positions[code_geass])

    # rotate graph so that Code Geass is horizontal to K-On, with Code Geass on the left
    kon_angle = math.atan2(modified_positions[k_on][1], modified_positions[k_on][0])
    modified_positions = _rotate_positions(modified_positions, -kon_angle)

    # flip graph if Haikyuu is below Code Geass
    if modified_positions[haikyuu][1] < modified_positions[code_geass][1]:
        modified_positions = _flip_y(modified_positions)

    # rotate graph -45 degrees
    modified_positions = _rotate_positions(modified_positions, math.radians(-45))

    # shift the graph to the midpoint between Code Geass and K-On
    # (Code Geass is still at the origin, so the midpoint is half of K-On's position)
    midpoint = [modified_positions[k_on][0]/2, modified_positions[k_on][1]/2]
    modified_positions = _recenter_positions(modified_positions, midpoint)

    # flip graph since sigma flips y axis
    modified_positions = _flip_y(modified_positions)

    print("Code Geass: " + str(modified_positions[code_geass][0]) + ", " + str(modified_positions[code_geass][1]) +
          " | K-ON: " + str(modified_positions[k_on][0]) + ", " + str(modified_positions[k_on][1]) +
          " | Haikyuu: " + str(modified_positions[haikyuu][0]) + ", " + str(modified_positions[haikyuu][1]))
elif is_anime:
    # Without all three anchors the orientation cannot be normalised; keep the raw layout
    print("Anchor series missing from the graph; keeping the unrotated layout")

# assign positions
for series in series_list.values():
    series.xpos = modified_positions[series.series_id][0]
    series.ypos = modified_positions[series.series_id][1]
    series.weighted_degree = G.degree(series.series_id, 'weight')

# assign node sizes
# We scale nodes logarithmically, then scale again to the max value
max_weighted_degree = max((s.weighted_degree for s in series_list.values()), default=0)
# log(max) is only a usable denominator when max > 1; otherwise sizes fall back to 0
log_max_degree = math.log(max_weighted_degree) if max_weighted_degree > 1 else 0.0
for series in series_list.values():
    if log_max_degree > 0 and series.weighted_degree > 0:
        series.size = math.log(series.weighted_degree) / log_max_degree
    else:
        # isolated node (log(0) undefined) or degenerate graph (log(max) == 0)
        series.size = 0.0

In [None]:
# Build the JSON document (nodes, edges, genres, metadata) and write it to data/.
elements = {}

sig_figs = 6  # passed to round(), i.e. the number of decimal places kept
img_base_url = 'https://cdn.myanimelist.net/images/'
link_base_url = 'https://myanimelist.net/'

# Nodes: one dict per series; URLs are stored relative to the base URLs above
elements['nodes'] = []
for series in series_list.values():
    node = {
        'id': series.series_id,
        'name': series.name,
        'engName': series.eng_name,
        'jpName': series.jp_name,
        'altNames': series.alt_names,
        'imgUrlPath': (series.img_url).replace(img_base_url, ''),
        'urlPath': (series.url).replace(link_base_url, ''),
        'weightedDegree': series.weighted_degree,
        'type': series.type,
        'score': series.score,
        'members': series.members,
        'favorites': series.favorites,
        'rank': series.rank,
        'popularity': series.popularity,
        'genres': list(series.genres.keys()),
        'xPos': round(series.xpos, sig_figs),
        'yPos': round(series.ypos, sig_figs),
        'community': series.community,
        'color': series.color,
        'size': round(series.size, sig_figs),
        'startYear': series.start_year,
    }

    if is_anime:
        node['season'] = series.season
        node['year'] = series.year
        node['episodes'] = series.episodes
    else:
        node['chapters'] = series.chapters
        node['volumes'] = series.volumes

    elements['nodes'].append(node)

# Edges: size is the log of the weight, normalised by the log of the largest weight
elements['edges'] = []
max_edge_weight = max((edge[2] for edge in edge_list), default=0)
# log(max) is only a usable denominator when max > 1; otherwise sizes fall back to 0
log_max_edge_weight = math.log(max_edge_weight) if max_edge_weight > 1 else 0.0
for edge in edge_list:
    source, target, weight = edge[0], edge[1], edge[2]
    if log_max_edge_weight > 0 and weight > 0:
        edge_size = math.log(weight) / log_max_edge_weight
    else:
        edge_size = 0.0

    elements['edges'].append({
        'id': str(source)+"-"+str(target),
        'source': source,
        'target': target,
        'weight': weight,
        'size': round(edge_size, sig_figs),
    })

# Genres: id/name pairs collected during the crawl
elements['genres'] = [{'id': k, 'name': v} for (k, v) in genre_list.items()]

# Metadata; data_date is assigned at the end of the crawl cell
elements['metadata'] = {
    'lastUpdated': data_date.strftime("%B %d, %Y"),
    'imgBaseUrl': img_base_url,
    'linkBaseUrl': link_base_url,
}

output_path = 'data/anime_data.json' if is_anime else 'data/manga_data.json'
with open(output_path, 'w', encoding='utf8') as json_file:
    json.dump(elements, json_file, ensure_ascii=False)

In [None]:
# Generate .csv files for import into Gephi
if export_csvs:
    # newline='' hands line-ending control to the csv module; without it an extra
    # blank line appears after every row on platforms that write \r\n.

    # Export edges
    with open('dumps/edges.csv', 'w', encoding="utf-8", newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(["Source", "Target", "Weight", "Type"])
        for edge_row in edge_list:
            # copy so the shared edge_list entries are not modified
            writer.writerow(list(edge_row) + ["Undirected"])

    # Export nodes
    with open('dumps/nodes.csv', 'w', encoding="utf-8", newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(["ID", "Label", "Image", "Community"])
        for entry in series_list.values():
            writer.writerow([entry.series_id, entry.name, entry.img_url, entry.community])