In [1]:
import pandas as pd

anime = pd.read_json('data/anime.json')

In [2]:
from collections import defaultdict

sequels_dic = defaultdict(set)
sequels_list = []

for _, row in anime.iterrows():
    for relation_type, list_of_dics in row['related'].items():
        anime1 = row['mal_id']
        for dic in list_of_dics:
            if dic['type'] == 'anime':
                anime2 = dic['mal_id']
                if relation_type == 'Sequel' and anime2 not in sequels_dic[anime1]:
                    sequels_dic[anime1].add(anime2)
                    sequels_list.append((anime1, anime2))
                if relation_type == 'Prequel' and anime1 not in sequels_dic[anime1]:
                    sequels_dic[anime2].add(anime1)
                    sequels_list.append((anime2, anime1))

In [3]:
import networkx as nx

G = nx.Graph()
G.add_nodes_from(anime.mal_id)
G.add_edges_from(sequels_list)
components = list(nx.connected_components(G))

In [4]:
class Series:
    def __init__(self, comp):
        self.components = comp
        self.animes = anime.loc[anime.mal_id.isin(comp)]
        self.max = self.animes.mal_members.argmax()

        self.title = self.animes.iloc[self.max].title
        self.popularity = self.animes.iloc[self.max].mal_members
        self.score = (self.animes.mal_score * self.animes.mal_members).sum() / self.animes.mal_members.sum()
        self.image_url = self.animes.iloc[self.max].image_url
        self.episodes = self.animes.episodes.sum()
        self.seasons = len(self.animes)
        self.synopsis = self.animes.iloc[self.max].synopsis
        self.date = self.animes.iloc[self.max].aired_start
        self.genres = self.animes.iloc[self.max].genres
    
    def __str__(self):
        return f'Title: {self.title}, popularity: {self.popularity}, score: {self.score}'
    
    def to_dict(self, series_id):
        return {'series_id': series_id, 'title': self.title, 'popularity': self.popularity, 'score': self.score, 'image_url': self.image_url,
                'episodes': self.episodes, 'seasons': self.seasons, 'synopsis': self.synopsis, 'date': self.date, 'genres': self.genres}

In [5]:
series_list = [Series(comp) for comp in components]

  self.score = (self.animes.mal_score * self.animes.mal_members).sum() / self.animes.mal_members.sum()


In [6]:
series_list.sort(key=lambda x: -x.popularity)

In [7]:
dics = [series_list[i].to_dict(i) for i in range(len(series_list))]

In [8]:
g = pd.DataFrame(dics)
g.to_json('series.json', indent=4, orient='records')