In [2]:
from IPython.core.display import display, HTML
from string import Template
import pandas as pd
import json, random
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def open_json(filename):
    """Read the UTF-8 encoded JSON file at ``filename`` and return the parsed content."""
    with open(filename, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

In [76]:
class Person:
    """One member of the family tree, built from a raw record.

    ``dictionary`` must provide ``'id'``, ``'name'`` and ``'gender'``.
    ``'pids'`` is optional and defaults to an empty list; ``'mid'`` and
    ``'fid'`` are optional and default to ``-1``. ``cids`` always starts as
    an empty list and ``rank`` as ``-1``.
    """

    def __init__(self, dictionary):
        # Mandatory fields: a missing key raises KeyError.
        self.id = dictionary['id']
        self.name = dictionary['name']
        self.gender = dictionary['gender']

        # Optional fields; a fresh list is created per instance for 'pids'.
        self.pids = dictionary.get('pids', [])
        self.mid = dictionary.get('mid', -1)
        self.fid = dictionary.get('fid', -1)

        # Derived fields, filled in after construction.
        self.cids = []
        self.rank = -1

    def __repr__(self):
        # name and gender are joined as-is, so they must already be strings.
        fields = [
            str(self.id),
            self.name,
            self.gender,
            str(self.pids),
            str(self.mid),
            str(self.fid),
            str(self.cids),
            str(self.rank),
        ]
        return '{' + ', '.join(fields) + '}'

In [77]:
def get_all_rank(tree, rank):
    """Return the persons in ``tree`` whose rank equals ``rank``.

    Args:
        tree: mapping of person id to an object with a ``rank`` attribute.
        rank: rank to look for, as an int or a string.

    Returns:
        List of matching objects, in the iteration order of ``tree``.

    Both sides are compared as strings. Assigned ranks are stored as strings
    while unranked persons keep the integer sentinel ``-1``; normalising both
    lets ``get_all_rank(tree, -1)`` return the unranked persons as well.
    """
    wanted = str(rank)
    return [tree[key] for key in tree if str(tree[key].rank) == wanted]

In [79]:
class FamilyTree:
    """Family tree indexed by person id.

    ``self.tree`` maps each person's ``id`` to its ``Person`` object. After
    construction every person additionally has:

    * ``cids`` - ids of the persons that name them as ``mid`` or ``fid``;
    * ``rank`` - generation number relative to the root, stored as a string
      (``'0'`` for the root); persons not reachable from the root keep ``-1``.

    NOTE(review): ``pids`` are presumably partner ids and ``mid``/``fid``
    mother/father ids - confirm against the data source.
    """

    def __init__(self, tree_list, root_id=1):
        """Build the tree from raw records.

        Args:
            tree_list: list of dicts accepted by ``Person``. Entries that are
                not dicts are reported with a printed message and skipped; a
                non-list argument is reported and yields an empty tree.
            root_id: id of the person that receives rank 0. Defaults to 1.

        Raises:
            KeyError: if a ``mid``/``fid``/``pids`` entry refers to an id that
                is not in the tree, or if the tree is non-empty and
                ``root_id`` is not in it.
        """
        self.tree = {}
        if isinstance(tree_list, list):
            for node in tree_list:
                if isinstance(node, dict):
                    person = Person(node)
                    self.tree[person.id] = person
                else:
                    print("List must contains dict!")
        else:
            print("Tree must be list!")
        self.__add_children()
        self.__add_rank(root_id)

    def __add_children(self):
        """Append each person's id to the ``cids`` of their ``mid`` and ``fid`` parents."""
        for person in self.tree.values():
            for parent_id in (person.mid, person.fid):
                if parent_id != -1:  # -1 marks "no parent recorded"
                    self.tree[parent_id].cids.append(person.id)

    def __add_rank(self, root_id=1):
        """Assign generation ranks starting from ``root_id``.

        The root gets rank 0, everyone in a person's ``pids`` shares that
        person's rank, and everyone in their ``cids`` gets the rank plus one.
        Each person is ranked once (the first assignment wins), and children
        are followed even when a person has no ``pids`` entries.
        """
        if not self.tree:
            return  # nothing to rank in an empty tree
        if root_id not in self.tree:
            raise KeyError("root person id {!r} is not in the tree".format(root_id))

        # Explicit stack instead of recursion: every link is pushed at most
        # once per ranked person, and deep trees cannot hit the recursion limit.
        pending = [(self.tree[root_id], 0)]
        while pending:
            person, rank = pending.pop()
            if person.rank != -1:
                continue  # already ranked via another link
            person.rank = str(rank)
            # Children are pushed first so that pids entries (same rank) are
            # popped and ranked before the next generation.
            for child_id in reversed(person.cids):
                pending.append((self.tree[child_id], rank + 1))
            for partner_id in reversed(person.pids):
                pending.append((self.tree[partner_id], rank))

    def get_all_attr(self, attr_name):
        """Return the ``attr_name`` attribute of every person, in tree order."""
        return [getattr(person, attr_name) for person in self.tree.values()]

    def _attrs_frame(self, attrs):
        """Return a DataFrame with one column per name in ``attrs``, indexed by ``'id'``.

        ``attrs`` must contain ``'id'``.
        """
        columns = {attr: self.get_all_attr(attr) for attr in attrs}
        return pd.DataFrame.from_dict(columns).set_index('id')

    def get_dataframe(self):
        """Return all person attributes as a DataFrame indexed by ``id``."""
        return self._attrs_frame(
            ['id', 'name', 'gender', 'pids', 'mid', 'fid', 'cids', 'rank'])

    def get_persons_df(self):
        """Return ``name``, ``gender`` and ``rank`` per person, indexed by ``id``."""
        return self._attrs_frame(['id', 'name', 'gender', 'rank'])

    def get_links_df(self):
        """Return the ``pids`` and ``cids`` lists per person, indexed by ``id``."""
        return self._attrs_frame(['id', 'pids', 'cids'])

    def get_links_pair_df(self):
        """Return one row per link as a DataFrame indexed by ``source_id``.

        For every person, each ``pids`` entry and then each ``cids`` entry
        yields a row ``source_id -> target_id``.

        Raises:
            KeyError: if a linked id is not in the tree.
        """
        sources, targets = [], []
        for person in self.tree.values():
            for linked_ids in (person.pids, person.cids):
                for linked_id in linked_ids:
                    sources.append(person.id)
                    # Looking the target up validates that it exists.
                    targets.append(self.tree[linked_id].id)
        pairs = pd.DataFrame.from_dict({'source_id': sources, 'target_id': targets})
        return pairs.set_index('source_id')

In [80]:
# Build the tree from data/tree3.json and show the id -> Person mapping.
family_tree = FamilyTree(open_json('data/tree3.json'))
family_tree.tree

{1: {1, Михаил Федорович, male, [2], -1, -1, [3], 0},
 2: {2, Евдокия Лукьяновна Стрешнева, female, [1], -1, -1, [3], 0},
 3: {3, Алексей Михайлович, male, [4, 5], 1, 2, [6, 7, 8, 18], 1},
 4: {4, Мария Ильинична Милославская, female, [3], -1, -1, [6, 7, 8], 1},
 5: {5, Наталья Кирилловна Нарышкина, female, [3], -1, -1, [18], 1},
 6: {6, Федор Алексеевич, male, [9], 3, 4, [], 2},
 7: {7, Софья Алексеевна, female, [], 3, 4, [], 2},
 8: {8, Иоанн V Алексеевич, male, [10], 3, 4, [11, 12], 2},
 9: {9, Марфа Матвеевна Апраксина, female, [6], -1, -1, [], 2},
 10: {10, Прасковья Федоровна Салтыкова, female, [8], -1, -1, [11, 12], 2},
 11: {11, Екатерина Иоановна, female, [13], 8, 10, [15], 3},
 12: {12, Анна Иоановна, female, [14], 8, 10, [], 3},
 13: {13, Карл Леопольд Мекленбург-Шверенский, male, [11], -1, -1, [15], 3},
 14: {14, Фридрих Вильгельм Курляндский, male, [12], -1, -1, [], 3},
 15: {15, Анна Леопольдовна, female, [16], 13, 11, [17], 4},
 16: {16, Антон Ульрих Брауншвейг-Вольфенбю

In [81]:
# Full table: every Person attribute as a column, indexed by id.
family_tree.get_dataframe()

Unnamed: 0_level_0,name,gender,pids,mid,fid,cids,rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Михаил Федорович,male,[2],-1,-1,[3],0
2,Евдокия Лукьяновна Стрешнева,female,[1],-1,-1,[3],0
3,Алексей Михайлович,male,"[4, 5]",1,2,"[6, 7, 8, 18]",1
4,Мария Ильинична Милославская,female,[3],-1,-1,"[6, 7, 8]",1
5,Наталья Кирилловна Нарышкина,female,[3],-1,-1,[18],1
6,Федор Алексеевич,male,[9],3,4,[],2
7,Софья Алексеевна,female,[],3,4,[],2
8,Иоанн V Алексеевич,male,[10],3,4,"[11, 12]",2
9,Марфа Матвеевна Апраксина,female,[6],-1,-1,[],2
10,Прасковья Федоровна Салтыкова,female,[8],-1,-1,"[11, 12]",2


In [82]:
# Person details only: name, gender and rank, indexed by id.
family_tree.get_persons_df()

Unnamed: 0_level_0,name,gender,rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Михаил Федорович,male,0
2,Евдокия Лукьяновна Стрешнева,female,0
3,Алексей Михайлович,male,1
4,Мария Ильинична Милославская,female,1
5,Наталья Кирилловна Нарышкина,female,1
6,Федор Алексеевич,male,2
7,Софья Алексеевна,female,2
8,Иоанн V Алексеевич,male,2
9,Марфа Матвеевна Апраксина,female,2
10,Прасковья Федоровна Салтыкова,female,2


In [83]:
# Link lists per person: the pids and cids columns, indexed by id.
family_tree.get_links_df()

Unnamed: 0_level_0,pids,cids
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,[2],[3]
2,[1],[3]
3,"[4, 5]","[6, 7, 8, 18]"
4,[3],"[6, 7, 8]"
5,[3],[18]
6,[9],[]
7,[],[]
8,[10],"[11, 12]"
9,[6],[]
10,[8],"[11, 12]"


In [84]:
# One row per link: source_id (index) -> target_id, for every pids and cids entry.
family_tree.get_links_pair_df()

Unnamed: 0_level_0,target_id
source_id,Unnamed: 1_level_1
1,2
1,3
2,1
2,3
3,4
...,...
38,40
39,38
39,40
40,41
