# Knowledge Extraction

In [93]:
import json, requests, re
import numpy as np, pandas as pd

import geovpylib.database as db
import geovdata.sparql as sparql
import geovdata.kit as kit

import spacy
import scipy
from pyvis.network import Network


db.connect_yellow('switzerland_and_beyond')

def ask_ollama(prompt, model='mistral'):
    url='http://localhost:11434/api/generate'
    response = requests.post(url, json={'model':model,'prompt':prompt, 'option':{'temperature':0}})
    text = response.text.strip()
    lines = text.split('\n')
    tokens = list(map(lambda line: json.loads(line)['response'], lines))
    formated = ''.join(tokens)
    answer = formated.strip()
    return answer

def analyze(string):
    doc2 = nlp(string)
    print("Tokens:")
    for token in doc2:
        print(f"{token.text} ({token.pos_}-{token.dep_})", end=" ")
    print()
    print("Entities:")
    for ent in doc2.ents:
        print(ent.text, ent.label_)

nlp = spacy.load('en_core_web_lg')
nlp.add_pipe("merge_entities")

[DB] Requests will not be executed
[DB] Connecting to YELLOW database "switzerland_and_beyond" ... Connected!


<function spacy.pipeline.functions.merge_entities(doc: spacy.tokens.doc.Doc)>

# Raw text

In [2]:
persons = db.query(f'select * from hls.person')

person = persons.sample(1).iloc[0]
name = person['name']
notice = person['notice']

print(kit.wrap(notice))

Naît le 11.10.1940 à Schaffhouse, protestant, de Schattenhalb, Meilen et Zurich. Entrepreneur, politicien zurichois, membre de l'Union démocratique du
centre (UDC), conseiller national, conseiller fédéral, figure marquante de la transformation de l'UDC en parti national-conservateur disposant d'une
large base électorale.


# Pseudo structured data

In [44]:
llm_response = ask_ollama(f"""
Provide me all statements you understand from the following text.
Statements should be short, concise like RDF triples, but in understandable text.
If date are provided, they should have the following format: day.month.year.
Names should be fully written, each time.
Nothing should be implied.
                    
Text: "About {name}. {notice}"
""")
# print(llm_response)

In [46]:
assertions = llm_response.split("\n")
assertions = [text[text.find(' ') + 1:] for text in assertions]

for assertion in assertions:
    print(assertion)

Christoph Blocher was born on 11th.10.1940 in Schaffhouse.
Christoph Blocher is protestant.
Christoph Blocher comes from Schattenhalb, Meilen and Zurich.
Christoph Blocher is an entrepreneur and a politician from Zurich.
Christoph Blocher is a member of the Union démocratique du centre (UDC).
Christoph Blocher is a national counselor.
Christoph Blochers is a federal counselor.
Christoph Blocher is a notable figure in the transformation of UDC into a national-conservative party.
Christoph Blocher has a large electoral base.


# Get the graph

**Tool functions**

In [111]:
# To have a unique index
index = 0
def get_index():
    global index
    index += 1
    return index


# Parse a date: 31.12.2020 => (2020, 12, 31)
def parse_date(number):
    splitted = number.split('.')
    if len(splitted) == 3: return (int(splitted[2]), int(splitted[1]), int(splitted[0].replace('th', '')))
    print(number)
    return int(number)


# Keep knowledge of entity
entities = []
def get_entity(klass, label):
    global entities

    same = list(filter(lambda entity: entity['class'] == klass, entities))
    same = list(filter(lambda entity: entity['label'] == label, same))

    if len(same) == 1: return same[0]
    else: 
        to_return = {
            'pk': get_index(),
            'class': klass,
            'label': f"{label.title()}\n({klass})"
        }
        entities.append(to_return)
        return to_return


def get_label_of_entity(pk):
    global entities
    same = list(filter(lambda entity: entity['pk'] == pk, entities))
    return same[0]['label']

# all_colors = ['#f3c169', '#83ea1d', '#092524', '#a784a6', '#130551', '#4d0190', '#40b71b', '#215ef9', '#f9f357', '#586e11', '#e907bd', '#9f0a58', '#2b32f8', '#7a8e6b', '#436c81', '#dad258', '#eaaba4', '#8e5188', '#06889b', '#95b64f', '#7e99f6', '#e88b47', '#9e4a83', '#a117fe', '#5a5082', '#2a70e9', '#e56d08', '#a1fa9a', '#5d5e94', '#7ec00c', '#947763', '#741c90', '#fe53e2', '#0ce971', '#38c8ce', '#a4ef93', '#b450d6', '#c85099', '#81dc88', '#bc707a', '#a80cc6', '#266af4', '#6043b1', '#ee007c', '#e07f4b', '#e9f26d', '#9aa803', '#14ed04', '#55d40f', '#fab120', '#1c6b64', '#27c9ad', '#824e2a', '#b34149', '#96f684', '#e1ab51', '#635f12', '#5b95a7', '#1c5b22', '#55df5a', '#a47d61', '#9a491b', '#1f45f7', '#6ca29c', '#c5a715', '#a655f7', '#f74090', '#2577d5', '#9ee76f', '#22c01a', '#fabb7c', '#a4cdcf', '#9b0525', '#adf30f', '#1f738d', '#bb173f', '#3cc809', '#4f0110', '#76040d', '#f578f7', '#2bc21f', '#9597f7', '#8459d9', '#329cd0', '#9de477', '#4781fc', '#fecf42', '#e2b6ce', '#cfb109', '#364784', '#4532eb', '#941f7c', '#ab29a3', '#6ca99c', '#54a749', '#84230c', '#8bc063', '#67f846', '#4be798', '#013d0f', '#cf82a3', '#110404', '#e5c8d4', '#ee8a16', '#89460b', '#17672f', '#c19d44', '#b39921', '#f7438e', '#9324a1', '#82bfc6', '#b1a6e4', '#56513b', '#b3c2fb', '#2a434e', '#8e8cd1', '#91d5db', '#14ec07', '#14e7f7', '#bdea8b', '#73f303', '#d1c906', '#000321', '#1907c9', '#552b37', '#1499c5', '#4e0e58', '#f2d4a8', '#f44e30', '#e4d358', '#701166', '#168598', '#42650f', '#4b963b', '#b93479', '#5e1e17', '#b80582', '#5ca925', '#3d54c6', '#1e45f9', '#fd232d', '#134886', '#fe6739', '#1d1cfa', '#6924cc', '#b9c080', '#804e5b', '#0e3cb3', '#bbbbe3', '#168717', '#735890', '#abbca1', '#371551', '#975abc', '#6e5a3c', '#018b63', '#0143be', '#61a13a', '#e9f979', '#feaff5', '#0ba143', '#cc6a9c', '#7599d9', '#a0d008', '#5b27ac', '#719fb0', '#b669e0', '#4ac979', '#ad490a', '#ed68d5', '#e9887d', '#4031fb', '#f6156f', '#276755', '#6dcbce', '#60991d', '#507688', '#ca98f6', '#c6843e', '#4d4a25', '#8fea3c', '#a53ce6', '#1f16eb', '#ea126a', '#e20607', '#641c88', '#97fcc2', '#7c2d09', '#91211b', '#3018f0', '#5d504e', '#b5d205', '#bfbbb5', '#21397e', '#bb1e68', '#17b8aa', '#f8a8b3', '#4e8c73', '#9ac69e', '#58dd10', '#3e4b8a', '#fd2787', '#630a35', '#774fc9', '#e52839', '#d5b371', '#c0c0b9', '#950313', '#c73161', '#12604c', '#c6d14b', '#d6b6a8', '#db7c2d', '#2346fe', '#da8827', '#9c9f99', '#8d3d85', '#e40a2e', '#20bfb5', '#70472e', '#ec87aa', '#8bfa41', '#d1a5e7', '#541270', '#aa2663', '#14ac09', '#6f9cb2', '#2702df', '#2a426a', '#f78491', '#679d22', '#cfad3b', '#3023c8', '#a5f61e', '#0ba8fd', '#d3fca1', '#073c2d', '#b3643e', '#abce4f', '#4c4bba', '#6529fa', '#23228c', '#b4aced', '#776937', '#9dabc0', '#63f55b', '#415286', '#45137f', '#585fad', '#8d8d67', '#1c2c9f', '#0e47e8', '#8eb0b0', '#559884', '#3cb425', '#45bb4e', '#3686ef', '#1130ea', '#c3591c', '#2c39cc', '#b01232', '#2ab113', '#f391fe', '#d2cec5', '#9bf89f', '#051a42', '#ed6fcf', '#fe7fe5', '#c2925a', '#a1b0f1', '#7aaecb', '#681ea5', '#043aeb', '#df3f99', '#faec82', '#b798e0', '#1f828f', '#91bd83', '#cc9209', '#03afb5', '#3062e1', '#2045d1', '#e45bf6', '#8b59de', '#856fd8', '#c91b31', '#7b43a4', '#38a3c2', '#583c83', '#e0dad4', '#3d4db2', '#4aee3c', '#95fb65', '#9c840f', '#4ddc61', '#0924cd', '#fa9218', '#2d9a23', '#def281', '#3cda64', '#f74d97', '#da0382', '#636760', '#3a20cf', '#d5c4f0', '#61a8f7', '#d7e649', '#4ac80c', '#5b686e', '#7a8e1b', '#3a9ab0', '#9b97fc', '#ecdc25', '#266505', '#b6e4e4', '#dd5f76', '#77ad2c', '#9b87b0', '#e8077d', '#24c696', '#aeaf48', '#e1e1aa', '#188ef5', '#805e1c', '#945faa', '#4d0ed1', '#6ca92e', '#668501', '#d488b7', '#ade797', '#054625', '#8f3369', '#805f12', '#bf7680', '#bd9589', '#3127a5', '#3794ae', '#8eafed', '#61aae6', '#0be95b', '#fae52a', '#47c5bb', '#8382ab', '#632ab2', '#4873a4', '#970fbc', '#ec8e0d', '#a69e1a', '#98048b', '#035d30', '#7ccddf', '#be11f2', '#f1bf94', '#2b9a44', '#7cce82', '#9f90f8', '#3e1156', '#7b6857', '#daf5d5', '#6ee335', '#267878', '#67de78', '#236c54', '#3b1aec', '#551181', '#c5571c', '#dc10fc', '#52205e', '#41a05c', '#f4ba7a', '#c205cb', '#7c0779', '#a4d86b', '#5afb53', '#81fd52', '#8e70f9', '#34c715', '#43a329', '#797968', '#b51f34', '#721d62', '#cc73a9', '#dd00c0', '#819923', '#c382cc', '#3059de', '#dc94a6', '#cf89d2', '#a37ed2', '#8ea8dd', '#d53b4a', '#72a46f', '#4878f5', '#1c635c', '#8ca745', '#62d764', '#0c35b6', '#b92efe', '#4f144f', '#6507c0', '#920d32', '#113fc5', '#3c4fb1', '#dba26e', '#35ae7c', '#e8d2da', '#6d5670', '#35e603', '#19d3d6', '#06da43', '#284317', '#e1ed78', '#2df3de', '#441908', '#2642cb', '#86f7bd', '#714540', '#b7491e', '#f1e072', '#91e8c7', '#a93cb7', '#b7dbe9', '#015fc0', '#675b43', '#306842', '#b115ed', '#96e7b3', '#9f9527', '#38965f', '#3d0aec', '#5f86f9', '#f57005', '#f2fb2d', '#e9018a', '#183fef', '#b0c8ca', '#554076', '#5dcf35', '#09ec08', '#18b9c4', '#43e29d', '#77244b', '#fefb49', '#04ce9b', '#5e1e02', '#8b4448', '#e8a0e6', '#5d9782', '#86696c', '#089896', '#5be604', '#b033dc', '#85b2a8', '#7a5fd9', '#4cb070', '#d0fe22', '#d03e26', '#b0350b', '#302ffd', '#284a6c', '#f08c51', '#8053b7', '#65bab3', '#e269e4', '#4dbf13', '#5a2255', '#4dbc72', '#048779', '#02dc08', '#17b8b3', '#0f4808', '#40b729', '#483574', '#48ac05', '#78815e', '#21d0c3', '#1e227b', '#1fd4e8', '#1e6edc', '#147ad0', '#a94372', '#9ab7e5', '#bb61c9', '#29b91a', '#236d53', '#03793e', '#0df96a', '#fc9a1f', '#33ee69', '#3239f2', '#6236a4', '#bc340e', '#accbcd', '#59d166', '#8a1654', '#73a2c5', '#941645', '#68b8df', '#acf589', '#d6a82c', '#b99001', '#d9b5e6', '#1b463d', '#a166c0', '#b9f226', '#029c73', '#d595c7', '#6e25e0', '#99f3fb', '#5a8456', '#bc7ed4', '#944cdb', '#a3f078', '#b7310b', '#2dc5ee', '#175a47', '#564ce6', '#716a7c', '#2aef59', '#b64467', '#d8a568', '#8a8b15', '#f1fdaf', '#fe5c29', '#1eeeb7', '#65764a', '#3ce75f', '#64f2dc', '#cb39e2', '#d02ea4', '#a0856e', '#25195b', '#77ed69', '#89cad5', '#f930a2', '#b9404a', '#e8301d', '#86f63f', '#62e76d', '#c1415b', '#d50c9d', '#5008f0', '#e14954', '#2feb55', '#dde36e', '#3503af', '#8efcd7', '#2a75d9', '#c760b8', '#5554c3', '#2a4c13', '#ee9438', '#573a88', '#540a6c', '#433692', '#fcb514', '#7ec936', '#7a35a2', '#7cb539', '#8d9488', '#1d0ca3', '#175e4e', '#3fdb45', '#860218', '#6a4342', '#f1d774', '#3cc055', '#ec5de8', '#321485', '#34b40f', '#f0843d', '#cd43db', '#782a3b', '#0276d7', '#98a8f0', '#040a3d', '#dd9029', '#f3328c', '#5dd663', '#2cbd22', '#352d83', '#7392ef', '#e7020a', '#64f0c0', '#82f217', '#98fe02', '#27accd', '#299c32', '#829b74', '#351bb4', '#20d8ec', '#1de776', '#bc891e', '#9603af', '#1e551a', '#9dc3ce', '#205c0c', '#6ee829', '#a5b11c', '#d3644c', '#bd290e', '#d422f3', '#d2d6f8', '#5c42c0', '#fa5579', '#58c5df', '#033ff1', '#d37282', '#8b183b', '#097330', '#09b42e', '#a72f6f', '#636c35', '#0b0e5b', '#e6a275', '#0ce01b', '#ca5f7c', '#ee8a75', '#1903d6', '#238dfb', '#af1b68', '#7a35e6', '#0764f8', '#04c55d', '#c777a7', '#81a6b9', '#62c110', '#289d8a', '#fe79a0', '#69731a', '#522586', '#862c66', '#837781', '#5cb253', '#f8c7a6', '#b6b43c', '#b0d657', '#a44ce5', '#9b2635', '#218589', '#0c1567', '#23af0d', '#49dae7', '#855c03', '#fed3ab', '#43918d', '#6c2f2e', '#aa17b2', '#74187b', '#70c4ba', '#16052e', '#a39960', '#bd773b', '#14198b', '#5b2e34', '#d1190b', '#53023e', '#bce545', '#c7b9a6', '#6e384d', '#ac3cf5', '#6d830e', '#8a8db4', '#01f2d5', '#1e3788', '#2208a4', '#451de7', '#40421b', '#90e18f', '#69e223', '#3997c9', '#dca1dd', '#4f89e3', '#bace44', '#48ec43', '#fa49c2', '#e9a731', '#9651d9', '#48a5b7', '#a5715f', '#cf54fa', '#2887da', '#f9c1ae', '#90c694', '#4856f9', '#845289', '#eb14d0', '#003d85', '#562c38', '#ed6be2', '#d8c305', '#92c43c', '#b081dd', '#0bbc16', '#5b432e', '#149113', '#23fb21', '#3d055d', '#45d774', '#79cede', '#af82ec', '#ad4c58', '#f6392d', '#49fa48', '#c4a567', '#e9e080', '#9d5ed1', '#6efeae', '#e9f7b2', '#926f65', '#0fbbad', '#ce42cc', '#298132', '#4a9ec8', '#0b6e05', '#a49808', '#111cc3', '#f8f15f', '#62b96e', '#5f534e', '#215d1d', '#fc8bd4', '#6824f6', '#89750a', '#daee43', '#95c130', '#2379cf', '#67c311', '#3cd06a', '#2c41ca', '#c788e4', '#0af1c3', '#d08a25', '#77bfdd', '#ecf408', '#8cbdf2', '#cfce78', '#1e8510', '#039096', '#079c0b', '#184462', '#4ae113', '#c461be', '#0b6568', '#4b0816', '#6b8622', '#5216bc', '#64b9ba', '#a5d2d0', '#04998b', '#ae7354', '#326994', '#37e36c', '#abb1e8', '#d8c9d0', '#92aa49', '#8ad145', '#daadf7', '#cc6a03', '#3a263b', '#a3b12c', '#b66e1c', '#c5aa42', '#04f729', '#c94a38', '#a1a609', '#4cfc04', '#ade256', '#43fa50', '#c79f32', '#d746bb', '#ae27ee', '#fdc235', '#1cd202', '#843c2d', '#35420d', '#87be83', '#1de221', '#be0494', '#fe0087', '#f6f463', '#8b3943', '#08c6f1', '#8bac22', '#b800b7', '#db03b7', '#53000a', '#8f4d10', '#6c7e01', '#22a0fc', '#eec00e', '#28f011', '#31c864', '#527862', '#2fec05', '#974634', '#a50878', '#496030', '#3a587d', '#dd2102', '#63db29', '#321df8', '#dc8a56', '#f2a516', '#fb80e9', '#58f795', '#bf95d3', '#8c7bfe', '#28c146', '#67ab78', '#810cfd', '#191828', '#1b90a9', '#5b4c30', '#f134bd', '#39dbee', '#115fd1', '#e404bc', '#cc9ae3', '#6925da', '#265cf7', '#030b9e', '#f7b404', '#72dea6', '#2c4d02', '#6cee1a', '#ad3a7e', '#cd4377', '#834fa6', '#2f2e49', '#4ef729', '#b88f6c', '#1b5f4d', '#0dfea4', '#383503', '#c0f2a5', '#b0cd1f', '#b97658', '#7df5f5', '#6d099e', '#13f862', '#008913', '#5fa159', '#39393d', '#e38221', '#aba47f', '#ee2794', '#9b8794', '#4a9745', '#ddfc96', '#2182f0', '#9e4f90', '#a09884', '#f4c314', '#d43c31', '#fe8f83', '#b5596d', '#ae4fee', '#13987a', '#b109a8', '#c2cbb8', '#cfd8c9', '#ab46ad', '#b8c9a2', '#470e93', '#1b6251', '#87f033', '#d2af99', '#3633b0', '#9ab6bb', '#d24add', '#e7010b', '#35dfdc', '#564de3', '#638ef7', '#954632', '#e8e42b', '#c144dd', '#67fb58', '#a40836', '#f0bf54', '#e8b324', '#89f0fd', '#55d0d7', '#f3bf21', '#7fba3e', '#a82fbb', '#4cb66d', '#decdce', '#db3240', '#709b67', '#d21b86', '#4f9b96', '#8378b8', '#66f671', '#795795', '#9689e2', '#2187eb', '#d02f4d', '#858ec0', '#af75e5', '#44ee7c', '#73ddd1', '#f36c0d', '#7609e4', '#69777c', '#aa5b94', '#dd1225', '#6fc3eb', '#b8d6e2', '#008dee', '#a8dc2a', '#6404ea', '#38e945', '#142b74', '#81aed8', '#a5396b', '#b1daf1', '#259162', '#361a76', '#5bbf17', '#d531aa', '#c9dcd4', '#cc1473', '#cf3096', '#ad928e', '#23b26e', '#eb9e00', '#b19177', '#369921', '#cafce7', '#99514d', '#5dc1eb', '#358e66', '#055eab', '#546e1c', '#559267', '#cff019', '#efd11d', '#da5ad4', '#f9e680', '#d413dc', '#a9e570', '#16142e', '#65659c', '#ed51f8', '#ba56a9', '#b1dcee', '#646eb0', '#6c5c24', '#445ee4', '#e2f121', '#40c77b', '#2c6c15', '#111b3e', '#f7740b', '#41aa46', '#989f17', '#88c39a', '#0298cf', '#6baabe', '#2e85d4', '#68f2a6', '#bb1772', '#2bd959', '#35d78f', '#a09856', '#1f466f', '#808759', '#697959', '#331700', '#c14556', '#e932c5', '#e804cf', '#575840', '#706c37', '#49f149', '#b63f2a', '#3aa384', '#7de52f', '#f55a33', '#76745f', '#c14358', '#f5fd27', '#f854be', '#771311', '#70b407', '#a8d517', '#005c4d', '#809a5b', '#a60679', '#ed53d5', '#83f3f0', '#f5b2c8', '#3fb7ab', '#a5180e', '#285329', '#2f126c', '#3a3b4d', '#c070da', '#59d80f', '#cfe9cd', '#9e0045', '#ce819c', '#66ec1c', '#985b5f', '#5a1b86', '#817bd8', '#92d76e', '#6ea8d5', '#ca249b', '#65a1c9', '#91c02a', '#b6c4ae', '#6520af', '#578bee', '#00e4e7', '#578fb4', '#dc9714', '#fbd8e5', '#e8cb3a', '#388f2f', '#3a157a', '#37ab35', '#c378dd', '#d6ab5c', '#d4313c', '#041b4d', '#466ae7', '#43beac', '#2526d7', '#1415fa', '#b530ed', '#2893f9', '#70f52d', '#93a3bb', '#10c1f8', '#65e671', '#793f1a', '#9324aa', '#0551f7', '#3a72ca', '#647c47', '#a79d87', '#59e3ca', '#12d2c5', '#15182f', '#e93c3d', '#07fcd8', '#37015a', '#955e6a', '#4fee32', '#c4dc6b', '#c06273', '#7ad3e0', '#20e2cc', '#accec3', '#19b454', '#8db2bb', '#aaaf54', '#3807fb', '#851d85', '#5b835d', '#8dbab6', '#62e1e3', '#e5ebbc', '#a323c6', '#7318a6', '#a809b8', '#d8b318', '#93712a', '#1885f7', '#a27468', '#7976fa', '#49d350', '#4ced2a', '#0b6537', '#5807d6', '#b36431', '#19060f', '#8a095b', '#4817cb', '#b4225f', '#4776e8', '#8fa87f', '#24f37a', '#2aeb69', '#dc5e7c', '#b926f2', '#3ef2f1', '#04c40e', '#e2c458', '#3f9f62', '#a3e493', '#1e9577', '#05b168', '#aea050', '#b0d423', '#084679', '#7d1e0a', '#bd64d1', '#5cd11e', '#91bd5c', '#e86719', '#6fd258', '#547b88', '#6fa43e', '#73fce6', '#215537', '#2a8028', '#b05628', '#16910c', '#867cf7', '#3650c7', '#212c96', '#de7654', '#17f349', '#969763', '#727ffc', '#a5a831', '#60f8fd', '#eadf31', '#6a7953', '#da52ca', '#37ce9b', '#efea08', '#64e529', '#c9df79', '#585603', '#4b514a', '#827cb1', '#2353b6', '#82f493', '#3da26b', '#b79617', '#92379c', '#34feb1', '#8b401f', '#6a5441', '#f31dbe', '#bc1256', '#a83337', '#2fe8f9', '#94039f', '#06d816', '#2232d4', '#5c8801', '#ac1c83', '#a0f67d', '#204400', '#5b660c', '#a03e1d', '#c48bb0', '#d10f87', '#69ba1c', '#bb4aae', '#8491af', '#241790', '#b6ac59', '#de1936', '#3314f1', '#c717c2', '#ef36ac', '#47f62c', '#c6bbae', '#39117e', '#69a87b', '#140f34', '#e15ae6', '#05f561', '#eecbf4', '#fa5725', '#117f3c', '#0c47b6', '#f3f754', '#2cf89b', '#413599', '#9814d6', '#ee39bd', '#ae07e7', '#8a9fa8', '#c03247', '#fec207', '#6ce8c9', '#462b17', '#4bc1e7', '#837743', '#4c7921', '#a6f754', '#059d8b', '#330306', '#4dda12', '#9e9a99', '#418cef', '#34b09f', '#1bb4d4', '#850ce1', '#29a6ca', '#2231bd', '#369d44', '#f322e5', '#69e533', '#a2d02f', '#aca372', '#10b948', '#4eaeed', '#5fd9dc', '#bc4d0e', '#345664', '#756d82', '#75283b', '#74d1c9', '#0e5a9e', '#ad02fd', '#82f4d8', '#40801b', '#16258d', '#89a550', '#1f3fef', '#b3386a', '#2ab946', '#4398dc', '#7627d2', '#409927', '#d4db56', '#47a746', '#8b5459', '#c32bc3', '#7fd982', '#ec3f0f', '#c99ee2', '#253f67', '#adeef5', '#d80ed0', '#46a20e', '#a942ab', '#72d071', '#90a5fc', '#cf466e', '#3ff110', '#950128', '#0799f1', '#cc5773', '#603b7c', '#4aea85', '#78c78d', '#fe8b5f', '#5c762e', '#fcd9e0', '#451403', '#7576c2', '#c40c8a', '#dd01e5', '#dac938', '#b1bee8', '#8af464', '#978131', '#aa6745', '#e07083', '#b61e92', '#55a39f', '#1158bd', '#d61c62', '#f08f0f', '#bb9d10', '#1b136b', '#aeeeb2', '#2983cb', '#77bb9d', '#41db34', '#36c89a', '#61d04b', '#db5716', '#d62dd6', '#e23cbc', '#0eb60f', '#d02b18', '#c4f284', '#5cbd22', '#0d0a72', '#58af1b', '#1e9e08', '#2358a2', '#bee942', '#0dbf39', '#a64440', '#c955d4', '#dc102b', '#022992', '#13aad1', '#7b4d31', '#1c90a8', '#4f05b9', '#aa2154', '#7ff61b', '#781949', '#12a65a', '#c0012c', '#cd9a64', '#1a97de', '#82fe29', '#638765', '#80e200', '#12b469', '#21d758', '#da8889', '#2554d5', '#025fd6', '#78bd9d', '#791a9c', '#13a77d', '#e295f7', '#48e70e', '#b0cdb2', '#8bbeab', '#5a7a19', '#2229c3', '#986b6d', '#a37cb8', '#d20926', '#dd0378', '#82e0a7', '#60ed36', '#a3107e', '#d201c3', '#6680af', '#e07bc4', '#ebfbc9', '#93b77a', '#967dd1', '#6a07a8', '#07a996', '#706eef', '#a33b18', '#8f0e5a', '#dd068b', '#429878', '#30fa80', '#9cbb8e', '#e481c8', '#2dd10c', '#a83218', '#4361b9', '#96cbfe', '#d068d2', '#53d56a', '#f0e38c', '#f084fa', '#1c19d5', '#ec317d', '#b9fca8', '#2daf78', '#07990a', '#3f9caf', '#22694c', '#804255', '#67ee87', '#b96afa', '#4bf3da', '#557926', '#697152', '#4748ee', '#9759c5', '#907b71', '#7d6ac2', '#3a3a15', '#e752b9', '#083205', '#6464d9', '#afde8a', '#889198', '#8b2e47', '#34af0d', '#a23def', '#c8721c', '#7165d0', '#c5ddfc', '#8aa469', '#ca9988', '#77a31b', '#2e19fe', '#a9b699', '#604e1b', '#f1950e', '#b32222', '#85bbd1', '#5a7052', '#595177', '#50f0eb', '#dc00de', '#04c1fc', '#af2c51', '#c904d6', '#8535e8', '#095920', '#425e06', '#698dc7', '#fca60f', '#282def', '#1d0b74', '#f441a1', '#16816f', '#daf860', '#61e4d3', '#572f30', '#fdbd62', '#cefeda', '#fb0120', '#d68deb', '#80a6aa', '#d53a18', '#0f9501', '#ae5287', '#65f5e0', '#63d066', '#e38439', '#01efe8', '#d9bff0', '#e69612', '#69fd0d', '#4665fe', '#f5f9be', '#f2838a', '#aeef29', '#2876d2', '#41220f', '#91dc3a', '#cd990b', '#78a638', '#88f885', '#dbfc4b', '#c9fb32', '#316345', '#c7ee59', '#96d4fa', '#fc25e3', '#1280b0', '#e77a1f', '#61abc9', '#3e6a64', '#e9f426', '#29aa62', '#8c339a', '#9890af', '#624eb2', '#e9934a', '#2f7ea4', '#8cbb44', '#786223', '#6b0850', '#de267c', '#5f1910', '#c433f1', '#7faf19', '#19ae71', '#045fe6', '#5f0c7f', '#990e28', '#caea60', '#aae6d4', '#dae8e1', '#22378e', '#8ffdea', '#61ae6e', '#2879be', '#0b1ce1', '#46c02b', '#15145b', '#e1a658', '#095850', '#876b82', '#ec48df', '#bad7e4', '#0fa357', '#b42718', '#71ea6c', '#1f6cd4', '#540935', '#5c915a', '#39b7a0', '#460786', '#74d6d6', '#9a4d76', '#3b484d', '#b7741e', '#5c90f9', '#56a3b0', '#6fa353', '#995e81', '#3a585c', '#842c92', '#b68d83', '#e46a6c', '#dc8d49', '#cae1ae']
colors = {
    'Person': '#9c0ef3',
    'Appellation in a Language': '#89e240',
    'Birth':'#98922c',
    'Death':'#6f034b',
    'Religious entity': '#e6586b',
}

def get_color_of_entity(pk):
    global entities, colors
    entity = list(filter(lambda entity: entity['pk'] == pk, entities))[0]
    return colors[entity['class']]


**BIRTHS**

In [84]:
def handle_birth(doc, verbose=False):
    
    # Detetion parameters
    exact_words = ['birth', 'born']
    lemmas = ['died']

    # Detection
    extract = False
    for token in doc:
        if token.text in exact_words: extract = True
        if token.lemma in lemmas: extract = True
        if extract: break
    if not extract: return []

    # Extraction
    if verbose: print(f'BIRTH')
    persons = list(filter(lambda entity: entity.label_ == "PERSON", doc.ents))
    numbers = list(filter(lambda token: token.pos_ == "NUM", doc))
    gpes = list(filter(lambda entity: entity.label_ == "GPE", doc.ents))

    # Checks
    if len(persons) > 1 or len(numbers) > 1 or len(gpes) > 1:
        print("-------")
        print(f"Problem in parsing document '{doc.text}' when extracting birth:")
        print("--> Persons:", persons)
        print("--> Numbers:", numbers)
        print("--> GPEs:", gpes)
        return []

    person_name = persons[0].text if len(persons) > 0 else 'Unknown person'
    birth = get_entity('Birth', person_name)

    # Extract persons
    if len(persons) > 0: 
        if verbose: print("-- Person(s) found:", persons)
        person = get_entity('Person', persons[0].text)
        aial = get_entity('Appellation in a Language', persons[0].text)
        yield (birth['pk'], "brought into life", person['pk'])
        yield (aial['pk'], "is appellation in a language of", person['pk'])
        yield (aial['pk'], "refers to name", persons[0].text)

    # Extract dates
    if len(numbers) > 0: 
        if verbose: print("-- Number(s) found:", numbers)
        yield (birth['pk'], "at some time within", parse_date(numbers[0].text))

    # Extract places
    if len(gpes) > 0: 
        if verbose: print("-- GPE(s) found:", gpes)
        geoplace = get_entity('Geographical Place', gpes[0].text)
        aial = get_entity('Appellation in a Language', gpes[0].text)
        yield (birth['pk'], "took place at", geoplace['pk'])
        yield (aial['pk'], "is appellation in a language of", geoplace['pk'])
        yield (aial['pk'], "refers to name", gpes[0].text)

**DEATH**

In [85]:
def handle_death(doc, verbose=False):
    
    # Detetion parameters
    exact_words = ['death', 'died']
    lemmas = ['died']

    # Detection
    extract = False
    for token in doc:
        if token.text in exact_words: extract = True
        if token.lemma in lemmas: extract = True
        if extract: break
    if not extract: return []


    # Extraction
    if verbose: print(f'DEATH')
    persons = list(filter(lambda entity: entity.label_ == "PERSON", doc.ents))
    numbers = list(filter(lambda token: token.pos_ == "NUM", doc))
    gpes = list(filter(lambda entity: entity.label_ == "GPE", doc.ents))

    # Checks
    if len(persons) > 1 or len(numbers) > 1 or len(gpes) > 1:
        print("-------")
        print(f"Problem in parsing document '{doc.text}' when extracting death:")
        print("--> Persons:", persons)
        print("--> Numbers:", numbers)
        print("--> GPEs:", gpes)
        return []

    person_name = persons[0].text if len(persons) > 0 else 'Unknown person'
    death = get_entity('Death', person_name)

    # Extract persons
    if len(persons) > 0: 
        if verbose: print("-- Person(s) found:", persons)
        person = get_entity('Person', persons[0].text)
        aial = get_entity('Appellation in a Language', persons[0].text)
        yield (death['pk'], "was death of", person['pk'])
        yield (aial['pk'], "is appellation in a language of", person['pk'])
        yield (aial['pk'], "refers to name", persons[0].text)

    # Extract dates
    if len(numbers) > 0: 
        if verbose: print("-- Number(s) found:", numbers)
        yield (death['pk'], "at some time within", parse_date(numbers[0].text))

    # Extract places
    if len(gpes) > 0: 
        if verbose: print("-- GPE(s) found:", gpes)
        geoplace = get_entity('Geographical Place', gpes[0].text)
        aial = get_entity('Appellation in a Language', gpes[0].text)
        yield (death['pk'], "took place at", geoplace['pk'])
        yield (aial['pk'], "is appellation in a language of", geoplace['pk'])
        yield (aial['pk'], "refers to name", gpes[0].text)

**CONFESSION**

In [60]:
def handle_confession(doc, verbose=False):

    # Detection parameters
    exact_words = ['catholic', 'protestant']
    lemmas = []

    # Detection
    extract = False
    for token in doc:
        if token.text in exact_words: extract = True
        if token.lemma in lemmas: extract = True
        if extract: break
    if not extract: return []

    # Extraction
    if verbose: print(f'CONFESSION')
    persons = list(filter(lambda entity: entity.label_ == "PERSON", doc.ents))
    confessions = list(filter(lambda token: token.text in exact_words, doc))

    # Checks
    if len(persons) > 1 or len(confessions) > 1:
        print("-------")
        print(f"Problem in parsing document '{doc.text}' when extracting confessions:")
        print("--> Persons:", persons)
        print("--> Confessions:", confessions)
        return []

    # Is the information complete?
    if len(persons) == 0 or len(confessions) == 0: 
        if verbose: print('No couple confession/person found')
        return []

    if verbose: print("-- NORP(s) found:", confessions)
    if verbose: print("-- Person(s) found:", persons)   

    confession = get_entity('Religious entity', confessions[0].text)
    aial_confession = get_entity('Appellation in a Language', confessions[0].text)
    person = get_entity('Person', persons[0].text)
    aial_person = get_entity('Appellation in a Language', persons[0].text)

    yield (confession['pk'], "pertains to", person['pk'])
    yield (aial_confession['pk'], "is appellation in a language of", confession['pk'])
    yield (aial_confession['pk'], "refers to name", confessions[0].text) 
    yield (aial_person['pk'], "is appellation in a language of", person['pk'])
    yield (aial_person['pk'], "refers to name", persons[0].text) 

**Compute**

In [114]:
graph = []

for doc in nlp.pipe(assertions):
    # print(doc)
    graph += handle_birth(doc, False)
    graph += handle_death(doc, False)
    graph += handle_confession(doc, False)

for i, triplet in enumerate(graph):
    if isinstance(triplet[2], str):
        graph[i] = (graph[i][0], graph[i][1], graph[i][2].title())


graph = pd.DataFrame(data=graph, columns=['subject', 'predicate', 'object'])
graph.drop_duplicates(inplace=True)

graph['subject_label'] = [get_label_of_entity(pk) for pk in graph['subject']]
graph['object_label'] = [get_label_of_entity(pk) if isinstance(pk, int) else f"{pk}\n(Value)" for pk in graph['object']]
graph['subject_color'] = [get_color_of_entity(pk) if isinstance(pk, int) else f"#000" for pk in graph['subject']]
graph['object_color'] = [get_color_of_entity(pk) if isinstance(pk, int) else f"#000" for pk in graph['object']]

graph_small = graph[['subject_label', 'predicate', 'object_label']]
graph_small.columns = ['subject', 'predicate', 'object']

graph_small

Unnamed: 0,subject,predicate,object
0,Christoph Blocher\n(Birth),brought into life,Christoph Blocher\n(Person)
1,Christoph Blocher\n(Appellation in a Language),is appellation in a language of,Christoph Blocher\n(Person)
2,Christoph Blocher\n(Appellation in a Language),refers to name,Christoph Blocher\n(Value)
3,Christoph Blocher\n(Birth),at some time within,"(1940, 10, 11)\n(Value)"
4,Protestant\n(Religious entity),pertains to,Christoph Blocher\n(Person)
5,Protestant\n(Appellation in a Language),is appellation in a language of,Protestant\n(Religious entity)
6,Protestant\n(Appellation in a Language),refers to name,Protestant\n(Value)
7,Christoph Blocher\n(Appellation in a Language),is appellation in a language of,Christoph Blocher\n(Person)
8,Christoph Blocher\n(Appellation in a Language),refers to name,Christoph Blocher\n(Value)


# Show graph

In [119]:
nodes_subject = graph[['subject_label', 'subject_color']].drop_duplicates(subset=['subject_label'])
nodes_object = graph[['object_label', 'object_color']].drop_duplicates(subset=['object_label'])

# network = Network(height=750, width=1500, notebook=True, cdn_resources='remote')
network = Network(height=750, width=1500, notebook=True, cdn_resources="in_line", select_menu=True)
network.add_nodes(nodes_subject['subject_label'].tolist(), color=nodes_subject['subject_color'].tolist())
network.add_nodes(nodes_object['object_label'].tolist(), color=nodes_object['object_color'].tolist())

for i, row in graph.iterrows():
    network.add_edge(str(row['subject_label']), str(row['object_label']), label=row['predicate'])


network.set_options("""
    const options = {
        "nodes": {"font": {"face": "tahoma"}},
        "edges": {
            "arrows": {"to": {"enabled": true}},
            "font": {"size": 10,"face": "tahoma","align": "top"}
        }
    }
""")

# network.show_buttons(filter_=['physics'])
network.show('graph.html', local=True, notebook=True)

graph.html
