# Sandbox for testing


# TODO

* Normalize node attributes: `id`, `type` (entity class), `desc` (the name for person nodes, the attribute value for value nodes)
* Link each node to its URL, built from the `MHK_HOST` variable as `MHK_HOST/{dbname}/id/{id}`
* Work out how to color nodes by type
* Test other layouts

## SQLAlchemy tutorial

In [1]:
import sqlalchemy

# Display the installed SQLAlchemy version.
sqlalchemy.__version__

'1.4.6'

## Connection string

In [7]:
import os
from dotenv import dotenv_values
from sqlalchemy import create_engine,text

from urllib.parse import quote

# Locate the MHK installation: ~/.mhk provides HOST_MHK_HOME, and the
# app/.env file under that directory provides the MySQL root password.
# expanduser() falls back to the OS account database when HOME is unset,
# where os.getenv('HOME') would return None and break the concatenation.
home_dir = os.path.expanduser('~')
mhk_env = dotenv_values(os.path.join(home_dir, ".mhk"))
mhk_home_dir = mhk_env['HOST_MHK_HOME']
print("mhk-home: "+mhk_home_dir)
app_env = dotenv_values(os.path.join(mhk_home_dir, 'app', '.env'))
pwd = app_env['MYSQL_ROOT_PASSWORD']
# Percent-encode the password so characters such as '@', ':' or '/' cannot
# corrupt the URL. `pwd` itself is left unescaped for reuse in later cells.
connection_string = "mysql+mysqlconnector://root:{pwd}@localhost:3307/mysql".format(pwd=quote(pwd, safe=''))
engine = create_engine(connection_string,echo=False,future=True)
# List the schemas that contain an `entities` table.
with engine.connect() as conn:
    result = conn.execute(text("SELECT table_schema FROM information_schema.tables WHERE  table_name = 'entities'"))
    print( [dbname for (dbname,) in result])


mhk-home: /Users/joaquimcarvalho/mhk-home
['ilhavo', 'mhk', 'soure_editor', 'toliveira_china']


### Set database

In [8]:
from urllib.parse import quote

# Schema to analyse (one of the names printed by the previous cell).
db='toliveira_china'
# Percent-encode the password so characters such as '@', ':' or '/' cannot
# corrupt the URL.
connection_string = "mysql+mysqlconnector://root:{pwd}@localhost:3307/{db}".format(pwd=quote(pwd, safe=''),db=db)

In [9]:
from sqlalchemy import create_engine,text

# Re-create the engine against the selected database, with SQL echo enabled,
# and print how many entities exist per class.
engine = create_engine(connection_string, future=True, echo=True)
class_counts = text("select class, count(*) from entities group by class")
with engine.connect() as conn:
    rows = conn.execute(class_counts).all()
    print(rows)

2021-05-31 12:35:59,281 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'sql_mode'
2021-05-31 12:35:59,282 INFO sqlalchemy.engine.Engine [raw sql] {}
2021-05-31 12:35:59,291 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2021-05-31 12:35:59,291 INFO sqlalchemy.engine.Engine [generated in 0.00326s] {}
2021-05-31 12:35:59,303 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2021-05-31 12:35:59,303 INFO sqlalchemy.engine.Engine [raw sql] {}
2021-05-31 12:35:59,315 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-05-31 12:35:59,316 INFO sqlalchemy.engine.Engine select class, count(*) from entities group by class
2021-05-31 12:35:59,317 INFO sqlalchemy.engine.Engine [generated in 0.00376s] {}
[('act', 33), ('attribute', 19340), ('carta', 5), ('class', 17), ('evento', 45), ('person', 2283), ('relation', 3418), ('rperson', 65), ('source', 30)]
2021-05-31 12:35:59,387 INFO sqlalchemy.engine.Engine ROLLBACK


In [14]:
from itertools import combinations
import networkx as nx
from sqlalchemy.orm import Session

attribute='wicky-viagem'
# 'value-node': every attribute value becomes a node linked to the entities
# that have it. Any other mode (e.g. 'cliques'): entities that share a value
# are linked directly to each other.
mode='cliques'

G = nx.Graph()

values_stmt = text("select distinct the_value from attributes where the_type = :the_type and the_value <> '?'").bindparams(the_type=attribute)
entities_stmt = text("select entity,the_date from attributes where the_type=:the_type and the_value = :the_value")
with Session(engine) as session:
    # Read all distinct values up front so that no result set is still being
    # consumed while the per-value queries run on the same session.
    values = session.execute(values_stmt).all()
    for (avalue,) in values:
        entities = session.execute(entities_stmt,{'the_type':attribute,'the_value':avalue}).all()

        if mode=="value-node":
            G.add_node(avalue,attribute=attribute)
            for (entity,date) in entities:
                G.add_edge(avalue,entity,date = date)
        elif len(entities)>1:
            # TODO: optional date range filtering
            for ((e1,d1),(e2,d2)) in combinations(entities,2):
                G.add_edge(e1,e2,date1=d1,date2=d2,attribute=attribute,value=avalue)

# Display the graph object as the cell result.
G

 12:37:08,388 INFO sqlalchemy.engine.Engine [cached since 51.82s ago] {'the_type': 'wicky-viagem', 'the_value': '8'}
2021-05-31 12:37:08,397 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:37:08,397 INFO sqlalchemy.engine.Engine [cached since 51.83s ago] {'the_type': 'wicky-viagem', 'the_value': '5'}
2021-05-31 12:37:08,403 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:37:08,404 INFO sqlalchemy.engine.Engine [cached since 51.84s ago] {'the_type': 'wicky-viagem', 'the_value': '131'}
2021-05-31 12:37:08,414 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:37:08,415 INFO sqlalchemy.engine.Engine [cached since 51.85s ago] {'the_type': 'wicky-viagem', 'the_value': '154'}
2021-05-31 12:37:08,442 INFO sqlalchemy.engin

<networkx.classes.graph.Graph at 0x10999aca0>

In [16]:
# Show the node view of the graph built above.
G.nodes


NodeView(('baltasar-diego-da-rocha', 'deh-christophe-cloche', 'deh-claude-motel', 'deh-domenico-fuciti', 'deh-edmond-poncet', 'deh-germain-macret', 'deh-giandomenico-gabiani', 'deh-goncalo-de-oliveira', 'deh-ignace-baudet-de-beauregard', 'deh-ignace-baudet-de-beauregard-ref1', 'deh-ignace-baudet-de-beauregard-ref2', 'deh-ignace-baudet-de-beauregard-ref3', 'deh-jacques-motel', 'deh-jean-forget', 'deh-joseph-francois-tissanier', 'deh-louis-gobet', 'deh-manoel-soares', 'deh-nicolas-motel', 'deh-pedro-de-lis', 'deh-pierre-albier', 'deh-adam-algenler', 'deh-beat-amrhyn', 'deh-didachus-garces', 'deh-didachus-garces-ref1', 'deh-francesco-maria-gatinara', 'deh-francois-belgoder', 'deh-francois-belgoder-ref1', 'deh-jean-van-moll', 'deh-jean-van-moll-ref1', 'deh-joao-fernandes-fou', 'deh-lorenzo-tanaglia', 'deh-lorenzo-tanaglia-ref1', 'deh-prospero-intorcetta', 'deh-simao-rodrigues', 'deh-thomas-van-der-elst', 'deh-thomas-van-der-elst-ref1', 'simao-rodrigues-ref1', 'deh-adam-weidenfied', 'deh-an

In [17]:
# Print every maximal clique of G, one per line.
for members in nx.find_cliques(G):
    print(members)

['deh-paulo-de-mesquita', 'deh-martim-correa', 'deh-policarpo-de-sousa', 'deh-bento-de-abreu', 'deh-antonio-de-magalhaes', 'deh-policarpo-de-sousa-ref1', 'deh-domingos-pinheiro', 'deh-luis-de-sequeira']
['deh-didachus-garces-ref1', 'deh-didachus-garces', 'deh-jean-van-moll-ref1', 'deh-simao-rodrigues', 'deh-joao-fernandes-fou', 'deh-lorenzo-tanaglia-ref1', 'deh-prospero-intorcetta', 'deh-jean-van-moll', 'deh-francois-belgoder-ref1', 'deh-francois-belgoder', 'deh-francesco-maria-gatinara', 'deh-thomas-van-der-elst', 'deh-beat-amrhyn', 'deh-thomas-van-der-elst-ref1', 'deh-lorenzo-tanaglia', 'deh-adam-algenler', 'simao-rodrigues-ref1']
['deh-bartolomeo-tedeschi', 'deh-pedro-ribeiro', 'deh-pedro-marques-senior', 'deh-vincenzo-carruba', 'deh-antonio-de-andrade']
['deh-joao-de-lemos', 'deh-joao-de-barros', 'deh-francisco-rebelo', 'deh-agostinho-de-barros', 'deh-manuel-de-aguiar', 'deh-joao-de-barros-ref1']
['deh-joao-de-lemos', 'deh-manuel-de-carvalho']
['deh-muzio-rocchi', 'deh-sebastiao-fe

## Timelink functions

### Generate networks from database

In [19]:
def network_from_attribute(engine,attribute: str, mode: str='cliques'):
    """ Generate a network from common attribute values

    Args:
        engine: a sqlalchemy engine
        attribute (str): the type of attribute
        mode (str): how shared values are represented in the graph.
            'cliques' (the default) links every pair of entities that
            have the same value directly to each other. 'value-node'
            adds one node per value and links it to each entity that
            has that value. Any other string is treated as 'cliques'.

    Returns:
        a networkx Graph object (networkx.classes.graph.Graph)

    This function will generate a network connecting the
    entities that have the same value for the
    attribute given in the parameter. Values equal to '?'
    are ignored.

    In 'cliques' mode the network will have as Edge Attributes
    - attribute: the name of the attribute given in the parameter
    - value: the value common to the two nodes
    - date1,date2: the dates of the attribute in each node.

    In 'value-node' mode each value node has the Node Attribute
    - attribute: the name of the attribute given in the parameter
    and each value-entity edge has the Edge Attribute
    - date: the date of the attribute in the entity.

    NOTE(review): in 'value-node' mode a value and an entity id that
    happen to be the same string would collapse into one node.

    """

    values_sql = text("select distinct the_value from attributes where the_type = :the_type and the_value <> '?'")
    entities_sql = text("select entity,the_date from attributes where the_type=:the_type and the_value = :the_value")
    G = nx.Graph()
    with engine.connect() as conn:
        # Materialise the distinct values first so the connection is free
        # for the per-value queries issued inside the loop.
        values = conn.execute(values_sql,{'the_type':attribute}).all()
        for (avalue,) in values:
            entities = conn.execute(entities_sql,{'the_type':attribute,'the_value':avalue}).all()
            if mode == 'value-node':
                G.add_node(avalue,attribute=attribute)
                for (entity,date) in entities:
                    G.add_edge(avalue,entity,date=date)
            elif len(entities)>1:
                # TODO: optional date range filtering
                for ((e1,d1),(e2,d2)) in combinations(entities,2):
                    G.add_edge(e1,e2,date1=d1,date2=d2,attribute=attribute,value=avalue)
    return G

In [20]:

# Build the graph for the 'jesuita-entrada' attribute and prepare an
# iterator over its maximal cliques (consumed in the next cell).
G = network_from_attribute(engine, attribute='jesuita-entrada', mode='cliques')
cliq = nx.find_cliques(G)

select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:38:48,336 INFO sqlalchemy.engine.Engine [cached since 151.8s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Avignon'}
2021-05-31 12:38:48,346 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:38:48,347 INFO sqlalchemy.engine.Engine [cached since 151.8s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Lisboa'}
2021-05-31 12:38:48,360 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:38:48,361 INFO sqlalchemy.engine.Engine [cached since 151.8s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Chieri'}
2021-05-31 12:38:48,368 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-31 12:38:48,369 INFO sqlalchemy.engi

In [21]:
# Print each clique produced by the iterator from the previous cell.
for clique in cliq:
    print(clique)


['deh-paulo-de-mesquita', 'deh-joaquim-lobo', 'deh-luis-de-franca', 'deh-antonio-pires', 'deh-marcos-silveiro', 'deh-jose-pacheco', 'deh-joao-mendes', 'deh-antonio-da-costa-i-ref2', 'deh-jose-bernardo-de-almeida', 'deh-nicolau-da-fonseca', 'deh-melchor-diaz-ref1', 'deh-theodore-villers', 'deh-manuel-jose-ref2', 'deh-joao-da-cunha', 'deh-joao-de-sa', 'deh-pedro-de-meireles', 'deh-cristoforo-fiori', 'deh-carlos-de-resende', 'deh-bento-ferreira', 'deh-manuel-teixeira', 'deh-aleixo-rodrigues', 'deh-domingos-alvares', 'deh-francisco-da-veiga', 'deh-joao-mourao', 'deh-manuel-da-fonseca', 'deh-miguel-vieira', 'deh-baltasar-gago', 'deh-manuel-rodrigues-ii-ref2', 'deh-reginaldo-burger', 'deh-duarte-de-sande', 'deh-diogo-vidal', 'deh-antonio-de-saldanha', 'deh-antonio-ferreira', 'deh-manuel-jose-de-carvalho', 'deh-domingos-magalhaes', 'deh-antonio-da-costa-iii', 'deh-jose-pereira', 'deh-matias-correa', 'deh-michel-alfonso-chen']
['deh-luis-de-mendanha', 'deh-pedro-martins-ref3', 'deh-joao-ribeir