# Sandbox for testing


## SQLAlchemy tutorial

In [9]:
import sqlalchemy

# Display the installed SQLAlchemy version as the cell output.
sqlalchemy.__version__

'1.4.12'

## Connection string

In [10]:
import os
from urllib.parse import quote

from dotenv import dotenv_values

# expanduser("~") resolves the home directory even when the HOME variable is
# unset, where os.getenv('HOME') would return None and break the path building.
home_dir = os.path.expanduser("~")
# ~/.mhk is a dotenv file; HOST_MHK_HOME is read from it below.
mhk_env = dotenv_values(os.path.join(home_dir, ".mhk"))
mhk_home_dir = mhk_env['HOST_MHK_HOME']
print("mhk-home: "+mhk_home_dir)
# The application's .env file provides the MySQL root password.
app_env = dotenv_values(os.path.join(mhk_home_dir, 'app', '.env'))
pwd = app_env['MYSQL_ROOT_PASSWORD']
# Percent-encode the password so reserved URL characters (@, /, :, ...) in it
# cannot corrupt the connection URL. `pwd` itself is kept raw for later cells.
connection_string = "mysql+mysqlconnector://root:{pwd}@localhost:3307/mysql".format(pwd=quote(pwd, safe=""))


mhk-home: /Users/jrc/mhk-home


### Set database

In [11]:
from urllib.parse import quote

# Name of the database (schema) to connect to.
db='toliveira'
# Rebuild the URL for that database; the password is percent-encoded so that
# reserved URL characters (@, /, :, ...) in it cannot corrupt the URL.
connection_string = "mysql+mysqlconnector://root:{pwd}@localhost:3307/{db}".format(pwd=quote(pwd, safe=""),db=db)

In [12]:
from sqlalchemy import create_engine,text

# echo=True makes the engine log every SQL statement it emits;
# future=True selects the SQLAlchemy 2.0-style API.
engine = create_engine(connection_string, echo=True, future=True)

# Sanity check: count the entities of each class in the selected database.
class_counts_sql = text("select class, count(*) from entities group by class")
with engine.connect() as conn:
    rows = conn.execute(class_counts_sql).all()
    print(rows)

2021-05-30 20:05:46,612 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'sql_mode'
2021-05-30 20:05:46,613 INFO sqlalchemy.engine.Engine [raw sql] {}
2021-05-30 20:05:46,636 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2021-05-30 20:05:46,637 INFO sqlalchemy.engine.Engine [generated in 0.00613s] {}
2021-05-30 20:05:46,660 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2021-05-30 20:05:46,661 INFO sqlalchemy.engine.Engine [raw sql] {}
2021-05-30 20:05:46,690 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-05-30 20:05:46,691 INFO sqlalchemy.engine.Engine select class, count(*) from entities group by class
2021-05-30 20:05:46,692 INFO sqlalchemy.engine.Engine [generated in 0.00748s] {}
[('act', 33), ('attribute', 19340), ('carta', 5), ('class', 17), ('evento', 45), ('person', 2283), ('relation', 3418), ('rperson', 64), ('source', 30)]
2021-05-30 20:05:46,719 INFO sqlalchemy.engine.Engine ROLLBACK


In [21]:
from itertools import combinations

import networkx as nx
from sqlalchemy.orm import Session

# Attribute type whose shared values define the links between entities.
attribute = 'wicky-viagem'
# 'value-node': every distinct value becomes a node linked to each entity that has it.
# any other value (e.g. 'cliques'): entities sharing a value are linked pairwise.
mode = 'value-node'

G = nx.Graph()

# Distinct values recorded for the attribute; the placeholder value '?' is skipped.
stmt = text("select distinct the_value from attributes where the_type = :the_type and the_value <> '?'").bindparams(the_type=attribute)
# Built once, outside the loop: entities (with dates) that have a given value.
entities_stmt = text("select entity,the_date from attributes where the_type=:the_type and the_value = :the_value")

with Session(engine) as session:
    # Fetch all values first so the outer result is fully consumed before the
    # per-value queries run on the same session (and no name is rebound mid-loop).
    values = session.execute(stmt).all()
    for (avalue,) in values:
        entities = session.execute(
            entities_stmt, {'the_type': attribute, 'the_value': avalue}
        ).all()

        if mode == "value-node":
            G.add_node(avalue, attribute=attribute)
            for entity, date in entities:
                G.add_edge(avalue, entity, date=date)
        elif len(entities) > 1:
            # TODO: optional date range filtering
            for (e1, d1), (e2, d2) in combinations(entities, 2):
                G.add_edge(e1, e2, date1=d1, date2=d2, attribute=attribute, value=avalue)

# Show the resulting graph object as the cell output.
G

1-05-30 20:10:44,833 INFO sqlalchemy.engine.Engine [cached since 295.6s ago] {'the_type': 'wicky-viagem', 'the_value': '8'}
2021-05-30 20:10:44,844 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 20:10:44,846 INFO sqlalchemy.engine.Engine [cached since 295.6s ago] {'the_type': 'wicky-viagem', 'the_value': '5'}
2021-05-30 20:10:44,856 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 20:10:44,857 INFO sqlalchemy.engine.Engine [cached since 295.6s ago] {'the_type': 'wicky-viagem', 'the_value': '131'}
2021-05-30 20:10:44,867 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 20:10:44,868 INFO sqlalchemy.engine.Engine [cached since 295.7s ago] {'the_type': 'wicky-viagem', 'the_value': '154'}
2021-05-30 20:10:44,881 INFO sqlalchem

<networkx.classes.graph.Graph at 0x11f54ed30>

In [22]:
# Display a view of every node currently in G.
G.nodes()


NodeView(('87', 'baltasar-diego-da-rocha', 'deh-christophe-cloche', 'deh-claude-motel', 'deh-domenico-fuciti', 'deh-edmond-poncet', 'deh-germain-macret', 'deh-giandomenico-gabiani', 'deh-goncalo-de-oliveira', 'deh-ignace-baudet-de-beauregard', 'deh-ignace-baudet-de-beauregard-ref1', 'deh-ignace-baudet-de-beauregard-ref2', 'deh-ignace-baudet-de-beauregard-ref3', 'deh-jacques-motel', 'deh-jean-forget', 'deh-joseph-francois-tissanier', 'deh-louis-gobet', 'deh-manoel-soares', 'deh-nicolas-motel', 'deh-pedro-de-lis', 'deh-pierre-albier', '106', '99', 'deh-adam-algenler', 'deh-beat-amrhyn', 'deh-didachus-garces', 'deh-didachus-garces-ref1', 'deh-francesco-maria-gatinara', 'deh-francois-belgoder', 'deh-francois-belgoder-ref1', 'deh-jean-van-moll', 'deh-jean-van-moll-ref1', 'deh-joao-fernandes-fou', 'deh-lorenzo-tanaglia', 'deh-lorenzo-tanaglia-ref1', 'deh-prospero-intorcetta', 'deh-simao-rodrigues', 'deh-thomas-van-der-elst', 'deh-thomas-van-der-elst-ref1', 'simao-rodrigues-ref1', '103', 'deh

In [23]:
# find_cliques yields the maximal cliques of G lazily; the iterator is
# exhausted once this loop finishes, so re-run the whole cell to list them again.
cliques=nx.find_cliques(G)
for c in cliques:
    print(c)

['deh-jean-simon-bayard', '115']
['deh-nikolaus-fiva', '72']
['31', 'deh-joao-da-rocha']
['31', 'deh-diogo-de-mesquita']
['31', 'deh-joao-soeiro']
['31', 'deh-francesco-de-petris']
['31', 'deh-mateus-de-couros']
['31', 'deh-houang-francisco-martins-ref2']
['31', 'deh-pero-da-cruz']
['deh-joao-fernandes-fou', '99']
['deh-jose-pereira', '118']
['144', 'deh-antonio-goncalves-ref1']
['deh-giacomo-antonini', '153']
['80', 'deh-hendrik-uwens']
['80', 'deh-johannes-ciermans']
['deh-francesco-maria-gatinara', '99']
['deh-antonio-preto', '89']
['deh-antonio-da-silva', '117']
['deh-anton-gogeisl', '156']
['deh-joao-de-seixas', '160']
['63', 'deh-andrius-rudamina']
['deh-tome-pereira', '94']
['69', 'deh-antonio-ferreira-ref1']
['76', 'deh-lucas-correa']
['deh-diogo-de-sotomaior', '92']
['deh-luigi-gonzaga', '129']
['64', 'deh-francisco-ferreira-fei-ref1']
['158', 'deh-hermann-engers']
['158', 'deh-jakob-graff']
['158', 'deh-josef-kayser']
['deh-hernando-de-alcaraz', '16']
['deh-manuel-teixeira', 

## Timelink functions

### Generate networks from database

In [79]:
def network_from_attribute(engine,attribute: str, mode='cliques'):
    """ Generate a network from common attribute values

    Args:
        engine: a sqlalchemy engine
        attribute (str): the type of attribute
        mode (str): how the network is built.
            'cliques' (default): entities that share a value are linked
            pairwise, so each value shared by several entities produces a
            clique. Any mode other than 'value-node' behaves this way.
            'value-node': each distinct value becomes a node, linked to
            every entity that has that value.

    Returns:
        a networkx Graph object (networkx.classes.graph.Graph)

    This function will generate a network connecting the
    entities that have the same value for the
    attribute given in the parameter. Values equal to '?' are ignored.

    In 'cliques' mode the network will have as Edge Attributes
    - attribute: the name of the attribute given in the parameter
    - value: the value common to the two nodes
    - date1,date2: the dates of the attribute in each node.

    In 'value-node' mode each value node carries the node attribute
    `attribute` and each value-entity edge carries the edge attribute `date`.

    Notes:
        The result is a simple Graph, so when the same pair of nodes is
        linked more than once the edge data of the last link replaces the
        earlier one. In 'cliques' mode an entity recorded twice with the
        same value is paired with itself, producing a self-loop.
    """

    # Statements are built once here instead of on every loop iteration.
    values_stmt = text("select distinct the_value from attributes where the_type = :the_type and the_value <> '?'")
    entities_stmt = text("select entity,the_date from attributes where the_type=:the_type and the_value = :the_value")

    G = nx.Graph()
    with engine.connect() as conn:
        # Fetch all values first so the cursor is free for the per-value queries.
        values = conn.execute(values_stmt, {'the_type': attribute}).all()
        for (avalue,) in values:
            entities = conn.execute(
                entities_stmt, {'the_type': attribute, 'the_value': avalue}
            ).all()
            if mode == 'value-node':
                G.add_node(avalue, attribute=attribute)
                for entity, date in entities:
                    G.add_edge(avalue, entity, date=date)
            elif len(entities) > 1:
                # TODO: optional date range filtering
                for (e1, d1), (e2, d2) in combinations(entities, 2):
                    G.add_edge(e1, e2, date1=d1, date2=d2, attribute=attribute, value=avalue)
    return G

In [81]:

# Build the graph linking entities that share the same 'jesuita-entrada' value.
G = network_from_attribute(engine,'jesuita-entrada')
# Single-use iterator over the maximal cliques of G; rebuild it before iterating again.
cliq=nx.find_cliques(G)

ntity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 16:52:06,569 INFO sqlalchemy.engine.Engine [cached since 42.95s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Avignon'}
2021-05-30 16:52:06,584 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 16:52:06,585 INFO sqlalchemy.engine.Engine [cached since 42.97s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Lisboa'}
2021-05-30 16:52:06,608 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 16:52:06,609 INFO sqlalchemy.engine.Engine [cached since 42.99s ago] {'the_type': 'jesuita-entrada', 'the_value': 'Chieri'}
2021-05-30 16:52:06,629 INFO sqlalchemy.engine.Engine select entity,the_date from attributes where the_type=%(the_type)s and the_value = %(the_value)s
2021-05-30 16:52:06,630 INFO sqlalchemy.engine.Engin

In [82]:
# Print each maximal clique (a list of node ids) on its own line.
# This consumes `cliq`; re-run the previous cell before running this one again.
for c in cliq:
    print(c)


['deh-ferdinando-bonaventura-moggi', 'deh-alexandre-de-rhodes', 'deh-george-brett-keynes', 'deh-baldassare-citadella', 'deh-matteo-ricci', 'deh-giovanni-filippo-de-marini', 'deh-stanislao-torrente', 'deh-johann-adam-schall-von-bell', 'deh-lazzaro-cattaneo']
['deh-etienne-joseph-le-couteulx', 'deh-abraham-le-royer', 'deh-pierre-noel-le-cheron-dincarville', 'deh-paul-gobert', 'deh-touissant-masson', 'deh-etienne-yang', 'deh-pierre-foureau', 'deh-louis-seguin', 'deh-charles-francois-xavier-de-brevedent', 'deh-prost-ref1', 'deh-julien-placide-hervieu', 'deh-louis-bazin', 'deh-emeric-langlois-de-chavagnac', 'deh-antoine-chomel', 'deh-joseph-nicolas-charenton', 'deh-jean-regis-lieou', 'deh-georges-berthe', 'deh-jacques-le-faure', 'deh-simao-rodrigues-de-azevedo', 'deh-aloys-kao', 'deh-gabriel-leon-lamy', 'deh-jean-de-fontaney', 'deh-louis-porquet', 'deh-pierre-de-goville', 'deh-thomas-jean-baptiste-lieou', 'deh-pierre-ladmiral-ref4', 'deh-francois-xavier-ignace-lan', 'deh-titus-simeon-lecler