# Database overview notebook

> First time use: follow instructions in the README.md file in this directory.

## Setup

In [340]:
from timelink.mhk.utilities import get_engine, get_dbnames, get_connection_string
from sqlalchemy import engine, text, MetaData
print('Available databases:')
print(get_dbnames())
#%load_ext sql
#%config SqlMagic.displaycon=False

Available databases:
['china', 'soure_cloud', 'ucprosop']


## Connect to database

### Choose the database

In [341]:
db = 'soure_editor'
connection_string = get_connection_string(db)
engine = get_engine(db)

## Database status

In [342]:
with engine.connect() as connection:
    classes = connection.execute(
        text('select class,count(*) as n from entities group by class')
    )
    classes
for c in classes:
    print(c['class'],c.n)

act 10844
acta 180
acusacoes 315
aforamento 2
attribute 66135
cartaperdao 2
caso 40
class 26
crime 1
divida 3
escritura 243
geoentity 9
good 6
group_element 10870
item 92
object 2
person 52134
relation 87353
rperson 357
source 184


## Map MHK database

In [343]:

from sqlalchemy import Column,String,Integer,DateTime, ForeignKey
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()


## Entities
     
        |-----------|--------------|------|-----|---------------------+-----------------------------+
        | Field     | Type         | Null | Key | Default             | Extra                       |
        +-----------+--------------+------+-----+---------------------+-----------------------------+
        | id        | varchar(64)  | NO   | PRI | NULL                |                             |
        | class     | varchar(64)  | YES  | MUL | NULL                |                             |
        | inside    | varchar(64)  | YES  | MUL | NULL                |                             |
        | the_order | decimal(6,0) | YES  |     | NULL                |                             |
        | the_level | decimal(6,0) | YES  |     | NULL                |                             |
        | the_line  | decimal(6,0) | YES  |     | NULL                |                             |
        | groupname | varchar(32)  | YES  | MUL | NULL                |                             |
        | updated   | timestamp    | NO   | MUL | CURRENT_TIMESTAMP   | on update CURRENT_TIMESTAMP |
        | indexed   | timestamp    | NO   | MUL | 1974-04-25 00:00:01 |                             |
        +-----------+--------------+------+-----+---------------------+-----------------------------+

In [344]:

class Entity(Base):
    __tablename__ = 'entities'

    id = Column(String, primary_key=True)
    pom_class = Column('class',String,ForeignKey('classes.id'))
    inside = Column(String, ForeignKey('entities.id'))
    the_order = Column(Integer)
    the_level = Column(Integer)
    the_line = Column(Integer)
    groupname = Column(String)
    updated = Column(DateTime)
    indexed = Column(DateTime)


    rels_in = relationship("Relation", back_populates="dest")
    rels_out = relationship("Relation",back_populates="org")

    # see https://docs.sqlalchemy.org/en/14/orm/inheritance.html
    # To handle non mapped pom_class see https://github.com/sqlalchemy/sqlalchemy/issues/5445
    #
    #    __mapper_args__ = {
    #       "polymorphic_identity": "entity",
    #    "polymorphic_on": case(
    #        [(type.in_(["parent", "child"]), type)], else_="entity"
    #    ),
    # 
    #  This defines what mappings do exist
    # [aclass.__mapper_args__['polymorphic_identity'] for aclass in Entity.__subclasses__()]

    __mapper_args__ = {
        'polymorphic_identity': 'entity',
        'polymorphic_on':pom_class
    }

    # untested
    @classmethod  # untested
    def get_subclasses(cls):
        for subclass in cls.__subclasses__():
            yield from subclass.get_subclasses()
            yield subclass

    @classmethod 
    def get_pom_classes(cls):
        return [aclass.__mapper_args__['polymorphic_identity'] 
                    for aclass 
                    in Entity.__subclasses__()]

    def __repr__(self):
        return (
            f'Entity(id="{self.id}", '
            f'pom_class="{self.pom_class}",'
            f'inside="{self.inside}", '
            f'the_order={self.the_order}, '
            f'the_level={self.the_level}, '
            f'the_line={self.the_line}, '
            f'groupname="{self.groupname}", '
            
            f'updated={self.updated}, '
            f'indexed={self.indexed},'
            f')'
        )
        
    def __str__(self):
        return (f'{self.groupname}${self.id}/type={self.pom_class}')


## Attributes
    +-----------+----------------+------+-----+---------+-------+
    | Field     | Type           | Null | Key | Default | Extra |
    +-----------+----------------+------+-----+---------+-------+
    | id        | varchar(64)    | NO   | PRI | NULL    |       |
    | entity    | varchar(64)    | YES  | MUL | NULL    |       |
    | the_type  | varchar(512)   | YES  | MUL | NULL    |       |
    | the_value | varchar(1024)  | YES  | MUL | NULL    |       |
    | the_date  | varchar(24)    | YES  | MUL | NULL    |       |
    | obs       | varchar(16000) | YES  |     | NULL    |       |
    +-----------+----------------+------+-----+---------+-------+

In [345]:

class Attribute(Entity):
    __tablename__ = 'attributes'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    entity = Column(String,ForeignKey('entities.id'))
    the_type = Column(String)
    the_value = Column(String)
    the_date = Column(String)
    obs = Column(String)

    the_entity = relationship("Entity",foreign_keys=[entity],back_populates="attributes",)

    __mapper_args__ = {
        'polymorphic_identity':'attribute',     
        'inherit_condition': id == Entity.id
    }

    def __repr__(self):
        sr = super().__repr__()
        return (
            f'Attribute(id={sr}, '
            f'entity="{self.entity}", '
            f'the_type="{self.the_type}", '
            f'the_value="{self.the_value}", '
            f'the_date="{self.the_date}"", '
            f'obs={self.obs}'
            f')'
        )

    def __str__(self):
        r = f'     ls${self.the_type}/{self.the_value}/{self.the_date}'
        if self.obs is not None:
                r = (f'{r}/obs={self.obs}')
        return r


Entity.attributes = relationship("Attribute", foreign_keys=[Attribute.entity], back_populates="the_entity")


## Relations   
    +-------------+----------------+------+-----+---------+-------+
    | Field       | Type           | Null | Key | Default | Extra |
    +-------------+----------------+------+-----+---------+-------+
    | id          | varchar(64)    | NO   | PRI | NULL    |       |
    | origin      | varchar(64)    | YES  | MUL | NULL    |       |
    | destination | varchar(64)    | YES  | MUL | NULL    |       |
    | the_date    | varchar(24)    | YES  | MUL | NULL    |       |
    | the_type    | varchar(32)    | YES  | MUL | NULL    |       |
    | the_value   | varchar(256)   | YES  | MUL | NULL    |       |
    | obs         | varchar(16000) | YES  |     | NULL    |       |
    +-------------+----------------+------+-----+---------+-------+

In [346]:
class Relation(Entity):  # should extend Entity but gives error

    __tablename__ = 'relations'

    id = Column(String,ForeignKey('entities.id'), primary_key=True)
    #rel_entity = relationship("Entity",foreign_keys='id',back_populates='rel')
    origin = Column(String,ForeignKey('entities.id'))
    org = relationship(Entity,foreign_keys=[origin], back_populates='rels_out')

    destination = Column(String,ForeignKey('entities.id'))
    dest = relationship("Entity",foreign_keys=[destination], back_populates="rels_in")
    the_type = Column(String)
    the_value = Column(String)
    the_date = Column(String)
    obs = Column(String)

    __mapper_args__ = {
        'polymorphic_identity':'relation',
        'inherit_condition': id == Entity.id
    }

    def __repr__(self):
        sr = super().__repr__()
        return (
            f'Relation(id={sr}, '
            f'origin="{self.origin}", '
            f'destination="{self.destination}", '
            f'the_type="{self.the_type}", '
            f'the_value="{self.the_value}", '
            f'the_date="{self.the_date}"", '
            f'obs={self.obs}'
            f')'
        )

    def __str__(self):
        if self.dest is not None and self.dest.pom_class == 'person':
            r = f'rel${self.the_type}/{self.the_value}/{self.dest.name}/{self.destination}/{self.the_date}'
        else:
            r = f'rel${self.the_type}/{self.the_value}/{self.destination}/{self.the_date}'
        if self.obs is not None:
                r = (f'{r}  /obs={self.obs}')
        return r

Entity.rels_out = relationship("Relation",foreign_keys=[Relation.origin],back_populates="dest")
Entity.rels_in = relationship("Relation",foreign_keys=[Relation.destination],back_populates="org")

  Entity.rels_out = relationship("Relation",foreign_keys=[Relation.origin],back_populates="dest")
  Entity.rels_out = relationship("Relation",foreign_keys=[Relation.origin],back_populates="dest")
  Entity.rels_out = relationship("Relation",foreign_keys=[Relation.origin],back_populates="dest")
  Entity.rels_in = relationship("Relation",foreign_keys=[Relation.destination],back_populates="org")
  Entity.rels_in = relationship("Relation",foreign_keys=[Relation.destination],back_populates="org")
  Entity.rels_in = relationship("Relation",foreign_keys=[Relation.destination],back_populates="org")


## Persons    
    +-------+----------------+------+-----+---------+-------+
    | Field | Type           | Null | Key | Default | Extra |
    +-------+----------------+------+-----+---------+-------+
    | id    | varchar(64)    | NO   | PRI | NULL    |       |
    | name  | varchar(128)   | YES  | MUL | NULL    |       |
    | sex   | char(1)        | YES  |     | NULL    |       |
    | obs   | varchar(16000) | YES  |     | NULL    |       |
    +-------+----------------+------+-----+---------+-------+

In [347]:
class Person(Entity):
    __tablename__ = 'persons'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    name = Column(String)
    sex = Column(String(1))
    obs = Column(String)

    __mapper_args__ = {
        'polymorphic_identity':'person'
    }

    def __repr__(self):
        sr = super().__repr__()
        f'Person(id={sr}, '
        f'name="{self.name}", '
        f'sex="{self.sex}", '
        f'obs={self.obs}'
        f')'

        
    def __str__(self):
        r = f'{self.groupname}${self.name}/{self.sex}/id={self.id}'
        if self.obs is not None:
                r = (f'{r}  /obs={self.obs}')
        return r

## Objects

    +----------+----------------+------+-----+---------+-------+
    | Field    | Type           | Null | Key | Default | Extra |
    +----------+----------------+------+-----+---------+-------+
    | id       | varchar(64)    | NO   | PRI | NULL    |       |
    | name     | varchar(64)    | YES  | MUL | NULL    |       |
    | the_type | varchar(32)    | YES  |     | NULL    |       |
    | obs      | varchar(16000) | YES  |     | NULL    |       |
    +----------+----------------+------+-----+---------+-------+

In [348]:
class Object(Entity):
    __tablename__ = 'objects'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    name = Column(String)
    the_type = Column(String(32))
    obs = Column(String)

    __mapper_args__ = {
        'polymorphic_identity':'object'
    }

    def __repr__(self):
        sr = super().__repr__()
        f'Object(id={sr}, '
        f'name="{self.name}", '
        f'the_type="{self.the_type}", '
        f'obs={self.obs}'
        f')'

    def __str__(self):
        r = f'{self.groupname}${self.name}/{self.the_type}/id={self.id}'
        if self.obs is not None:
                r = (f'{r}  /obs={self.obs}')
        return r


## Acts

    +----------+----------------+------+-----+---------+-------+
    | Field    | Type           | Null | Key | Default | Extra |
    +----------+----------------+------+-----+---------+-------+
    | id       | varchar(64)    | NO   | PRI | NULL    |       |
    | the_type | varchar(32)    | YES  |     | NULL    |       |
    | the_date | varchar(24)    | YES  | MUL | NULL    |       |
    | loc      | varchar(64)    | YES  |     | NULL    |       |
    | ref      | varchar(64)    | YES  |     | NULL    |       |
    | obs      | varchar(16000) | YES  |     | NULL    |       |
    +----------+----------------+------+-----+---------+-------+


In [349]:
class Act(Entity):
    __tablename__ = 'acts'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    the_type = Column(String(32))
    the_date = Column(String)
    loc = Column(String)
    ref = Column(String)
    obs = Column(String)

    __mapper_args__ = {
        'polymorphic_identity':'act'
    }


    def __repr__(self):
        sr = super().__repr__()
        return (
            f'Act(id={sr}, '
            f'the_type="{self.the_type}", '
            f'the_date="{self.the_date}", '
            f'local="{self.loc}", '
            f'ref="{self.ref}", '
            f'obs={self.obs}'
            f')'
            )

    def __str__(self):
        r = f'{self.groupname}${self.id}/{self.the_date}/type={self.the_type}/ref={self.ref}/loc={self.loc}'
        if self.obs is not None:
                r = (f'{r}  /obs={self.obs}')
        return r


## Sources

    +-----------+----------------+------+-----+---------+-------+
    | Field     | Type           | Null | Key | Default | Extra |
    +-----------+----------------+------+-----+---------+-------+
    | id        | varchar(64)    | NO   | PRI | NULL    |       |
    | the_type  | varchar(32)    | YES  |     | NULL    |       |
    | the_date  | varchar(24)    | YES  |     | NULL    |       |
    | loc       | varchar(64)    | YES  |     | NULL    |       |
    | ref       | varchar(64)    | YES  |     | NULL    |       |
    | kleiofile | varchar(1024)  | YES  |     | NULL    |       |
    | replaces  | varchar(254)   | YES  |     | NULL    |       |
    | obs       | varchar(16000) | YES  |     | NULL    |       |
    +-----------+----------------+------+-----+---------+-------+

In [350]:
class Source(Entity):
    __tablename__ = 'sources'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    the_type = Column(String(32))
    the_date = Column(String)
    loc = Column(String)
    ref = Column(String)
    kleiofile = Column(String)
    replaces = Column(String)
    obs = Column(String)

    __mapper_args__ = {
        'polymorphic_identity':'source'
    }

    def __repr__(self):
        sr = super().__repr__()
        return (
            f'Source(id={sr}, '
            f'the_type="{self.the_type}", '
            f'the_date="{self.the_date}", '
            f'local="{self.loc}", '
            f'ref="{self.ref}", '
            f'kleiofile="{self.kleiofile}", '
            f'replaces="{self.replaces}", '
            f'obs={self.obs}'
            f')'
        )

    def __str__(self):
        r = f'{self.groupname}${self.id}/{self.the_date}/type={self.the_type}/ref={self.ref}/loc={self.loc}/kleiofile={self.kleiofile}/replaces={self.replaces}'
        if self.obs is not None:
                r = (f'{r}  /obs={self.obs}')
        return r

## Classes

    +------------+-------------+------+-----+---------+-------+
    | Field      | Type        | Null | Key | Default | Extra |
    +------------+-------------+------+-----+---------+-------+
    | id         | varchar(64) | NO   | PRI | NULL    |       |
    | table_name | varchar(32) | YES  |     | NULL    |       |
    | group_name | varchar(32) | YES  |     | NULL    |       |
    | super      | varchar(64) | YES  |     | NULL    |       |
    +------------+-------------+------+-----+---------+-------+



In [351]:
from typing import Optional

class PomSomMapper(Entity):
    __tablename__ = 'classes'

    id = Column(String, ForeignKey('entities.id'), primary_key=True)
    table_name = Column(String)
    class_group = Column("group_name",String(32))
    super_class = Column("super", String)

    

    __mapper_args__ = {
        'polymorphic_identity':'class',
        'inherit_condition': id == Entity.id
    }

    @classmethod
    def find_pom_class(cls,session, poly_ident: String):
        """
        Find a pom_class by looking up the POM hierarchy.

        """
        current_class: Optional["PomSomMapper"] = session.query(Entity).get(poly_ident)
        if current_class is None:
            #print("POM class not found")
            return 'entity'
        if poly_ident in Entity.get_pom_classes():
            #print("Returning "+current_class.id)
            return current_class.id
        else:
            #print("Not found, going up the inheritance chain")
            return find_pom_class(session, current_class.super_class)   

    def __repr__(self):
        return (
            f'PomSomMapper(id={self.id}, '
            f'table_name="{self.table_name}", '
            f'class_group="{self.class_group}", '
            f'super_class="{self.super_class}" '
            f')'
        )

    def __str__(self):
        r = f'{self.id} table {self.table_name}\n'
        for cattr in self.class_attributes:
            r = r + f'{cattr.the_class}.{cattr.name} \tclass {cattr.colclass} \tcol {cattr.colname} \ttype {cattr.coltype} size {cattr.colsize} precision {cattr.colprecision} primary key {cattr.pkey} \n'
        return r

In [353]:
C = PomSomMapper.find_pom_class(session,'acta')
print(C)

Not found, going up the inheritance chain
act


In [283]:
from typing import Optional
def find_pom_class(session, poly_ident: String):
    current_class: Optional[PomSomMapper] = session.query(PomSomMapper).get(poly_ident)
    #print(current_class.id)
    if current_class is None:
        return 'entity'
    if poly_ident in Entity.get_pom_classes():
        #print("Returning "+current_class.id)
        return current_class.id
    else:
        #print("Not found, going up the inheritance chain")
        return find_pom_class(session, current_class.super_class)   

In [285]:
C = find_pom_class(session, 'acta')
print(C)

act



## Class attributes

    +--------------+--------------+------+-----+---------+-------+
    | Field        | Type         | Null | Key | Default | Extra |
    +--------------+--------------+------+-----+---------+-------+
    | the_class    | varchar(64)  | YES  | MUL | NULL    |       |
    | name         | varchar(32)  | YES  |     | NULL    |       |
    | colname      | varchar(32)  | YES  |     | NULL    |       |
    | colclass     | varchar(32)  | YES  |     | NULL    |       |
    | coltype      | varchar(32)  | YES  |     | NULL    |       |
    | colsize      | decimal(5,0) | YES  |     | NULL    |       |
    | colprecision | decimal(6,0) | YES  |     | NULL    |       |
    | pkey         | decimal(1,0) | YES  |     | NULL    |       |
    +--------------+--------------+------+-----+---------+-------+


In [278]:
class PomClassAttributes(Base):
    __tablename__ = 'class_attributes'

    the_class = Column(String, ForeignKey('classes.id'),primary_key=True)
    
    pom_class = relationship("PomSomMapper",foreign_keys=[the_class], back_populates='class_attributes')
    name = Column(String(32), primary_key=True)
    colname = Column(String(32))
    colclass = Column(String(32))
    coltype = Column(String)
    colsize = Column(Integer)
    colprecision = Column(Integer)
    pkey = Column(Integer)

PomSomMapper.class_attributes = relationship("PomClassAttributes", back_populates="pom_class")



In [230]:
from timelink.kleio.groups import KGroup

for gn in [(g._name,g) for g in KGroup.all_subclasses()]:
    print(gn)



('kleio', <class 'timelink.kleio.groups.KKleio'>)
('source', <class 'timelink.kleio.groups.KSource'>)
('act', <class 'timelink.kleio.groups.KAct'>)
('person', <class 'timelink.kleio.groups.KPerson'>)
('object', <class 'timelink.kleio.groups.KAbstraction'>)
('object', <class 'timelink.kleio.groups.KObject'>)
('ls', <class 'timelink.kleio.groups.KLs'>)
('atr', <class 'timelink.kleio.groups.KAtr'>)
('attr', <class 'timelink.kleio.groups.KAttribute'>)
('rel', <class 'timelink.kleio.groups.KRelation'>)


## Testing

In [231]:
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()


In [232]:
attribute = session.query(Attribute).where(Entity.pom_class  == 'attribute').first()
print(attribute)
print(repr(attribute))

     ls$residencia/barrosas/16970205/obs=%barrosos. 
Attribute(id=Entity(id="$c51-2-att3-12", pom_class="attribute",inside="$c51-2", the_order=164, the_level=5, the_line=136, groupname="ls", updated=2021-11-01 01:40:25, indexed=1974-04-25 00:00:01,), entity="$c51-2", the_type="residencia", the_value="barrosas", the_date="16970205"", obs=%barrosos. )


In [233]:
entity = attribute.the_entity
print(entity)

pnoivo$domingos andre/m/id=$c51-2


In [234]:
relation = session.query(Relation).where(Relation.pom_class  == 'relation').first()

print(repr(relation))
print(relation)


Relation(id=Entity(id="01705-a5e-rel16-4", pom_class="relation",inside="01705-a5e", the_order=273, the_level=5, the_line=329, groupname="rel", updated=2021-11-01 01:43:51, indexed=1974-04-25 00:00:01,), origin="01705-a5e", destination="1705-a5", the_type="sociabilidade", the_value="foram juntas 'a romaria", the_date="17051005"", obs=None)
rel$sociabilidade/foram juntas 'a romaria/catarina leitao/1705-a5/17051005


In [235]:
relation = session.query(Relation).where(Relation.the_type  == 'parentesco').first()
print(repr(relation))

print(relation.org)
print("   ",relation)
print(relation.dest)

Relation(id=Entity(id="01705-a5e-rel17-4", pom_class="relation",inside="01705-a5e", the_order=274, the_level=5, the_line=331, groupname="rel", updated=2021-11-01 01:43:51, indexed=1974-04-25 00:00:01,), origin="01705-a5e", destination="1705-a5ea", the_type="parentesco", the_value="foi mulher", the_date="17051005"", obs=None)
referida$maria fernandes/f/id=01705-a5e
    rel$parentesco/foi mulher/manuel cordeiro/1705-a5ea/17051005
referido$manuel cordeiro/m/id=1705-a5ea


In [236]:
act = session.query(Act).where(Entity.pom_class  == 'act').first()

print(repr(act))
print(act)

Act(id=Entity(id="1700-142", pom_class="act",inside="casamentos 1700", the_order=2, the_level=2, the_line=6, groupname="cas", updated=2021-11-01 01:41:00, indexed=2021-11-01 01:41:00,), the_type="cas", the_date="17000110", local="luis alvares pinto", ref="igreja matriz", obs=None)
cas$1700-142/17000110/type=cas/ref=igreja matriz/loc=luis alvares pinto


In [181]:
acta = session.query(Act).where(Entity.pom_class  == 'acta').first()

print(repr(acta))
print(acta)

AssertionError: No such polymorphic_identity 'acta' is defined

In [None]:
source = session.query(Source).where(Entity.pom_class  == 'source').first()

print(repr(source))
print(source)

Source(id=Entity(id="1D-8-2-4", pom_class="source",inside="root", the_order=1, the_level=1, the_line=4, groupname="fonte", updated=2021-11-01 01:44:51, indexed=1974-04-25 00:00:01,), the_type="livro de notas", the_date="14031691", local="AUC D-8-2-4", ref="None", kleiofile="/kleio-home/sources/soure-fontes/sources/1685-1720/notariais/1D-8-2-4.cli", replaces="n2.23", obs=legivel)
fonte$1D-8-2-4/14031691/type=livro de notas/ref=None/loc=AUC D-8-2-4/kleiofile=/kleio-home/sources/soure-fontes/sources/1685-1720/notariais/1D-8-2-4.cli/replaces=n2.23  /obs=legivel


In [180]:
pom_classes = session.query(PomSomMapper).where(Entity.pom_class  == 'class').all()
for pom_class in pom_classes:
    print(repr(pom_class))
    print(pom_class)

PomSomMapper(id=act, table_name="acts", class_group="historical-act", super_class="entity" )
act table acts
act.date 	class date 	col the_date 	type varchar size 24 precision 0 primary key 0 
act.id 	class id 	col id 	type varchar size 64 precision 0 primary key 1 
act.loc 	class loc 	col loc 	type varchar size 64 precision 0 primary key 0 
act.obs 	class obs 	col obs 	type varchar size 1024 precision 0 primary key 0 
act.ref 	class ref 	col ref 	type varchar size 64 precision 0 primary key 0 
act.type 	class type 	col the_type 	type varchar size 32 precision 0 primary key 0 

PomSomMapper(id=acta, table_name="actas", class_group="amz", super_class="act" )
acta table actas
acta.ano 	class year 	col the_year 	type numeric size 4 precision 0 primary key 0 
acta.dia 	class day 	col the_day 	type numeric size 2 precision 0 primary key 0 
acta.fol 	class fol 	col fol 	type varchar size 64 precision 0 primary key 0 
acta.id 	class id 	col id 	type varchar size 64 precision 0 primary key 1 
a

In [237]:
print(list(Entity.get_subclasses()))
for aclass in Entity.get_subclasses():
    print(aclass.__mapper_args__['polymorphic_identity'])
print([aclass.__mapper_args__['polymorphic_identity'] for aclass in Entity.get_subclasses()])
print("pom_classes",Entity.get_pom_classes())

[<class '__main__.Attribute'>, <class '__main__.Relation'>, <class '__main__.Person'>, <class '__main__.Object'>, <class '__main__.Act'>, <class '__main__.Source'>, <class '__main__.PomSomMapper'>]
attribute
relation
person
object
act
source
class
['attribute', 'relation', 'person', 'object', 'act', 'source', 'class']
pom_classes ['attribute', 'relation', 'person', 'object', 'act', 'source', 'class']


In [188]:
dir(aclass)

['__abstract__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__mapper__',
 '__mapper_args__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__table__',
 '__tablename__',
 '__weakref__',
 '_sa_class_manager',
 '_sa_registry',
 'attributes',
 'class_attributes',
 'class_group',
 'get_subclasses',
 'groupname',
 'id',
 'indexed',
 'inside',
 'metadata',
 'pom_class',
 'registry',
 'rels_in',
 'rels_out',
 'super_class',
 'table_name',
 'the_level',
 'the_line',
 'the_order',
 'updated']

In [63]:
from sqlalchemy import select
from sqlalchemy.orm import Session

stmt = select(Person).first()
print(stmt)
with Session(engine) as session:
    for row in session.execute(stmt):
        entity: Entity=row[0]
        print(f'{entity.groupname}${entity.name}/id={entity.id}#{entity.pom_class}')
        for attribute in entity.attributes:
            print(f'     ls${attribute.the_type}/{attribute.the_value}/{attribute.the_date}',end='')
            if attribute.obs is not None:
                print(f'/obs={attribute.obs}')
            else:
                print()
        if len(entity.rels_out)>0:
            for rel in entity.rels_out:
                print(f'>{rel}')
        if len(entity.rels_in)>0:
            for rel in entity.rels_in:
                    print(rel,'<')               


SELECT entities.class, persons.id, entities.id AS id_1, entities.inside, entities.the_order, entities.the_level, entities.the_line, entities.groupname, entities.updated, entities.indexed, persons.name, persons.sex, persons.obs 
FROM entities JOIN persons ON entities.id = persons.id 
WHERE persons.id = :id_2
n$Arnaldo Henriques de Abreu/id=140625#person
     ls$código-de-referência/"PT/AUC/ELU/UC-AUC/B/001-001/A/000336"/20200211
     ls$data-do-registo/2018-04-19 14:33:58/20200211
     ls$url/"https://pesquisa.auc.uc.pt/details?id=140625"/20200211
     ls$uc.entrada/1835-10-17/1835-10-17
     ls$uc.saida/1838-10-09/1838-10-09
     ls$uc.entrada.ano/1835/1835-10-17
     ls$uc.saida.ano/1838/1838-10-09
     ls$nome/Arnaldo Henriques de Abreu/1835-10-17
     ls$nome.primeiro/Arnaldo/1835-10-17
     ls$nome.apelido/Henriques de Abreu/1835-10-17
     ls$nome.apelido/Abreu/1835-10-17
     ls$naturalidade/Porto/1835-10-17
     ls$naturalidade.ano/Porto.1835/1835-10-17
     ls$nome-geografico/P

## Source files

In [37]:
from pathlib import Path

kleio_files = [f.stem for f in list(Path('../sources').rglob('*.cli'))]
print("Number of kleio_files:", len(kleio_files))

imported_sources = %sql select sources.id, sources.kleiofile, entities.updated from sources left join entities on (sources.id=entities.id) order by updated desc 
sources_in_db = [s.id for s in imported_sources]
print("Number of imported files:",len(sources_in_db))

print("Files not in the database:", set(kleio_files)-set(sources_in_db))
print("Imported sources no file found:", set(sources_in_db)-set(kleio_files))



Number of kleio_files: 235
235 rows affected.
Number of imported files: 235
Files not in the database: set()
Imported sources no file found: set()


## Analyse attributes extracted from records

### Attributes in the database

In [8]:
nml = %sql select the_type, count(*) as tot from attributes group by the_type
for the_type, tot in nml:
    print(f'{tot:6} | {the_type}')

28 rows affected.
105300 | código-de-referência
   275 | colegio
105300 | data-do-registo
   186 | ec
 53627 | exame
 94164 | faculdade
 94164 | faculdade.ano
106372 | grau
106372 | grau.ano
319291 | matricula
319291 | matricula.ano
 98904 | naturalidade
 98904 | naturalidade.ano
105300 | nome
107486 | nome-geografico
101392 | nome-geografico.ano
198660 | nome.apelido
105300 | nome.primeiro
  8155 | nome.vide
  9975 | nota
  3142 | ordem-religiosa
  7036 | padre
  4170 | titulo
105300 | uc.entrada
 99014 | uc.entrada.ano
105300 | uc.saida
 99281 | uc.saida.ano
105300 | url


In [11]:
p1540 = %sql select id, name, pobs from nattributes where the_type='uc.entrada' and the_value like '1540%'
for id, name, pobs in p1540:
    print(f'{id:5} | {name} \n {pobs}\n\n')

473 rows affected.


ValueError: Unknown format code 'r' for object of type 'str'