In [18]:
import psycopg2
#import fileinput
from lib.database import Database

# p is a predicate name without angle brackets, e.g. 'actedIn'
def get_rule(p):
    """Select the extraction rule that applies to predicate ``p``.

    Returns the string 'interval_time' for predicates handled by the
    interval rule, a SQL query (with one ``%s`` placeholder for the
    predicate) for predicates handled by the timestamp rule, and 0 for
    every other predicate.
    """
    # Predicates resolved through the timestamp SQL query below.
    point_in_time = ('actedIn', 'directed', 'wroteMusicFor', 'created', 'participatedIn', 'hasChild')
    # Predicates resolved by the interval_time function.
    spans_interval = ('worksAt', 'isPoliticianOf', 'graduatedFrom')

    if p in spans_interval:
        return 'interval_time'
    if p not in point_in_time:
        return 0

    return """ SELECT yagoA.id, yagoA.subject, yagoA.predicate, yagoA.object, yagoB.predicate, yagoB.object
                            FROM yagofacts yagoA
                                 JOIN yagofacts yagoB ON yagoA.object = yagoB.subject
                            where yagoA.predicate = %s
                            AND yagoB.predicate IN (select subject
                                                        from yagofacts sub
                                                        where sub.predicate = 'rdfs:subPropertyOf' and 
                                                        sub.object = '<startsExistingOnDate>')
                            ORDER BY yagoA.subject
                            """

# rule is either a SQL query string or the name of a function to run (currently only 'interval_time')
def apply_rule(rule, predicate):
    """Run ``rule`` for ``predicate`` and return the records it produces.

    Args:
        rule: either the string 'interval_time', in which case the work is
            delegated to ``interval_time(predicate)``, or a SQL query with a
            single ``%s`` placeholder.
        predicate: value bound to the query placeholder (or passed on to
            ``interval_time``).

    Returns:
        The result of ``interval_time`` for the interval rule; otherwise the
        list returned by ``cursor.fetchall()``. If connecting or querying
        fails, the error is printed and an empty list is returned.
    """
    if rule == 'interval_time':
        return interval_time(predicate)

    # Initialised before the try block: previously a failed connect/execute
    # left `data` unbound and the function died with UnboundLocalError after
    # printing the real error.
    data = []
    conn = None
    try:
        conn = Database.connect_dbs()
        cur = conn.cursor()
        try:
            cur.execute(rule, (predicate, ))
            data = cur.fetchall()
        finally:
            # Close the cursor on the failure path too.
            cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

    return data

## Interval Time

In [19]:
import re
import psycopg2
import datetime
from datetime import timedelta
from utils import *
from lib.database import Database

def valid_after(data, bday):
    """Compute the earliest date from which a fact can be valid.

    Args:
        data: sequence of fact fields. The fields are joined with ',' and,
            when the text mentions '<wasCreatedOnDate>', the joined text is
            run through ``parse_date``/``strtodate`` to get a founding date.
        bday: birth date of the subject, or None when unknown.

    Returns:
        The founding date when it falls on or after ``bday`` plus 6570 days;
        otherwise ``bday`` plus 6570 days. When ``bday`` is None the founding
        date is returned (which may itself be None).
    """
    # str() each field so a non-string column does not break the join.
    joined = ','.join(map(str, data))

    founding_date = None
    # `in` instead of `.find(...) > 0`, which ignored a marker at index 0.
    if '<wasCreatedOnDate>' in joined:
        founding_date = strtodate(parse_date(joined))

    # Without a birth date there is no legal-age bound to compare against;
    # the original added a timedelta to None here and raised TypeError.
    if bday is None:
        return founding_date

    #18 years aprox 6570 days
    legal_age = bday + timedelta(days=6570)
    #TODO: verify other cases
    if founding_date is not None and founding_date >= legal_age:
        return founding_date
    return legal_age #bday

def valid_before(data, dday):
    """Compute the latest date until which a fact can be valid.

    Args:
        data: sequence of fact fields. The fields are joined with ',' and,
            when the text mentions '<wasDestroyedOnDate>', the joined text is
            run through ``parse_date``/``strtodate`` to get a closing date.
        dday: death date of the subject, or None when unknown.

    Returns:
        None when neither date is known, the only known date when just one
        is available, and otherwise the closing date if ``dday`` is on or
        after it, else ``dday``.
    """
    # str() each field so a non-string column does not break the join.
    joined = ','.join(map(str, data))

    closing_date = None
    # `in` instead of `.find(...) > 0`, which ignored a marker at index 0.
    if '<wasDestroyedOnDate>' in joined:
        closing_date = strtodate(parse_date(joined))

    if dday is None:
        # Covers both "nothing known" (None) and "only closing date known".
        return closing_date
    if closing_date is not None and dday >= closing_date:
        return closing_date
    return dday

def interval_time(predicate):
    """Build validity intervals for every fact that uses ``predicate``.

    Each fact is joined with the '<wasCreatedOnDate>' /
    '<wasDestroyedOnDate>' facts of its object, and the subject's
    '<wasBornOnDate>' / '<diedOnDate>' facts are looked up. The dates are
    combined through ``valid_after`` and ``valid_before``.

    Args:
        predicate: predicate as stored in ``yagofacts`` (bound to the query
            placeholder). Angle brackets are stripped for the output.

    Returns:
        A list of dicts with the keys 'name', 'predicate', 'company',
        'after', 'validAfter', 'before', 'validBefore' (in that order), one
        per (name, company) pair. A newer record for the same pair replaces
        the older one and inherits its 'validAfter' / 'validBefore' where the
        newer value is None. If an error occurs it is printed and the
        records collected so far are returned.
    """
    query = """SELECT yagoA.id, yagoA.subject, yagoA.predicate, yagoA.object, yagoB.predicate, yagoB.object
                FROM yagofacts yagoA
                JOIN yagofacts yagoB ON yagoA.object = yagoB.subject
                where yagoA.predicate = %s
                AND yagoB.predicate IN ('<wasCreatedOnDate>', '<wasDestroyedOnDate>')
                ORDER BY yagoA.subject"""

    qsubj = "select * from yagofacts where subject = %s and predicate in ('<wasBornOnDate>', '<diedOnDate>')"
    conn = None
    # Keyed by (name, company). The original scanned a list and matched with
    # substring tests, so e.g. 'Ann' was merged into (and replaced) 'Ann_Lee'.
    # Dict insertion order reproduces the original "remove, then append".
    merged = {}
    try:
        # Loop-invariant: the original recomputed this for every record.
        clean_predicate = re.sub('[<>]', '', predicate.strip())

        conn = Database.connect_dbs()
        cur = conn.cursor()
        cursb = conn.cursor()

        cur.execute(query, (predicate, ))
        data = cur.fetchall()

        # Rows are ordered by subject, so remembering the last lookup avoids
        # re-running the same subject query for consecutive rows.
        unset = object()
        cached_subject, person = unset, []

        for row in data:
            if cached_subject is unset or row[1] != cached_subject:
                cursb.execute(qsubj, (row[1], ))
                person = cursb.fetchall()
                cached_subject = row[1]
            if not person:
                continue

            row_text = ','.join(map(str, row))
            has_created = '<wasCreatedOnDate>' in row_text
            has_destroyed = '<wasDestroyedOnDate>' in row_text

            # Reset once per row, NOT per person fact: a bound found for the
            # birth fact is carried into the record built for the death fact.
            validAfter, validBefore = None, None
            for p in person:
                event_date = strtodate(parse_date(p[3]))
                if event_date is None:
                    continue
                if p[2] == '<wasBornOnDate>' and has_created:
                    validAfter = valid_after(row, event_date)
                elif p[2] == '<wasBornOnDate>' and has_destroyed:
                    closing_day = strtodate(parse_date(row_text))
                    validBefore = valid_before(row, closing_day)
                else:
                    validBefore = valid_before(row, event_date)

                info = {
                    'name': p[1],
                    'predicate': clean_predicate,
                    'company': row[3],
                    'after': "validAfter",
                    'validAfter': validAfter,
                    'before': "validBefore",
                    'validBefore': validBefore
                }
                # Organize records to compose <s,p,o> after d1 before d2
                key = (info['name'], info['company'])
                previous = merged.pop(key, None)
                if previous is not None:
                    if info['validAfter'] is None:
                        info['validAfter'] = previous['validAfter']
                    if info['validBefore'] is None:
                        info['validBefore'] = previous['validBefore']
                merged[key] = info
                print(info.get('name'))
        cursb.close()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return list(merged.values())

In [16]:
import json
import csv 
import os
import fileinput
from utils import *

# Driver: for every predicate, pick its rule, run it, and dump the results
# under outputs/ (tuples -> <predicate>.txt, dict records -> <predicate>.csv).
if __name__ == '__main__':

    predicates = ['actedIn', 'directed', 'wroteMusicFor', 'created', 'participatedIn', 'hasChild', 'worksAt', 'isPoliticianOf', 'graduatedFrom']
    # Predicates whose .txt dump gets "wasCreatedOnDate" renamed afterwards.
    timestamp_predicates = ['<actedIn>', '<directed>', '<wroteMusicFor>', '<created>', '<participatedIn>', '<hasChild>']
    file_name = ""
    count = 0

    # open() does not create missing directories.
    os.makedirs("outputs", exist_ok=True)

    for p in predicates:
        rule = get_rule(p)
        file_name = p
        p = '<'+p+'>'
        # Tracks whether this iteration produced the .txt file, so the
        # in-place rewrite below never runs on a missing or stale file.
        txt_written = False
        if rule:
            results = apply_rule(rule, p)
            if not results:
                # results[0] below would raise IndexError on an empty result.
                print("No results for " + p + "; nothing written")
            elif isinstance(results[0], tuple):
                with open("outputs/"+file_name+".txt", "w+") as txt_file:
                    for result in results:
                        txt_file.write(str(result)+"\n")
                txt_written = True
            else:
                # newline='' is what the csv module expects of the file
                # object; without it extra blank lines can appear on Windows.
                with open("outputs/"+file_name+".csv", "w+", newline='') as f:
                    w = csv.writer(f, delimiter = '\t')
                    for info in results:
                        w.writerow([converttostr(value) for value in info.values()])

        if txt_written and p in timestamp_predicates:
            # Rewrites the file in place; the untouched original is kept
            # next to it with a .bak suffix.
            with fileinput.FileInput("outputs/"+file_name+".txt", inplace=True, backup='.bak') as file:
                for line in file:
                    print(line.replace("wasCreatedOnDate", "validOnDate"), end='') #TODO: Replace validOnDate by inDateTime
    
    #Evaluation- in progress