## Init Relational Database

In [1]:
import os

yago_db_file_path = 'yago.db'
if os.path.isfile(yago_db_file_path):
    os.remove(yago_db_file_path)

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///yago.db')
DBSession = sessionmaker(bind=engine)
db_session = DBSession()

Define tables:

In [3]:
from sqlalchemy import Column, String, Integer, ForeignKey, CHAR, Boolean, and_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm.exc import FlushError
import sqlite3

Base = declarative_base()

In [4]:
class Country(Base):
    __tablename__ = 'country'
    
    country_id = Column(String(64), primary_key=True)
    
    def __init__(self, country_id):
        self.country_id = country_id

In [5]:
class Politician(Base):
    __tablename__ = 'politician'
    
    politician_id = Column(String(64), primary_key=True)
    family_name = Column(String(64))
    given_name = Column(String(64))
    gender = Column(String(8))
    labels = relationship('PoliticianLabel', back_populates='politician')
    
    def __init__(self, politician_id):
        self.politician_id = politician_id

In [6]:
class PoliticianLabel(Base):
    __tablename__ = 'politician_label'
    
    politician_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    politician = relationship("Politician", back_populates='labels')
    label = Column(String(256), primary_key=True)
    lang = Column(String(4), primary_key=True)
    is_translated = Column(Boolean, default=False)
    
    def __init__(self, politician, label, lang, is_translated=False):
        self.politician = politician
        self.label = label
        self.lang = lang
        self.is_translated = is_translated

In [7]:
class PresidentOfCountry(Base):
    __tablename__ = 'president_of_country'
    
    politician_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    politician = relationship(Politician)
    country_id = Column(String(64), ForeignKey('country.country_id'), primary_key=True)
    country = relationship(Country)
    
    def __init__(self, politician, country):
        self.politician = politician
        self.country = country

In [8]:
class PoliticalParty(Base):
    __tablename__ = 'political_party'
    
    party_id = Column(String(128), primary_key=True)
    labels = relationship('PoliticalPartyLabel', back_populates='party')
    
    def __init__(self, party_id):
        self.party_id = party_id

In [9]:
class PoliticalPartyLabel(Base):
    __tablename__ = 'political_party_label'
    
    party_id = Column(String(128), ForeignKey('political_party.party_id'), primary_key=True)
    party = relationship("PoliticalParty", back_populates='labels')
    label = Column(String(256), primary_key=True)
    lang = Column(String(4), primary_key=True)
    is_translated = Column(Boolean, default=False)
    
    def __init__(self, party, label, lang, is_translated=False):
        self.party = party
        self.label = label
        self.lang = lang
        self.is_translated = is_translated

In [10]:
class PoliticalPartyAffiliation(Base):
    __tablename__ = 'political_party_affiliation'

    politician_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    politician = relationship(Politician)
    party_id = Column(String(64), ForeignKey('political_party.party_id'), primary_key=True)
    party = relationship(PoliticalParty)
    
    def __init__(self, politician, party):
        self.politician = politician
        self.party = party

In [11]:
class War(Base):
    __tablename__ = 'war'
    
    war_id = Column(String(128), primary_key=True)
    labels = relationship('WarLabel', back_populates='war')
    
    def __init__(self, war_id):
        self.war_id = war_id

In [12]:
class WarLabel(Base):
    __tablename__ = 'war_label'
    
    war_id = Column(String(128), ForeignKey('war.war_id'), primary_key=True)
    war = relationship("War", back_populates='labels')
    label = Column(String(256), primary_key=True)
    lang = Column(String(4), primary_key=True)
    is_translated = Column(Boolean, default=False)
    
    def __init__(self, war, label, lang, is_translated=False):
        self.war = war
        self.label = label
        self.lang = lang
        self.is_translated = is_translated

In [13]:
class CountryDealsWith(Base):
    __tablename__ = 'country_deals_with'

    first_country_id = Column(String(64), ForeignKey('country.country_id'), primary_key=True)
    first_country = relationship(Country, foreign_keys=[first_country_id])
    second_country_id = Column(String(64), ForeignKey('country.country_id'), primary_key=True)
    second_country = relationship(Country, foreign_keys=[second_country_id])
    
    def __init__(self, first_country, second_country):
        self.first_country = first_country
        self.second_country = second_country

In [14]:
class HasChild(Base):
    __tablename__ = 'has_child'

    parent_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    parent = relationship(Politician, foreign_keys=[parent_id])
    child_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    child = relationship(Politician, foreign_keys=[child_id])
    
    def __init__(self, parent, child):
        self.parent = parent
        self.child = child

In [15]:
class PoliticianSuicide(Base):
    __tablename__ = 'politician_suicide'

    politician_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    politician = relationship(Politician)
    
    def __init__(self, politician):
        self.politician = politician

In [16]:
class Activist(Base):
    __tablename__ = 'activist'

    activist_id = Column(String(64), primary_key=True)
    labels = relationship('ActivistLabel', back_populates='activist')
    
    def __init__(self, activist_id):
        self.activist_id = activist_id

In [17]:
class ActivistLabel(Base):
    __tablename__ = 'activist_label'

    activist_id = Column(String(64), ForeignKey('activist.activist_id'), primary_key=True)
    activist = relationship("Activist", back_populates='labels')
    label = Column(String(256), primary_key=True)
    lang = Column(String(4), primary_key=True)
    is_translated = Column(Boolean, default=False)
    
    def __init__(self, activist, label, lang, is_translated=False):
        self.activist = activist
        self.label = label
        self.lang = lang
        self.is_translated = is_translated

In [18]:
class Ambassador(Base):
    __tablename__ = 'ambassador'

    ambassador_id = Column(String(64), primary_key=True)
    country_id = Column(String(64), ForeignKey('country.country_id'))
    country = relationship(Country, foreign_keys=[country_id])
    labels = relationship('AmbassadorLabel', back_populates='ambassador')
    
    def __init__(self, ambassador_id, country):
        self.ambassador_id = ambassador_id
        self.country = country

In [19]:
class AmbassadorLabel(Base):
    __tablename__ = 'ambassador_label'

    ambassador_id = Column(String(64), ForeignKey('ambassador.ambassador_id'), primary_key=True)
    ambassador = relationship("Ambassador", back_populates='labels')
    label = Column(String(256), primary_key=True)
    lang = Column(String(4), primary_key=True)
    is_translated = Column(Boolean, default=False)
    
    def __init__(self, ambassador, label, lang, is_translated=False):
        self.ambassador = ambassador
        self.label = label
        self.lang = lang
        self.is_translated = is_translated

In [20]:
class ParliamentRepresentative(Base):
    __tablename__ = 'parliament_representative'

    politician_id = Column(String(64), ForeignKey('politician.politician_id'), primary_key=True)
    politician = relationship(Politician)
    country_id = Column(String(64), ForeignKey('country.country_id'), primary_key=True)
    country = relationship(Country)

    
    def __init__(self, politician, country):
        self.politician = politician
        self.country = country

In [21]:
Base.metadata.create_all(engine)

## Label Extraction Tools

In [22]:
def decompose_label(label):
    decomposed_label = label.split('@')
    if len(decomposed_label) == 1:
        return decomposed_label[0], None
    else:
        return decomposed_label[0], decomposed_label[1]

In [23]:
import re

persian_char_regex = r'^[\u0600-\u06FF\uFB8A\u067E\u0686\u06AF\u200C\u200F ]+$'
persian_char_pattern = re.compile(persian_char_regex)

def fix_yago_label_lang(label, lang):
    # Some persian labels have @eng in the end
    if lang == 'eng' or lang is None:
        if persian_char_pattern.match(label):
            return 'fas'
        
    if lang is not None:
        return lang
    return ''
    

In [24]:
from google.cloud import translate

translate_client = translate.Client()

def translate_label(label, target_lang='fa'):
    return translate_client.translate(label, target_language=target_lang)['translatedText']

## Add data to database

In [25]:
from neo4j.v1 import GraphDatabase

neo_driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "123"))
neo_session = neo_driver.session()

In [26]:
query_limit = '' # 'LIMIT 500'

Populate tables:

In [27]:
from itertools import chain

def add_iran_deals(tx):
    query_iran_first = tx.run("MATCH (first:Entity)-[:dealsWith]->(second:Entity) "
                              "WHERE first.name = '<Iran>'"
                              "RETURN first.name, second.name {}".format(query_limit))
    query_iran_second = tx.run("MATCH (first:Entity)-[:dealsWith]->(second:Entity) "
                               "WHERE second.name = '<Iran>'"
                               "RETURN first.name, second.name {}".format(query_limit))
    for record in chain(query_iran_first, query_iran_second):
        first_country_name = record['first.name']
        second_country_name = record['second.name']

        first_country = db_session.query(Country).filter(Country.country_id == first_country_name).first()
        second_country = db_session.query(Country).filter(Country.country_id == second_country_name).first()
        
        if first_country is not None and second_country is not None:
            continue
        
        if first_country is None:
            first_country = Country(first_country_name)
            db_session.add(first_country)
            
        if second_country is None:
            second_country = Country(second_country_name)
            db_session.add(second_country)
            
        deal = CountryDealsWith(first_country, second_country)
        db_session.add(deal)

neo_session.read_transaction(add_iran_deals)

In [28]:
from itertools import chain

def add_iranian_politicians(tx):
    query_by_type = tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity) "
                           "MATCH (politician)-[:label]->(polLabel:Label) "
                           "WHERE polType.name = \"<wikicat_Iranian_politicians>\" "
                           "RETURN politician.name, polLabel.name {}".format(query_limit))
    query_by_is_politician = tx.run("MATCH (politician:Entity)-[:isPoliticianOf]->(country:Entity) "
                                    "MATCH (politician)-[:label]->(polLabel:Label) "
                                    "WHERE country.name = \"<Iran>\" "
                                    "RETURN politician.name, polLabel.name {}".format(query_limit))
    query_majlis_representatives = tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity) "
                                          "MATCH (politician)-[:label]->(polLabel:Label) "
                                          "WHERE polType.name = \"<wikicat_Iranian_Majlis_Representatives>\" "
                                          "RETURN politician.name, polLabel.name {}".format(query_limit))

    for record in chain(query_by_type, query_by_is_politician, query_majlis_representatives):
        politician_name = record['politician.name']
  
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            politician = Politician(politician_name)
            db_session.add(politician)
        
        label, lang = decompose_label(record['polLabel.name'])
        fixed_lang = fix_yago_label_lang(label, lang)
        
        if db_session.query(PoliticianLabel) \
                .filter(and_(PoliticianLabel.politician_id == politician_name,
                       PoliticianLabel.label == label,
                       PoliticianLabel.lang == fixed_lang)).one_or_none() is None:
            db_session.add(PoliticianLabel(politician, label, fixed_lang))

neo_session.read_transaction(add_iranian_politicians)

In [29]:
from itertools import chain

def add_iranian_politicians_name(tx):
    query_by_type = tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity {name: '<wikicat_Iranian_politicians>'}) "
                           "WITH politician AS person "
                           "MATCH (person)-[:hasGivenName]->(givenName:Label) "
                           "MATCH (person)-[:hasFamilyName]->(familyName:Label) "
                           "RETURN person.name, givenName.name, familyName.name")
    query_by_is_politician = tx.run("MATCH (politician:Entity)-[:isPoliticianOf]->(country:Entity {name: '<Iran>'}) "
                                    "WITH politician AS person "
                                    "MATCH (person)-[:hasGivenName]->(givenName:Label) "
                                    "MATCH (person)-[:hasFamilyName]->(familyName:Label) "
                                    "RETURN person.name, givenName.name, familyName.name")
    query_majlis_representatives = tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity {name: '<wikicat_Iranian_Majlis_Representatives>'}) "
                           "WITH politician AS person "
                           "MATCH (person)-[:hasGivenName]->(givenName:Label) "
                           "MATCH (person)-[:hasFamilyName]->(familyName:Label) "
                           "RETURN person.name, givenName.name, familyName.name")


    for record in chain(query_by_type, query_by_is_politician, query_majlis_representatives):
        politician_name = record['person.name']
        givenName_name = record['givenName.name']
        familyName_name = record['familyName.name']
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            continue

        politician.given_name = givenName_name
        politician.family_name = familyName_name
        db_session.add(politician)

neo_session.read_transaction(add_iranian_politicians_name)

In [30]:
from itertools import chain

def add_iranian_politicians_gender(tx):
    query_by_type = tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity {name: '<wikicat_Iranian_politicians>'}) "
                           "WITH politician AS person "
                           "MATCH (person)-[:hasGender]->(gender:Entity) "
                           "RETURN person.name, gender.name")
    query_by_is_politician = tx.run("MATCH (politician:Entity)-[:isPoliticianOf]->(country:Entity {name: '<Iran>'}) "
                                    "WITH politician AS person "
                                    "MATCH (person)-[:hasGender]->(gender:Entity) "
                                    "RETURN person.name, gender.name")

    for record in chain(query_by_type, query_by_is_politician):
        politician_name = record['person.name']
        gender_name = record['gender.name'][1:-1]
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            continue

        politician.gender = gender_name
        db_session.add(politician)

neo_session.read_transaction(add_iranian_politicians_gender)

In [31]:
from itertools import chain

def add_politicians_children(tx):
    query_by_type = tx.run("MATCH (politician:Entity)-[:type]->(:Entity {name: '<wikicat_Iranian_politicians>'}) "
                           "WITH politician AS parent "
                           "MATCH (parent)-[:hasChild]->(child:Entity) "
                           "RETURN parent.name, child.name")
    query_by_is_politician = tx.run("MATCH (politician:Entity)-[:isPoliticianOf]->(:Entity {name: '<Iran>'}) "
                                    "WITH politician AS parent "
                                    "MATCH (parent)-[:hasChild]->(child:Entity) "
                                    "RETURN parent.name, child.name")

    for record in chain(query_by_type, query_by_is_politician):
        parent_name = record['parent.name']
        child_name = record['child.name']
        
        parent = db_session.query(Politician).filter(Politician.politician_id == parent_name).first()
        child = db_session.query(Politician).filter(Politician.politician_id == child_name).first()
        if parent is None or child is None:
            continue

        has_child = db_session.query(HasChild).filter(HasChild.parent_id == parent_name and HasChild.child_id == child_name).first()
        if has_child is not None:
            continue
        relationship = HasChild(parent, child)
        db_session.add(relationship)

neo_session.read_transaction(add_politicians_children)

In [32]:
def add_wikicat_presidents_of_iran(tx):
    iran = db_session.query(Country).filter(Country.country_id == '<Iran>').first()
    
    for record in tx.run("MATCH (politician:Entity)-[:type]->(polType:Entity) "
                         "WHERE polType.name = \"<wikicat_Presidents_of_Iran>\" "
                         "RETURN politician.name {}".format(query_limit)):
        politician_name = record['politician.name']
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            politician = Politician(politician_name)
            db_session.add(politician)
        
        president_of_iran = PresidentOfCountry(politician, iran)
        db_session.add(president_of_iran)

neo_session.read_transaction(add_wikicat_presidents_of_iran)

In [33]:
def add_wikicat_political_parties_in_iran(tx):
    for record in tx.run("MATCH (party:Entity)-[:type]->(partyType:Entity) "
                         "MATCH (party)-[:label]->(partyLabel:Label) "
                         "MATCH (politician:Entity)-[:isAffiliatedTo]->(party) "
                         "WHERE partyType.name = \"<wikicat_Political_parties_in_Iran>\" "
                         "RETURN party.name, politician.name, partyLabel.name {}".format(query_limit)):
        party_name = record['party.name']
        politician_name = record['politician.name']

        party = db_session.query(PoliticalParty).filter(PoliticalParty.party_id == party_name).first()
        if party is None:
            party = PoliticalParty(party_name)
            db_session.add(party)
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            politician = Politician(politician_name)
            db_session.add(politician)
        
        if db_session.query(PoliticalPartyAffiliation) \
                .filter(and_(PoliticalPartyAffiliation.politician_id == politician_name,
                             PoliticalPartyAffiliation.party_id == party_name)).one_or_none() is None:
            affiliation = PoliticalPartyAffiliation(politician, party)
            db_session.add(affiliation)
        
        label, lang = decompose_label(record['partyLabel.name'])
        fixed_lang = fix_yago_label_lang(label, lang)
        
        if db_session.query(PoliticalPartyLabel) \
                .filter(and_(PoliticalPartyLabel.party_id == party_name,
                       PoliticalPartyLabel.label == label,
                       PoliticalPartyLabel.lang == fixed_lang)).one_or_none() is None:
            db_session.add(PoliticalPartyLabel(party, label, fixed_lang))

neo_session.read_transaction(add_wikicat_political_parties_in_iran)

In [34]:
def add_wikicat_wars_involving_iran(tx):    
    for record in tx.run("MATCH (war:Entity)-[:type]->(warType:Entity) "
                         "MATCH (war)-[:label]->(warLabel:Label) "
                         "WHERE warType.name = \"<wikicat_Wars_involving_Iran>\" "
                         "RETURN war.name, warLabel.name {}".format(query_limit)):
        war_name = record['war.name']

        war = db_session.query(War).filter(War.war_id == war_name).first()
        if war is None:
            war = War(war_name)
            db_session.add(war)
            
        label, lang = decompose_label(record['warLabel.name'])
        fixed_lang = fix_yago_label_lang(label, lang)
        
        if db_session.query(WarLabel) \
                .filter(and_(WarLabel.war_id == war_name,
                       WarLabel.label == label,
                       WarLabel.lang == fixed_lang)).one_or_none() is None:
            db_session.add(WarLabel(war, label, fixed_lang))


neo_session.read_transaction(add_wikicat_wars_involving_iran)

In [35]:
def add_suicides(tx):
    for record in tx.run("MATCH (politician:Entity)-[:type]->(:Entity {name: '<wikicat_Iranian_politicians_who_committed_suicide>'})"
                         "RETURN politician.name"):
        politician_name = record['politician.name']
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            politician = Politician(politician_name)
            db_session.add(politician)
        
        politician_suicide = PoliticianSuicide(politician)
        db_session.add(politician_suicide)

neo_session.read_transaction(add_suicides)

In [36]:
def add_activist(tx):
    for record in tx.run("MATCH (activist:Entity)-[:type]->(:Entity {name: '<wikicat_Iranian_activists>'})"
                         "MATCH (activist)-[:label]->(activistLabel:Label) "
                         "RETURN activist.name, activistLabel.name"):
        activist_name = record['activist.name']
        
        activist = db_session.query(Activist).filter(Activist.activist_id == activist_name).first()
        if activist is None:
            activist = Activist(activist_name)
            db_session.add(activist)
            
        label, lang = decompose_label(record['activistLabel.name'])
        fixed_lang = fix_yago_label_lang(label, lang)
        
        if db_session.query(ActivistLabel) \
                .filter(and_(ActivistLabel.activist_id == activist_name,
                       ActivistLabel.label == label,
                       ActivistLabel.lang == fixed_lang)).one_or_none() is None:
            db_session.add(ActivistLabel(activist, label, fixed_lang))


neo_session.read_transaction(add_activist)

In [37]:
def add_ambassador_to_iran(tx):
    for record in tx.run("MATCH (ambassador:Entity)-[:type]-(:Entity {name: '<wikicat_Ambassadors_to_Iran>'}) "
                         "RETURN ambassador.name"):
        ambassador_name = record['ambassador.name']

        ambassador = db_session.query(Ambassador).filter(Ambassador.ambassador_id == ambassador_name).first()
        if ambassador is not None:
            continue
        ambassador = Ambassador(ambassador_name)
        db_session.add(ambassador)
        
def add_ambassador_of_iran(tx):
    iran = db_session.query(Country).filter(Country.country_id == "<Iran>").first()
    
    for record in tx.run("MATCH (ambassador:Entity)-[:type]-(:Entity {name: '<wikicat_Ambassadors_of_Iran>'}) "
                         "MATCH (ambassador)-[:label]->(ambassadorLabel:Label) "
                         "RETURN ambassador.name, ambassadorLabel.name"):
        ambassador_name = record['ambassador.name']

        ambassador = db_session.query(Ambassador).filter(Ambassador.ambassador_id == ambassador_name).first()
        if ambassador is not None:
            continue
        ambassador = Ambassador(ambassador_name, iran)
        db_session.add(ambassador)
        
        label, lang = decompose_label(record['ambassadorLabel.name'])
        fixed_lang = fix_yago_label_lang(label, lang)
        
        if db_session.query(AmbassadorLabel) \
                .filter(and_(AmbassadorLabel.ambassador_id == ambassador_name,
                       AmbassadorLabel.label == label,
                       AmbassadorLabel.lang == fixed_lang)).one_or_none() is None:
            db_session.add(AmbassadorLabel(ambassador, label, fixed_lang))


# neo_session.read_transaction(add_ambassador_to_iran)
neo_session.read_transaction(add_ambassador_of_iran)

In [38]:
def add_iran_parliament_representative(tx):
    iran = db_session.query(Country).filter(Country.country_id == '<Iran>').first()
    
    for record in tx.run("MATCH (politician:Entity)-[:type]->(:Entity {name: '<wikicat_Iranian_Majlis_Representatives>'}) "
                         "RETURN politician.name"):
        politician_name = record['politician.name']
        
        politician = db_session.query(Politician).filter(Politician.politician_id == politician_name).first()
        if politician is None:
            continue
        
        representative = ParliamentRepresentative(politician, iran)
        db_session.add(representative)

neo_session.read_transaction(add_iran_parliament_representative)

In [39]:
def entities_without_label(entity_type, lang='fas'):
    entities = []
    for entity in db_session.query(entity_type).all():
        if lang not in {label.lang for label in entity.labels}:
            entities.append(entity)
                
    return entities

In [40]:
def add_persian_labels(entity_type, label_type):
    entities = entities_without_label(entity_type, 'fas')
    print("[before translate] {} without Persian label: {}".format(entity_type.__name__, len(entities)))
    
    for entity in entities:    
        eng_labels = [label for label in entity.labels if label.lang == 'eng']
        if eng_labels:
            db_session.add(label_type(entity, translate_label(eng_labels[0].label), 'fas', True))
        elif entity.labels:
            db_session.add(label_type(entity, translate_label(entity.labels[0].label), 'fas', True))
            
    print("[after translate] {} without Persian label: {}".format(entity_type.__name__,
                                                                  len(entities_without_label(entity_type))))

In [41]:
add_persian_labels(Politician, PoliticianLabel)

add_persian_labels(Activist, ActivistLabel)

add_persian_labels(PoliticalParty, PoliticalPartyLabel)

add_persian_labels(War, WarLabel)

add_persian_labels(Ambassador, AmbassadorLabel)

[before translate] Politician without Persian label: 163
[after translate] Politician without Persian label: 39
[before translate] Activist without Persian label: 21
[after translate] Activist without Persian label: 0
[before translate] PoliticalParty without Persian label: 1
[after translate] PoliticalParty without Persian label: 0
[before translate] War without Persian label: 13
[after translate] War without Persian label: 0
[before translate] Ambassador without Persian label: 31
[after translate] Ambassador without Persian label: 0


In [42]:
db_session.commit()

In [43]:
db_session.close()