In [7]:
import csv

# Quick look at the company CSV: print the header names exactly as parsed,
# then the registration number and name from the first data row only.
with open("company_main_info.csv", newline="", encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    print(reader.fieldnames)  # accessing fieldnames makes DictReader read the header row
    row = next(reader, None)  # None when the file holds a header and nothing else
    if row is not None:
        print(row['registration_number'], row['company_name'])


['registration_number', 'company_name', 'registration_date', 'legal_form', 'profit_type']
3199665 Ташир агро фийд


In [11]:
import re

def sanitize_uri(s):
    """Reduce free text to a token that can be embedded in an IRI local name.

    Spaces are turned into underscores, then every character that is not a
    word character (Unicode letters and digits, plus ``_``) or a hyphen is
    dropped.

    Args:
        s: The text to sanitise (for example a person or company name).

    Returns:
        The sanitised string. It may be empty if ``s`` contained no word
        characters at all.
    """
    # Replace spaces with underscores
    s = s.replace(" ", "_")
    # Remove illegal characters. `\w` is Unicode-aware for str patterns, so
    # Cyrillic names are kept; an ASCII-only class would erase them completely
    # and make every such name collapse to the same (empty) token.
    s = re.sub(r'[^\w-]', '', s)
    return s


In [8]:
import csv
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, XSD, RDFS

# Build the company graph: each CSV row becomes one kg:LE_<registration_number>
# node, and the result is written out as Turtle.
kg = Namespace("http://example.org/mongolia-kg#")

g = Graph()
g.bind("kg", kg)

with open("company_main_info.csv", newline="", encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    for row in reader:
        le = kg[f"LE_{row['registration_number']}"]
        # Dots are swapped for dashes before the value is typed as xsd:date
        # (presumably the CSV stores dates as YYYY.MM.DD -- TODO confirm).
        iso_date = row["registration_date"].replace('.', '-')

        # Every row receives the same two classes plus its own literals.
        company_triples = (
            (le, RDF.type, kg.LimitedLiabilityCompany),
            (le, RDF.type, kg.ForProfitEntity),
            (le, kg.registrationNumber, Literal(row["registration_number"])),
            (le, kg.registeredName, Literal(row["company_name"])),
            (le, kg.registrationDate, Literal(iso_date, datatype=XSD.date)),
            (le, RDFS.label, Literal(row["company_name"])),
        )
        for triple in company_triples:
            g.add(triple)

g.serialize("company_main_info.ttl", format="turtle")


<Graph identifier=Nfb9e42b190e747949f05cc58d740ef65 (<class 'rdflib.graph.Graph'>)>

In [12]:
# Scratch cell: builds a Person IRI of the form Person_<registration_number>_<sanitised name>.
# NOTE(review): `row`, `kg` and `sanitize_uri` are not defined in this cell; they
# must already exist in the kernel from previously executed cells, so this cell
# cannot run on a fresh kernel.
person_id = f"{row['registration_number']}_{sanitize_uri(row['name'])}"
person = kg[f"Person_{person_id}"]
# NOTE(review): bare name with no assignment anywhere in the visible notebook --
# presumably a leftover note about the shareholders dataset. TODO confirm and remove.
shareholders_info

In [14]:
import csv
import re
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, XSD, RDFS

def sanitize_uri(s):
    """Reduce free text to a token that can be embedded in an IRI local name.

    Spaces are turned into underscores, then every character that is not a
    word character (Unicode letters and digits, plus ``_``) or a hyphen is
    dropped.

    Args:
        s: The text to sanitise (for example a person's name).

    Returns:
        The sanitised string. It may be empty if ``s`` contained no word
        characters at all.
    """
    s = s.replace(" ", "_")
    # `\w` is Unicode-aware for str patterns, so Cyrillic names are kept; an
    # ASCII-only class would erase them and make different names collide.
    s = re.sub(r'[^\w-]', '', s)
    return s

# Build the shareholders graph: one kg:Person node per CSV row, keyed by the
# company registration number plus the sanitised person name.
kg = Namespace("http://example.org/mongolia-kg#")
g = Graph()
g.bind("kg", kg)

with open("shareholders_info.csv", newline="", encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    for row in reader:
        person_id = f"{row['registration_number']}_{sanitize_uri(row['name'])}"
        person = kg[f"Person_{person_id}"]

        g.add((person, RDF.type, kg.Person))
        # Plain string attributes, copied verbatim from the named CSV columns.
        for prop, column in (
            (kg.classification, "classification"),
            (kg.country, "country"),
            (kg.parentName, "parent_name"),
            (kg.name, "name"),
            (kg.gender, "gender"),
        ):
            g.add((person, prop, Literal(row[column])))
        # The date is passed through unchanged and typed as xsd:date.
        g.add((person, kg.registrationDate, Literal(row["registration_date"], datatype=XSD.date)))
        g.add((person, RDFS.label, Literal(row["name"])))

g.serialize("shareholders_info.ttl", format="turtle")


<Graph identifier=Nd018fe276bee4f9a9222dd33cc08ae5a (<class 'rdflib.graph.Graph'>)>

In [15]:
import csv
import re
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, XSD, RDFS

def sanitize_uri(s):
    """Reduce free text to a token that can be embedded in an IRI local name.

    Spaces are turned into underscores, then every character that is not a
    word character (Unicode letters and digits, plus ``_``) or a hyphen is
    dropped.

    Args:
        s: The text to sanitise (for example a person's name).

    Returns:
        The sanitised string. It may be empty if ``s`` contained no word
        characters at all.
    """
    s = s.replace(" ", "_")
    # `\w` is Unicode-aware for str patterns, so Cyrillic names are kept; an
    # ASCII-only class would erase them and make different names collide.
    s = re.sub(r'[^\w-]', '', s)
    return s

# Build the authorized-representatives graph. Each CSV row yields a kg:Person,
# a kg:AuthorizedRepresentative node pointing at that person, and links in both
# directions between that node and the company.
kg = Namespace("http://example.org/mongolia-kg#")
g = Graph()
g.bind("kg", kg)

with open("authorized_reps.csv", newline="", encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Person and AuthRep IRIs share the "<registration_number>_<sanitised name>" suffix.
        person_id = f"{row['registration_number']}_{sanitize_uri(row['name'])}"
        person = kg[f"Person_{person_id}"]

        # Person node and its literal attributes
        g.add((person, RDF.type, kg.Person))
        for prop, column in (
            (kg.position, "position"),
            (kg.country, "country"),
            (kg.parentName, "parent_name"),
            (kg.name, "name"),
            (kg.gender, "gender"),
        ):
            g.add((person, prop, Literal(row[column])))
        g.add((person, kg.registrationDate, Literal(row["registration_date"], datatype=XSD.date)))
        g.add((person, RDFS.label, Literal(row["name"])))

        # AuthorizedRepresentative node
        authrep_id = f"AuthRep_{person_id}"
        authrep = kg[authrep_id]
        g.add((authrep, RDF.type, kg.AuthorizedRepresentative))
        g.add((authrep, kg.representedBy, person))

        # Company <-> representative links, asserted in both directions
        company = kg[f"LE_{row['registration_number']}"]
        g.add((authrep, kg.representsEntity, company))
        g.add((company, kg.hasAuthorizedRepresentative, authrep))

g.serialize("authorized_reps.ttl", format="turtle")


<Graph identifier=Nd22ef272246d472fa6937bbe979b34dc (<class 'rdflib.graph.Graph'>)>

In [16]:
import csv
import re
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, XSD, RDFS

def sanitize_uri(s):
    """Reduce free text to a token that can be embedded in an IRI local name.

    Spaces are turned into underscores, then every character that is not a
    word character (Unicode letters and digits, plus ``_``) or a hyphen is
    dropped.

    Args:
        s: The text to sanitise (for example a person's name).

    Returns:
        The sanitised string. It may be empty if ``s`` contained no word
        characters at all.
    """
    s = s.replace(" ", "_")
    # `\w` is Unicode-aware for str patterns, so Cyrillic names are kept; an
    # ASCII-only class would erase them and make different names collide.
    s = re.sub(r'[^\w-]', '', s)
    return s

# Build the beneficial-ownership graph. Each CSV row yields a kg:Person, a
# kg:BeneficialOwnership node pointing at that person, and links in both
# directions between that node and the company.
kg = Namespace("http://example.org/mongolia-kg#")
g = Graph()
g.bind("kg", kg)

with open("ubos.csv", newline="", encoding="utf-8-sig") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Person and UBO IRIs share the "<registration_number>_<sanitised name>" suffix.
        person_id = f"{row['registration_number']}_{sanitize_uri(row['name'])}"
        person = kg[f"Person_{person_id}"]

        # Person node and its literal attributes
        g.add((person, RDF.type, kg.Person))
        for prop, column in (
            (kg.classification, "classification"),
            (kg.parentName, "parent_name"),
            (kg.name, "name"),
            (kg.country, "country"),
            (kg.gender, "gender"),
        ):
            g.add((person, prop, Literal(row[column])))
        g.add((person, RDFS.label, Literal(row["name"])))

        # BeneficialOwnership node
        ubo_id = f"UBO_{person_id}"
        ubo = kg[ubo_id]
        g.add((ubo, RDF.type, kg.BeneficialOwnership))
        g.add((ubo, kg.uboPerson, person))

        # Company <-> UBO links, asserted in both directions
        company = kg[f"LE_{row['registration_number']}"]
        g.add((ubo, kg.belongsToCompany, company))
        g.add((company, kg.hasUltimateBeneficialOwner, ubo))

g.serialize("ubos.ttl", format="turtle")


<Graph identifier=Nf9e6407daa84441facb5e7b239d18a7e (<class 'rdflib.graph.Graph'>)>

In [17]:
# Concatenate the per-dataset Turtle files into a single all_kg.ttl.
# The input names are the files produced by the g.serialize(...) calls in this
# notebook (company_main_info.ttl, shareholders_info.ttl, authorized_reps.ttl,
# ubos.ttl); names that no cell writes would raise FileNotFoundError on a clean run.
with open("all_kg.ttl", "w", encoding="utf-8") as outfile:
    for fname in ["company_main_info.ttl", "shareholders_info.ttl", "authorized_reps.ttl", "ubos.ttl"]:
        with open(fname, "r", encoding="utf-8") as infile:
            outfile.write(infile.read())
            # Blank line so one file's last statement never runs into the next file's first line.
            outfile.write("\n")


In [1]:
import pandas as pd
from rdflib import Graph, Namespace, Literal, URIRef
from rdflib.namespace import RDF, OWL, XSD
import hashlib

# Namespace matching the diagram.
# Base IRI under which the helpers and loops below mint every class, property,
# person_*, entity_* and position_* term. Note it differs from the
# "http://example.org/mongolia-kg#" namespace used by the CSV-based cells.
KG = Namespace("http://example.com/mn-legal-kg/")

def safe_xsd_date(date_value):
    """Convert a spreadsheet cell into a ``YYYY-MM-DD`` string, or ``None``.

    Args:
        date_value: Raw cell value (string, timestamp, NaN/None, ...).

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when the value is
        missing, blank, the text ``NaT``, or cannot be parsed as a date. A
        warning is printed for unparseable values.
    """
    if pd.isna(date_value):
        return None
    date_str = str(date_value).strip()
    if not date_str or date_str.lower() == 'nat':
        return None
    try:
        dt = pd.to_datetime(date_str, errors='raise')
        return dt.strftime('%Y-%m-%d')
    except Exception:
        # Best-effort: a bad date must not abort the whole export. Catching
        # Exception (not a bare except) lets KeyboardInterrupt / SystemExit
        # still propagate, so a long run can be interrupted.
        print(f"Warning: Skipped invalid date '{date_str}'")
        return None

def person_uri(father_name, first_name, country="Монгол Улс", gender=""):
    """Mint a deterministic person IRI from identifying attributes.

    The four values are normalised to stripped strings, joined with
    underscores, MD5-hashed, and the hex digest is used as the local name
    ``person_<digest>`` in the ``KG`` namespace. Equal inputs therefore always
    map to the same IRI.

    Args:
        father_name: Patronymic; missing values count as "".
        first_name: Given name; missing values count as "".
        country: Country name; missing values fall back to "Монгол Улс".
        gender: Gender; missing values count as "".

    Returns:
        The IRI ``KG["person_<md5 hex digest>"]``.
    """
    def _text(value, default=""):
        # pandas represents empty cells as NaN, which is truthy: a plain
        # `value or default` would let it through and hash the text "nan".
        if pd.isna(value):
            return default
        return str(value or default).strip()

    father_name = _text(father_name)
    first_name = _text(first_name)
    country = _text(country, "Монгол Улс")
    gender = _text(gender)
    unique_str = f"{father_name}_{first_name}_{country}_{gender}"
    # MD5 serves only as a stable fingerprint for the IRI, not for security.
    hash_val = hashlib.md5(unique_str.encode('utf-8')).hexdigest()
    return KG[f"person_{hash_val}"]

def entity_uri(reg_num):
    """Return the ``KG`` IRI ``entity_<reg_num>`` for a registration number.

    ``reg_num`` is converted with ``str()`` and stripped of surrounding
    whitespace before being appended to the ``entity_`` prefix.
    """
    local_name = "entity_" + str(reg_num).strip()
    return KG[local_name]

# Load files
company_df = pd.read_excel("company_main_info.xlsx")
shareholders_df = pd.read_excel("shareholders_info.xlsx")
representatives_df = pd.read_excel("reprentatives.xlsx")
beneficials_df = pd.read_excel("beneficials.xlsx")

g = Graph()
g.bind("kg", KG)

# === Vocabulary: names grouped by the OWL kind they are declared as ===
classes = [
    "LegalEntity", "Person", "BusinessActivity", "RestructuringEvent", "Position"
]
object_props = [
    "hasActivity", "hasRestructuring", "hasShareholder",
    "hasAuthorizedRep", "hasBeneficialOwner", "holdsPosition"
]
data_props = [
    "registrationNumber", "name", "registrationDate", "legalForm", "entityType",
    "address", "activityDirection", "status", "restructuringType", "previousName",
    "patronymic", "firstName", "gender", "countryName", "positionTitle"
]

# Declare every name as an owl:Class, owl:ObjectProperty or owl:DatatypeProperty.
for owl_kind, names in (
    (OWL.Class, classes),
    (OWL.ObjectProperty, object_props),
    (OWL.DatatypeProperty, data_props),
):
    for term in names:
        g.add((KG[term], RDF.type, owl_kind))

# === Legal Entities ===
# One KG.LegalEntity node per row of company_df, keyed by registration number.
for _, row in company_df.iterrows():
    reg_num = str(row["registration_number"]).strip()
    entity = entity_uri(reg_num)
    g.add((entity, RDF.type, KG.LegalEntity))
    g.add((entity, KG.registrationNumber, Literal(reg_num)))

    # pandas represents empty cells as NaN, and str(NaN) is the text "nan";
    # skip missing values instead of writing a bogus "nan" literal.
    for prop, column in (
        (KG.name, "company_name"),
        (KG.legalForm, "legal_form"),
        (KG.entityType, "profit_type"),  # profit_type maps to entityType
    ):
        value = row[column]
        if pd.notna(value):
            g.add((entity, prop, Literal(str(value).strip())))

    # safe_xsd_date returns None for missing or unparseable dates.
    date_str = safe_xsd_date(row["registration_date"])
    if date_str:
        g.add((entity, KG.registrationDate, Literal(date_str, datatype=XSD.date)))

# === Shareholders ===
# Each row of shareholders_df yields a KG.Person and a hasShareholder link
# from the company identified by the row's registration number.
for _, row in shareholders_df.iterrows():
    reg_num = str(row["Регистрийн дугаар"]).strip()
    entity = entity_uri(reg_num)
    patronymic = row["Эцэг/эх/-ийн нэр"]
    first_name = row["Нэр"]
    country = row["Улсын нэр"]
    gender = row["Хүйс"]

    person = person_uri(patronymic, first_name, country, gender)

    g.add((person, RDF.type, KG.Person))
    # pandas represents empty cells as NaN, and str(NaN) is the text "nan";
    # skip missing values instead of writing a bogus "nan" literal.
    for prop, value in (
        (KG.patronymic, patronymic),
        (KG.firstName, first_name),
        (KG.countryName, country),
        (KG.gender, gender),
    ):
        if pd.notna(value):
            g.add((person, prop, Literal(str(value).strip())))

    # safe_xsd_date returns None for missing or unparseable dates.
    date_str = safe_xsd_date(row["Бүртгэсэн огноо"])
    if date_str:
        g.add((person, KG.registrationDate, Literal(date_str, datatype=XSD.date)))

    g.add((entity, KG.hasShareholder, person))

# === Authorized Representatives ===
# Each row of representatives_df yields a KG.Person, an optional KG.Position
# the person holds, and a hasAuthorizedRep link from the company.
for _, row in representatives_df.iterrows():
    reg_num = str(row["Регистрийн дугаар"]).strip()
    entity = entity_uri(reg_num)
    patronymic = row["Эцэг/эх/-ийн нэр"]
    first_name = row["Нэр"]
    country = row["Улсын нэр"]
    gender = row["Хүйс"]
    position_title = row["Албан тушаал"]

    person = person_uri(patronymic, first_name, country, gender)

    g.add((person, RDF.type, KG.Person))
    # pandas represents empty cells as NaN, and str(NaN) is the text "nan";
    # skip missing values instead of writing a bogus "nan" literal.
    for prop, value in (
        (KG.patronymic, patronymic),
        (KG.firstName, first_name),
        (KG.countryName, country),
        (KG.gender, gender),
    ):
        if pd.notna(value):
            g.add((person, prop, Literal(str(value).strip())))

    # safe_xsd_date returns None for missing or unparseable dates.
    date_str = safe_xsd_date(row["Бүртгэсэн огноо"])
    if date_str:
        g.add((person, KG.registrationDate, Literal(date_str, datatype=XSD.date)))

    # Create Position individual. A missing title is skipped (calling .encode()
    # on NaN would raise AttributeError). The hash is taken over the same
    # stripped text stored as positionTitle, so titles that differ only in
    # surrounding whitespace share one Position node.
    if pd.notna(position_title):
        title = str(position_title).strip()
        position_id = KG[f"position_{hashlib.md5(title.encode('utf-8')).hexdigest()}"]
        g.add((position_id, RDF.type, KG.Position))
        g.add((position_id, KG.positionTitle, Literal(title)))
        g.add((person, KG.holdsPosition, position_id))

    g.add((entity, KG.hasAuthorizedRep, person))

# === Beneficial Owners ===
# Each row of beneficials_df yields a KG.Person and a hasBeneficialOwner link
# from the company identified by the row's registration number.
for _, row in beneficials_df.iterrows():
    reg_num = str(row["Регистрийн дугаар"]).strip()
    entity = entity_uri(reg_num)
    patronymic = row["Эцэг/эх/-ийн нэр"]
    first_name = row["Нэр"]
    # Series.get already supplies the default when the column is absent; an
    # empty cell (NaN) gets the same fallback rather than the text "nan".
    country = row.get("Улсын нэр", "Монгол Улс")
    if pd.isna(country):
        country = "Монгол Улс"
    gender = row["Хүйс"]

    person = person_uri(patronymic, first_name, country, gender)

    g.add((person, RDF.type, KG.Person))
    # pandas represents empty cells as NaN, and str(NaN) is the text "nan";
    # skip missing values instead of writing a bogus "nan" literal.
    for prop, value in (
        (KG.patronymic, patronymic),
        (KG.firstName, first_name),
        (KG.countryName, country),
        (KG.gender, gender),
    ):
        if pd.notna(value):
            g.add((person, prop, Literal(str(value).strip())))

    g.add((entity, KG.hasBeneficialOwner, person))

# Write the finished graph to disk as UTF-8 Turtle, then print the two status lines.
output_path = "mn_legal_kg_aligned.ttl"
g.serialize(destination=output_path, format="turtle", encoding="utf-8")
for message in (
    "Success! Generated 'mn_legal_kg_aligned.ttl' – fully aligned with your ER diagram.",
    "Now load this into GraphDB for beautiful schema and visual exploration!",
):
    print(message)

Success! Generated 'mn_legal_kg_aligned.ttl' – fully aligned with your ER diagram.
Now load this into GraphDB for beautiful schema and visual exploration!
