In [2]:
#%pip install -q pandas matplotlib seaborn scipy scikit-learn
from collections import Counter, defaultdict
import pickle
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


'''
This script fetches wikidata/wikipedia information for all the extracted entities.
"TPT/Entities" structures are created with the script find_time_series_entities.py
'''


'\nThis script fetches wikidata/wikipedia information for all the extracted entities.\n"TPT/Entities" structures are created with the script find_time_series_entities.py\n'

In [4]:
ENTITY_ROOT = "TPT/Entities"

# Step 1: Traverse all entity subfolders
grouped_entities = defaultdict(list)

for root, dirs, files in os.walk(ENTITY_ROOT):
    for file in files:
        if file.endswith(".pkl"):
            file_path = os.path.join(root, file)
            period_year = os.path.basename(root).replace("_txts", "")  # e.g. d27-y3
            try:
                with open(file_path, "rb") as f:
                    entities = pickle.load(f)
                    grouped_entities[period_year].extend(entities)
            except Exception as e:
                print(f"❌ Failed to load {file_path}: {e}")

In [5]:
from collections import defaultdict

entity_counts = {}
total_entities = 0
for period, entities in grouped_entities.items():
    entity_counts[period] = len(entities)
    total_entities += len(entities)

print(f"Total entities: {total_entities}")

# Sort periods in chronological order
sorted_periods = sorted(entity_counts.keys())
counts = [entity_counts[period] for period in sorted_periods]

Total entities: 314753


In [6]:
# Helper to extract (dönem, yıl) as sortable integers
def parse_period_year(name):
    match = re.match(r'd(\d+)-y(\d+)', name)
    if match:
        return int(match.group(1)), int(match.group(2))
    return float("inf"), float("inf")  # push malformed folders to the end

In [7]:
# Helper to recombine subwords
def recombine_subwords(entities):
    merged = []
    current = None

    for ent in entities:
        word = ent["word"]

        if word.startswith("##") and current:
            current["word"] += word[2:]
            current["end"] = ent["end"]
        else:
            if current:
                merged.append(current)
            current = ent.copy()

    if current:
        merged.append(current)

    return merged

In [8]:
# Step 2: Sort keys chronologically (dönem then yıl)
sorted_keys = sorted(grouped_entities.keys(), key=parse_period_year)
recombined_entities = {}
for group in sorted_keys:
    raw_entities = grouped_entities[group]
    merged_entities = recombine_subwords(raw_entities)
    recombined_entities[group] = merged_entities


In [9]:
from __future__ import annotations
import os, re, json, pickle, hashlib, unicodedata
from collections import Counter
from typing import Dict, List

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from rapidfuzz import process, fuzz
from scipy import sparse
from sklearn.feature_extraction.text import TfidfTransformer
import matplotlib.pyplot as plt

# 1. Normalize entity keys
import string

def normalise_entity_key(ent) -> str | None:
    if isinstance(ent, dict):
        word = ent.get('word', '').strip()
        group = ent.get('entity_group', '').strip()
    elif isinstance(ent, (list, tuple)) and len(ent) == 2:
        word, group = ent[0].strip(), ''
    else:
        word, group = str(ent).strip(), ''

    # Remove all punctuation
    word = word.translate(str.maketrans('', '', string.punctuation))

    # Filter out empty or too-short strings
    if len(word) < 2:
        return None

    return f"{word}-{group}" if group else word


ORG_ALIASES = {
    "TBMM": "TBMM",
    "Türkiye Büyük Millet Meclisi": "TBMM",
    "TÜRKİYE BÜYÜK MİLLET MECLİSİ": "TBMM",
    "Büyük Millet Meclisi": "TBMM",
    "BMM": "TBMM",
    "T B M M": "TBMM",
    "T. B. M. M.": "TBMM",
    "T.B.M.M.": "TBMM",
    "T.B.M.M": "TBMM",
    "TÜRKİYE BÜYÜK MİLLET MECLİSİ’NDE": "TBMM",
    "TBMM’de": "TBMM",
    "TBMM'DE": "TBMM",
    "TBMM'de": "TBMM",
    "tbmm": "TBMM",
    "tbmm’de": "TBMM",
    "Türkiye Büyük Millet Meclisi’nde": "TBMM",
    "TÜRKİYE BÜYÜK MİLLET MECLİSİ'NDE": "TBMM",
    "Türkiye B. M. Meclisi": "TBMM", 
    "T.Büyük Millet Meclisi": "TBMM", 
    "Tiirkiye" : "Türkiye",
    "Tıirkiye": "Türkiye"

}
HAND_CANONICALS = set(ORG_ALIASES.values())

def _clean(s: str) -> str:
    return re.sub(r"\W+", "", unicodedata.normalize("NFKD", s).lower())

def choose_canonical(cluster: List[str], freq: Dict[str, int] | None = None) -> str:
    for v in cluster:                       # 1) handcrafted wins
        if v in HAND_CANONICALS:
            return v
    if freq:                                # 2) most frequent spelling
        best = max(cluster, key=lambda x: freq.get(x, 0))
        if freq.get(best, 0) > 1:
            return best
    return min(cluster, key=lambda x: (_clean(x), len(_clean(x))))  # 3) shortest


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
"""
RapidFuzz aliasing
--------------------------------------------
* parallel fuzzy clustering (RapidFuzz cdist + C++ threads)
* cache alias_map.pkl

needs 
grouped_entities   # dict[bucket, list[NER‑dict]]
sorted_keys        # same keys, sorted
recombine_subwords # fn(tokens) -> list[NER‑dict]

"""


from rapidfuzz import process, fuzz
import numpy as np

def build_alias_map_fast(
    names: list[str],
    *,
    threshold: int = 90,
    min_len: int = 3,
    freq: dict[str, int] | None = None,
    cache_path: str = "alias_map.pkl",
    workers: int = -1,
    desc: str = "Alias clustering"
) -> dict[str, str]:
    
    names = sorted(set(names))
    sig = hashlib.md5(json.dumps(names).encode()).hexdigest()

    if os.path.exists(cache_path):
        with open(cache_path, "rb") as f:
            cached_sig, alias_map = pickle.load(f)
        if cached_sig == sig:
            print("✔ alias_map loaded from cache.")
            return alias_map

    clusters: list[list[str]] = []
    canon_keys: list[str] = []          

    for name in tqdm(names, desc=desc, unit="name"):

        key = _clean(name) 

        if len(key) < min_len:
            clusters.append([name])
            continue

        if canon_keys:
            # 1×J matrix of similarity scores (float32)
            scores = process.cdist(
                [key], canon_keys,
                scorer=fuzz.token_set_ratio,
                workers=workers
            )
            best_idx = int(np.argmax(scores))
            best_score = float(scores[0, best_idx])
        else:
            best_score = -1  # forces new cluster

        if best_score < threshold:
            canon_keys.append(key)
            clusters.append([name])
        else:
            clusters[best_idx].append(name)


    alias_map: dict[str, str] = {}
    for cl in clusters:
        canon = ORG_ALIASES.get(cl[0], choose_canonical(cl, freq))
        for v in cl:
            alias_map[v] = canon

    with open(cache_path, "wb") as f:
        pickle.dump((sig, alias_map), f)

    return alias_map


In [12]:
if __name__ == "__main__":


    # 5.1 gather every raw entity string (filter out nonsense)
    all_entities = [
        ent_key for bucket in grouped_entities.values()
        for e in recombine_subwords(bucket)
        if (ent_key := normalise_entity_key(e)) is not None
    ]

    freq = Counter(all_entities)


    # 5.2 build (or load) alias map
    alias_map = build_alias_map_fast(
        all_entities,
        threshold=90,
        freq=freq,
        workers=-1        
    )

    entity_counts_by_group = {}
    for bucket_key in sorted_keys:
        merged = recombine_subwords(grouped_entities[bucket_key])
        counts = Counter(
            alias_map.get(normalise_entity_key(e), normalise_entity_key(e))
            for e in merged
            if normalise_entity_key(e) is not None 
        )
        entity_counts_by_group[bucket_key] = dict(counts)

Alias clustering: 100%|██████████| 36231/36231 [09:00<00:00, 67.04name/s] 


In [13]:
entity_counts_by_group

{'d01-y1': {' B M M-ORG': 71,
  'Tokad-LOC': 13,
  'Nazım Ba-PER': 1,
  'Trabzon-LOC': 39,
  'Hamid-PER': 2,
  'Elâzizbataları Tetkik Encümeni-LOC': 1,
  'Ankara-LOC': 47,
  'Mustafa Kemal Paşa-PER': 10,
  'Afyonkarahisar-LOC': 2,
  'Mehmed Şükrü-PER': 12,
  'Bo-LOC': 1,
  'lu-LOC': 2,
  'Nuri-PER': 5,
  'Çorum-LOC': 31,
  'Dursun-PER': 7,
  'Feridrum-PER': 1,
  'Erzurum-LOC': 51,
  'Celşleddin Arif-PER': 2,
  'İcra-ORG': 3,
  'Çubukabat-LOC': 1,
  'Zir-LOC': 1,
  'Hüsrev-PER': 13,
  'İçel-LOC': 10,
  'Ali-PER': 4,
  'Anamur-LOC': 2,
  'Bolu-LOC': 43,
  'Hilmi-PER': 4,
  'Ereğli-LOC': 5,
  'Ali Sabri E-PER': 1,
  'Biga-LOC': 10,
  'Büyük-ORG': 1,
  'Karahisarı Şarki-LOC': 13,
  'Mustafa-PER': 44,
  'Fuad-PER': 9,
  'Meclis-ORG': 19,
  'Maraş-LOC': 12,
  'Yakup Harndi-PER': 1,
  'Suvas-LOC': 1,
  'Hayıi-PER': 1,
  'Divanı Riyaset-ORG': 31,
  'Hamjtngın-PER': 1,
  'Musta-PER': 1,
  'Tokat-LOC': 8,
  'İzzet-PER': 5,
  'Kângın-LOC': 1,
  'Neşet-PER': 4,
  'Urfa-LOC': 3,
  'Nusret-PER': 2,


In [14]:
# form a df entity name, total count
entity_counts_df = pd.DataFrame.from_dict(
    entity_counts_by_group, orient="index"
).fillna(0).astype(int)
entity_counts_df = entity_counts_df.T
entity_counts_df

Unnamed: 0,d01-y1,d01-y2,d01-y3,d01-y4,d02-y1,d02-y2,d02-y3,d02-y4,d02-y5,d03-y1,...,d27-y2,d27-y3,d27-y4,d27-y5,d28-y1,d23-y1,d24-y1,d25-y1,d25-y2,d27-y1
B M M-ORG,71,69,70,3,18,23,45,64,40,47,...,0,0,0,0,0,0,0,0,0,0
Tokad-LOC,13,17,19,8,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Nazım Ba-PER,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Trabzon-LOC,39,14,23,8,15,19,18,6,3,8,...,13,5,9,9,2,0,0,0,0,0
Hamid-PER,2,4,5,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Mehmet Tahtasız-PER,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
Barış Bektaş-PER,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
Ekrem Karakaya-PER,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
Eylem Ertuğ Ertuğrul-PER,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [15]:
# alltime counts
alltime_counts = entity_counts_df.sum(axis=1).sort_values(ascending=False)
alltime_counts

İstanbul-LOC                        5702
 B M M-ORG                          5164
Türkiye Büyük Millet Meclisi-ORG    4325
TBMM-ORG                            3759
Ankara-LOC                          3578
                                    ... 
Ahmet YeşÜtoağ-PER                     1
Şabanoğlu İsmail Eski-PER              1
Eemzi Ko çak-PER                       1
Bola-LOC                               1
Mehmet Kamaç-PER                       1
Length: 23998, dtype: int64

In [16]:
# turn into df 
alltime_counts_df = pd.DataFrame(alltime_counts).reset_index()
alltime_counts_df.columns = ["entity", "count"]
alltime_counts_df
df = alltime_counts_df.copy()

### data fetching

In [17]:
# Ontology

df_sorted = df.sort_values("count", ascending=False)
df_sorted.head(30)


Unnamed: 0,entity,count
0,İstanbul-LOC,5702
1,B M M-ORG,5164
2,Türkiye Büyük Millet Meclisi-ORG,4325
3,TBMM-ORG,3759
4,Ankara-LOC,3578
5,Meclis-ORG,3202
6,İzmir-LOC,2683
7,Türkiye-LOC,2299
8,Konya-LOC,1706
9,Bursa-LOC,1559


In [18]:
# %pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import time
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, FOAF


sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.addCustomHttpHeader("User-Agent", "TurkishParliamentBot/1.0 (irem.yildirim1@std.bogazici.edu.tr)")

# Split by type
df_per = df_sorted[df_sorted["entity"].str.endswith("-PER")]
df_loc = df_sorted[df_sorted["entity"].str.endswith("-LOC")]
df_org = df_sorted[df_sorted["entity"].str.endswith("-ORG")]

# Order by tfidf
df_per = df_per.sort_values("count", ascending=False)
df_loc = df_loc.sort_values("count", ascending=False)
df_org = df_org.sort_values("count", ascending=False)


In [19]:
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Person Query Function
def query_person_wikidata(name):
    query = f"""
    SELECT ?person ?personLabel ?dob ?representsLabel ?memberOfLabel WHERE {{
      ?person rdfs:label "{name}"@tr .
      ?person wdt:P31 wd:Q5 .  # instance of human

      OPTIONAL {{ ?person wdt:P569 ?dob. }}                    # birthDate
      OPTIONAL {{ ?person wdt:P768 ?represents. }}            # represents (constituency)
      OPTIONAL {{ ?person wdt:P102 ?memberOf. }}              # memberOf (political party)

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "tr,en". }}
    }} LIMIT 1
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

# Location Query Function
def query_location(name):
    query = f"""
    SELECT ?place ?placeLabel ?countryLabel ?coordinate ?population ?municipalityLabel WHERE {{
      ?place rdfs:label "{name}"@tr .

      OPTIONAL {{ ?place wdt:P17 ?country. }}                 # country
      OPTIONAL {{ ?place wdt:P625 ?coordinate. }}            # coordinate
      OPTIONAL {{ ?place wdt:P1082 ?population. }}           # population
      OPTIONAL {{ ?place wdt:P131 ?municipality. }}          # administrative parent

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "tr,en". }}
    }} LIMIT 1
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

# Organization Query Function
def query_organization(name):
    query = f"""
    SELECT ?org ?orgLabel ?foundation ?orgTypeLabel WHERE {{
      ?org rdfs:label "{name}"@tr .

      OPTIONAL {{ ?org wdt:P571 ?foundation. }}              # foundation date
      OPTIONAL {{ ?org wdt:P31 ?orgType. }}                  # type (e.g., political party, institution)

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "tr,en". }}
    }} LIMIT 1
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


In [20]:
enriched_data = []

# Normalize entity names (remove -PER tag)
df_per["entity"] = df_per["entity"].str.replace("-PER", "", regex=False)

for name in df_per["entity"].head(50):  # Adjust range as needed
    print(name)
    try:
        result = query_person_wikidata(name)
        bindings = result["results"]["bindings"]
        
        if bindings:
            row = bindings[0]
            enriched_data.append({
                "name": name,
                "birthDate": row.get("dob", {}).get("value"),
                "represents": row.get("representsLabel", {}).get("value"),
                "memberOf": row.get("memberOfLabel", {}).get("value"),
                "wikidataID": row.get("person", {}).get("value"),
            })
        else:
            enriched_data.append({
                "name": name,
                "birthDate": None,
                "represents": None,
                "memberOf": None,
                "wikidataID": None,
            })
    
    except Exception as e:
        print(f"Error for {name}: {e}")
        enriched_data.append({
            "name": name,
            "birthDate": None,
            "represents": None,
            "memberOf": None,
            "wikidataID": None,
        })

df_enriched = pd.DataFrame(enriched_data)


 Mesut Yılmaz
Recep Tayyip Erdoğan
Mustafa
Atatürk
Ömer Fethi Gürer
Mehmet Şandır
Cemil Çiçek
Bülent Ecevit
Hasan
Mehmet Ali Şahin
REİS
TU
Turgut özal
Tansu Çiller
Levent Gök
Mehmet Mehdi Eker
Refet
Ali
Hikmet Çetin
Bülent Akarcalı
Durmuş Fikri Sağlar
Mehmet Akif Hamzaçebi
Yıldırım Akbulut
Mehmet Daniş
Ahmet Ersin
Abdülkadir Aksu
Işılay Saygın
İbrahim Köşdere
Ali Cumhur Taşkın
Süleyman Demirel
Binali Yıldırım
Oktay Vural
Abdullah Gül
Necmettin Cevheri
Veysel Eroğlu
Sinan Tekelioğlu
Kamer Genc
Reşat Doğru
Cemil
Kâzım
Rifat Serdaroğlu
M M
Erkan Aydın
 Kemal Anadol
Ahmet Aydın
Özgür Özel
Cüneyt Canver
Hasan Gemici
Mahmut Tanal
Hasan Fehmi


In [21]:
import requests
import re
import time

def get_electoral_district_from_wikipedia(name):
    S = requests.Session()
    url = "https://tr.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "extracts",
        "titles": name,
        "explaintext": True,
        "redirects": 1
    }

    try:
        response = S.get(url=url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        page = next(iter(data["query"]["pages"].values()))

        if "extract" not in page:
            return None

        text = page["extract"]

        # Improved patterns
        patterns = [
            r"\b([A-ZÇĞİÖŞÜ][a-zçğıöşü]+)\s+milletvekili\b",
            r"\b\d{1,2}\.\s+Dönem\s+([A-ZÇĞİÖŞÜ][a-zçğıöşü]+)\s+milletvekili\b",
            r"\bTBMM.*?\b([A-ZÇĞİÖŞÜ][a-zçğıöşü]+)\s+milletvekili\b"
        ]

        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return match.group(1).strip()

    except Exception as e:
        print(f"Error fetching district for {name}: {e}")

    return None


# Apply district enrichment
df_enriched["represents"] = df_enriched.get("represents", None)

for i, row in df_enriched.iterrows():
    if not row["represents"]:
        name = row["name"]
        location = get_electoral_district_from_wikipedia(name)
        print(f"Name: {name}, Represents: {location}")
        df_enriched.at[i, "represents"] = location
        time.sleep(1)  # be nice to Wikipedia


Name:  Mesut Yılmaz, Represents: Rize
Name: Recep Tayyip Erdoğan, Represents: Siirt
Name: Mustafa, Represents: None
Name: Atatürk, Represents: Ardahan
Name: Ömer Fethi Gürer, Represents: Niğde
Name: Mehmet Şandır, Represents: None
Name: Cemil Çiçek, Represents: Yozgat
Name: Bülent Ecevit, Represents: Zonguldak
Name: Hasan, Represents: None
Name: Mehmet Ali Şahin, Represents: İstanbul
Name: REİS, Represents: None
Name: TU, Represents: None
Name: Turgut özal, Represents: None
Name: Tansu Çiller, Represents: İstanbul
Name: Levent Gök, Represents: Ankara
Name: Mehmet Mehdi Eker, Represents: None
Name: Refet, Represents: None
Name: Ali, Represents: None
Name: Hikmet Çetin, Represents: İstanbul
Name: Bülent Akarcalı, Represents: None
Name: Durmuş Fikri Sağlar, Represents: Mersin
Name: Mehmet Akif Hamzaçebi, Represents: Trabzon
Name: Yıldırım Akbulut, Represents: Erzincan
Name: Mehmet Daniş, Represents: None
Name: Ahmet Ersin, Represents: None
Name: Abdülkadir Aksu, Represents: Diyarbakır
Nam

In [22]:
def extract_qid_from_sparql_response(response):
    """
    Extracts Q-ID from SPARQL response JSON.
    """
    try:
        results = response['results']['bindings']
        if results:
            uri = results[0]['org']['value']
            qid = uri.split('/')[-1]
            return qid
    except Exception as e:
        print(f"Error extracting QID: {e}")
    return None

In [24]:
# Get unique organization names
orgs = df_enriched["memberOf"].dropna().unique()
org_name_to_qid = {}

for org in orgs:
    try:
        response = query_organization(org)
        qid = extract_qid_from_sparql_response(response)
        print(f"{org} → {qid}")
        org_name_to_qid[org] = qid
        time.sleep(1)  # Respectful scraping
    except Exception as e:
        print(f"Failed to fetch QID for {org}: {e}")
        org_name_to_qid[org] = None

Adalet ve Kalkınma Partisi → Q19077
Cumhuriyet Halk Partisi → Q19079
Milliyetçi Hareket Partisi → Q251077
Doğru Yol Partisi → Q6061183
Anavatan Partisi → Q488511
Adalet Partisi → Q348125


In [26]:
df_enriched["organization_qid"] = df_enriched["memberOf"].map(org_name_to_qid)

In [27]:
df_enriched

Unnamed: 0,name,birthDate,represents,memberOf,wikidataID,organization_qid
0,Mesut Yılmaz,,Rize,,,
1,Recep Tayyip Erdoğan,1954-02-26T00:00:00Z,Siirt,Adalet ve Kalkınma Partisi,http://www.wikidata.org/entity/Q39259,Q19077
2,Mustafa,,,,,
3,Atatürk,,Ardahan,,,
4,Ömer Fethi Gürer,1957-01-01T00:00:00Z,Niğde,Cumhuriyet Halk Partisi,http://www.wikidata.org/entity/Q31188503,Q19079
5,Mehmet Şandır,1947-01-01T00:00:00Z,,Milliyetçi Hareket Partisi,http://www.wikidata.org/entity/Q6042625,Q251077
6,Cemil Çiçek,1946-11-04T00:00:00Z,Yozgat,Adalet ve Kalkınma Partisi,http://www.wikidata.org/entity/Q202971,Q19077
7,Bülent Ecevit,1925-05-28T00:00:00Z,Zonguldak,Cumhuriyet Halk Partisi,http://www.wikidata.org/entity/Q192634,Q19079
8,Hasan,1423-01-01T00:00:00Z,,,http://www.wikidata.org/entity/Q374468,
9,Mehmet Ali Şahin,1950-09-16T00:00:00Z,İstanbul,Adalet ve Kalkınma Partisi,http://www.wikidata.org/entity/Q1396517,Q19077


In [37]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, FOAF

# Create graph and namespaces
g = Graph()
EX = Namespace("http://example.org/mp/")
LOC = Namespace("http://example.org/locations/")
ORG = Namespace("http://example.org/organizations/")

g.bind("ex", EX)
g.bind("foaf", FOAF)

# Loop through enriched MP data
for _, row in df_enriched.iterrows():
    mp_uri = URIRef(EX[row["name"].replace(" ", "_")])
    g.add((mp_uri, RDF.type, FOAF.Person))

    if row["birthDate"]:
        g.add((mp_uri, EX.birthDate, Literal(row["birthDate"])))
    
    if row["represents"]:
        loc_uri = URIRef(LOC[row["represents"].replace(" ", "_")])
        g.add((mp_uri, EX.represents, loc_uri))

    if row["memberOf"]:
        org_uri = URIRef(ORG[row["memberOf"].replace(" ", "_")])
        g.add((mp_uri, EX.memberOf, org_uri))

    if row["wikidataID"]:
        g.add((mp_uri, EX.wikidataID, Literal(row["wikidataID"])))

g.serialize("turkish_mps.ttl", format="turtle")


<Graph identifier=N4c685c5e8a244923a6d12cde739dcf58 (<class 'rdflib.graph.Graph'>)>