In [9]:
import json

def load_jsonl_to_list(filepath):
    data_list = []
    with open(filepath, 'r') as file:
        for line in file:
            data_list.append(json.loads(line))
    return data_list

# 사용 예시
filepath = 'counterfact_memit.jsonl'
data = load_jsonl_to_list(filepath)

In [12]:
data[0]

{'case_id': 0,
 'pararel_idx': 2796,
 'requested_rewrite': {'prompt': 'The mother tongue of {} is',
  'relation_id': 'P103',
  'target_new': {'str': 'English', 'id': 'Q1860'},
  'target_true': {'str': 'French', 'id': 'Q150'},
  'subject': 'Danielle Darrieux'},
 'paraphrase_prompts': ['Shayna does this and Yossel goes still and dies. Danielle Darrieux, a native',
  'An album was recorded for Capitol Nashville but never released. Danielle Darrieux spoke the language'],
 'neighborhood_prompts': ['The mother tongue of Léon Blum is',
  'The native language of Montesquieu is',
  'François Bayrou, a native',
  'The native language of Raymond Barre is',
  'Michel Rocard is a native speaker of',
  'Jacques Chaban-Delmas is a native speaker of',
  'The native language of François Bayrou is',
  'Maurice Genevoix, speaker of',
  'The mother tongue of François Bayrou is',
  'Melchior de Vogüé, speaker of'],
 'attribute_prompts': ['J.\xa0R.\xa0R. Tolkien is a native speaker of',
  'The mother tongue

In [6]:
import requests

def get_wikidata_label(property_id):
    # SPARQL 쿼리 엔드포인트
    url = "https://query.wikidata.org/sparql"

    # SPARQL 쿼리 (입력받은 property_id에 해당하는 속성의 영어 레이블을 가져옴)
    query = f"""
    SELECT ?label WHERE {{
      wd:{property_id} rdfs:label ?label.
      FILTER(LANG(?label) = "en")
    }}
    """

    # 요청 헤더
    headers = {
        "Accept": "application/sparql-results+json"
    }

    # 요청 전송
    response = requests.get(url, headers=headers, params={'query': query})

    # 결과를 JSON으로 변환
    data = response.json()

    # 영어 레이블 반환
    if data['results']['bindings']:
        return data['results']['bindings'][0]['label']['value']
    else:
        return None

# 예시 사용
property_id = "P103"
label = get_wikidata_label(property_id)
print(f"The English label for {property_id} is: {label}")


The English label for P103 is: native language


In [14]:
data[0]['requested_rewrite']['relation_id']

'P103'

In [23]:
rel_list = []
for i in range(len(data)):
    property_id = data[i]['requested_rewrite']['relation_id']
    rel_list.append(property_id)

In [24]:
rel_list = list(set(rel_list))

print(len(rel_list))

34


In [25]:
import time
new_list = []
for property_id in rel_list:
    label = get_wikidata_label(property_id)
    print(f"The English label for {property_id} is: {label}")
    new_list.append(label)
    time.sleep(0.5)

The English label for P138 is: named after
The English label for P641 is: sport
The English label for P37 is: official language
The English label for P27 is: country of citizenship
The English label for P495 is: country of origin
The English label for P19 is: place of birth
The English label for P413 is: position played on team / speciality
The English label for P101 is: field of work
The English label for P264 is: record label
The English label for P740 is: location of formation
The English label for P39 is: position held
The English label for P159 is: headquarters location
The English label for P449 is: original broadcaster
The English label for P364 is: original language of film or TV show
The English label for P30 is: continent
The English label for P463 is: member of
The English label for P1412 is: languages spoken, written or signed
The English label for P140 is: religion or worldview
The English label for P127 is: owned by
The English label for P131 is: located in the administra

In [27]:
new_list

['named after',
 'sport',
 'official language',
 'country of citizenship',
 'country of origin',
 'place of birth',
 'position played on team / speciality',
 'field of work',
 'record label',
 'location of formation',
 'position held',
 'headquarters location',
 'original broadcaster',
 'original language of film or TV show',
 'continent',
 'member of',
 'languages spoken, written or signed',
 'religion or worldview',
 'owned by',
 'located in the administrative territorial entity',
 'employer',
 'genre',
 'capital',
 'manufacturer',
 'place of death',
 'location',
 'twinned administrative body',
 'work location',
 'native language',
 'occupation',
 'instrument',
 'language of work or name',
 'developer',
 'country']

In [30]:
for i in rel_list:
    print(i, end = ',')

P138,P641,P37,P27,P495,P19,P413,P101,P264,P740,P39,P159,P449,P364,P30,P463,P1412,P140,P127,P131,P108,P136,P36,P176,P20,P276,P190,P937,P103,P106,P1303,P407,P178,P17,

In [31]:
import json

def save_list_to_jsonl(data_list, filepath):
    with open(filepath, 'w') as file:
        for item in data_list:
            file.write(json.dumps(item) + '\n')

# 사용 예시

filepath = 'rel_list.jsonl'
save_list_to_jsonl(rel_list, filepath)

In [35]:
import requests

def get_wikidata_labels(entity_id):
    # SPARQL 쿼리 엔드포인트
    url = "https://query.wikidata.org/sparql"

    # SPARQL 쿼리 (입력받은 entity_id에 해당하는 값들의 영어 레이블을 가져옴)
    query = f"""
    SELECT DISTINCT ?valueLabel WHERE {{
      wd:{entity_id} ?relation ?value.
      VALUES ?relation {{
        wdt:P138 wdt:P641 wdt:P37 wdt:P27 wdt:P495 wdt:P19 wdt:P413 wdt:P101
        wdt:P264 wdt:P740 wdt:P39 wdt:P159 wdt:P449 wdt:P364 wdt:P30 wdt:P463
        wdt:P1412 wdt:P140 wdt:P127 wdt:P131 wdt:P108 wdt:P136 wdt:P36 wdt:P176
        wdt:P20 wdt:P276 wdt:P190 wdt:P937 wdt:P103 wdt:P106 wdt:P1303 wdt:P407
        wdt:P178 wdt:P17
      }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    LIMIT 10
    """

    # 요청 헤더
    headers = {
        "Accept": "application/sparql-results+json"
    }

    # 요청 전송
    response = requests.get(url, headers=headers, params={'query': query})

    # 결과를 JSON으로 변환
    data = response.json()

    # 영어 레이블 반환
    if data['results']['bindings']:
        return [result['valueLabel']['value'] for result in data['results']['bindings']]
    else:
        return []

# 예시 사용
entity_id = "Q265852"
labels = get_wikidata_labels(entity_id)
print(f"The English labels for {entity_id} are: {labels}")


The English labels for Q265852 are: ['English', 'computer scientist', 'businessperson', 'Apple', 'manager', 'engineer', 'Mobile', 'chief executive officer', 'presenter', 'United States of America']


In [37]:
import requests

def search_entity_id(search_string):
    # SPARQL 쿼리 엔드포인트
    url = "https://query.wikidata.org/sparql"

    # SPARQL 검색 쿼리 (입력받은 search_string에 해당하는 엔티티의 ID를 찾음)
    query = f"""
    SELECT ?item WHERE {{
      ?item rdfs:label "{search_string}"@en.
    }}
    LIMIT 1
    """

    # 요청 헤더
    headers = {
        "Accept": "application/sparql-results+json"
    }

    # 요청 전송
    response = requests.get(url, headers=headers, params={'query': query})

    # 결과를 JSON으로 변환
    data = response.json()

    # 첫 번째 결과에서 엔티티 ID 반환
    if data['results']['bindings']:
        return data['results']['bindings'][0]['item']['value'].split('/')[-1]
    else:
        return None

def get_wikidata_labels(entity_id):
    # SPARQL 쿼리 엔드포인트
    url = "https://query.wikidata.org/sparql"

    # SPARQL 쿼리 (입력받은 entity_id에 해당하는 값들의 영어 레이블을 가져옴)
    query = f"""
    SELECT DISTINCT ?valueLabel WHERE {{
      wd:{entity_id} ?relation ?value.
      VALUES ?relation {{
        wdt:P138 wdt:P641 wdt:P37 wdt:P27 wdt:P495 wdt:P19 wdt:P413 wdt:P101
        wdt:P264 wdt:P740 wdt:P39 wdt:P159 wdt:P449 wdt:P364 wdt:P30 wdt:P463
        wdt:P1412 wdt:P140 wdt:P127 wdt:P131 wdt:P108 wdt:P136 wdt:P36 wdt:P176
        wdt:P20 wdt:P276 wdt:P190 wdt:P937 wdt:P103 wdt:P106 wdt:P1303 wdt:P407
        wdt:P178 wdt:P17
      }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    LIMIT 10
    """

    # 요청 헤더
    headers = {
        "Accept": "application/sparql-results+json"
    }

    # 요청 전송
    response = requests.get(url, headers=headers, params={'query': query})

    # 결과를 JSON으로 변환
    data = response.json()

    # 영어 레이블 반환
    if data['results']['bindings']:
        return [result['valueLabel']['value'] for result in data['results']['bindings']]
    else:
        return []

def main(search_string):
    entity_id = search_entity_id(search_string)
    if entity_id:
        labels = get_wikidata_labels(entity_id)
        print(f"The English labels for entity ID {entity_id} are: {labels}")
    else:
        print(f"No entity found for search string '{search_string}'.")

# 예시 사용
search_string = "Tim Cook"
main(search_string)


The English labels for entity ID Q265852 are: ['English', 'computer scientist', 'businessperson', 'Apple', 'manager', 'engineer', 'Mobile', 'chief executive officer', 'presenter', 'United States of America']
