In [18]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import pathlib
import json
import csv

In [19]:
# Folder holding the entity JSON files, relative to the current working directory.
data_path = pathlib.Path("json_wikidata")
# List the regular files in the folder (sub-directories are skipped).
# os.listdir() returns names in arbitrary order, so sort them: the files are
# then processed, and the CSV rows written, in the same order on every run.
json_files = sorted(f for f in listdir(data_path) if isfile(data_path / f))

In [20]:
def get_label(json_data: dict, lang: str = 'en'):
    """Return the label of an entity in the requested language.

    The value is read from ``json_data['labels'][lang]['value']``.

    Args:
        json_data: Parsed entity JSON.
        lang: Language code used as the key under ``labels`` (default ``'en'``).

    Returns:
        The label value, or ``''`` when any step of the lookup is missing or
        has an unexpected shape.
    """
    try:
        return json_data['labels'][lang]['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no label". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [21]:
def get_alias_list(json_data: dict, lang: str = 'en'):
    """Return the aliases of an entity in the requested language.

    Aliases are read from ``json_data['aliases'][lang]``; the ``'value'`` of
    each entry is collected in order.

    Args:
        json_data: Parsed entity JSON.
        lang: Language code used as the key under ``aliases`` (default ``'en'``).

    Returns:
        A new list of alias values. It is empty when the entity has no aliases
        for ``lang``. If an entry is malformed, collection stops there and the
        values gathered so far are returned.
    """
    alias_list = []
    try:
        for alias in json_data['aliases'][lang]:
            alias_list.append(alias['value'])
    except (KeyError, IndexError, TypeError):
        # Missing/odd-shaped data is expected for many entities: keep what was
        # collected. Anything else (e.g. KeyboardInterrupt) must propagate,
        # which the previous bare ``except: pass`` prevented.
        pass
    return alias_list

In [22]:
def get_chembl_id(json_data: dict):
    """Return the ChEMBL ID stored on an entity.

    The value is read from the first statement listed under property ``P592``:
    ``json_data['claims']['P592'][0]['mainsnak']['datavalue']['value']``.
    Any further statements for the property are ignored.

    Args:
        json_data: Parsed entity JSON.

    Returns:
        The stored value, or ``''`` when the property, its first statement or
        that statement's value is absent or has an unexpected shape.
    """
    try:
        return json_data['claims']['P592'][0]['mainsnak']['datavalue']['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no value". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [23]:
def get_chebi_id(json_data: dict):
    """Return the ChEBI ID stored on an entity.

    The value is read from the first statement listed under property ``P683``:
    ``json_data['claims']['P683'][0]['mainsnak']['datavalue']['value']``.
    Any further statements for the property are ignored.

    Args:
        json_data: Parsed entity JSON.

    Returns:
        The stored value, or ``''`` when the property, its first statement or
        that statement's value is absent or has an unexpected shape.
    """
    try:
        return json_data['claims']['P683'][0]['mainsnak']['datavalue']['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no value". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [24]:
def get_atc_code(json_data: dict):
    """Return the ATC code stored on an entity.

    The value is read from the first statement listed under property ``P267``:
    ``json_data['claims']['P267'][0]['mainsnak']['datavalue']['value']``.
    Only that first statement is used, even if the entity lists several codes.

    Args:
        json_data: Parsed entity JSON.

    Returns:
        The stored value, or ``''`` when the property, its first statement or
        that statement's value is absent or has an unexpected shape.
    """
    try:
        return json_data['claims']['P267'][0]['mainsnak']['datavalue']['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no value". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [25]:
def get_rxnorm(json_data: dict):
    """Return the RxNorm identifier stored on an entity.

    The value is read from the first statement listed under property
    ``P3345``:
    ``json_data['claims']['P3345'][0]['mainsnak']['datavalue']['value']``.
    Any further statements for the property are ignored.

    Args:
        json_data: Parsed entity JSON.

    Returns:
        The stored value, or ``''`` when the property, its first statement or
        that statement's value is absent or has an unexpected shape.
    """
    try:
        return json_data['claims']['P3345'][0]['mainsnak']['datavalue']['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no value". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [26]:
def get_sitelink(json_data: dict, site: str):
    """Return the URL of an entity's sitelink for one site.

    The value is read from ``json_data['sitelinks'][site]['url']``.

    Args:
        json_data: Parsed entity JSON.
        site: Key under ``sitelinks`` identifying the site (the notebook uses
            keys such as ``'enwiki'`` and ``'frwiki'``).

    Returns:
        The stored URL, or ``''`` when the entity has no sitelink for ``site``
        or the entry carries no ``'url'``.
    """
    try:
        return json_data['sitelinks'][site]['url']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no sitelink". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [27]:
def get_tallman_name(json_data: dict):
    """Return the "tallman name" stored on an entity.

    The value is read from the first statement listed under property
    ``P9989``:
    ``json_data['claims']['P9989'][0]['mainsnak']['datavalue']['value']``.
    Any further statements for the property are ignored.

    Args:
        json_data: Parsed entity JSON.

    Returns:
        The stored value exactly as found in the JSON, or ``''`` when the
        property, its first statement or that statement's value is absent or
        has an unexpected shape.
    """
    # NOTE(review): the value is returned as stored. If P9989 is not a
    # plain-string property, the result may be a dict rather than a str.
    # Confirm against the data.
    try:
        return json_data['claims']['P9989'][0]['mainsnak']['datavalue']['value']
    except (KeyError, IndexError, TypeError):
        # Only failed lookups mean "no value". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return ''

In [28]:
# Accumulator for the extracted records: one dict is appended per entity file.
all_dict_wikidata = list()

In [29]:
# Start from an empty list so that re-running this cell rebuilds the records
# instead of appending a second copy of every entity.
all_dict_wikidata = []
for json_file in json_files:
    # Decode explicitly as UTF-8 (the recommended JSON encoding). Without
    # ``encoding=`` the platform default is used, which can fail on or garble
    # the non-ASCII labels/aliases (uk, ru, fr) read below.
    with open(data_path / json_file, encoding="utf-8") as f:
        json_data = json.load(f)
    # The file is fully parsed at this point, so the handle is already closed
    # while the record is assembled.
    # Key order here determines the column order of the resulting table.
    dict_langdata = {
        'id': json_data['id'],  # raises KeyError when the file has no 'id'
        'label_uk' : get_label(json_data,'uk'),
        'label_ru' : get_label(json_data,'ru'),
        'label_fr' : get_label(json_data,'fr'),
        'label_en' : get_label(json_data,'en'),
        'alias_list_uk' : get_alias_list(json_data,'uk'),
        'alias_list_ru' : get_alias_list(json_data,'ru'),
        'alias_list_fr' : get_alias_list(json_data,'fr'),
        'alias_list_en' : get_alias_list(json_data,'en'),
        'chembl_id' : get_chembl_id(json_data),
        'chebi_id': get_chebi_id(json_data),
        'atc_code': get_atc_code(json_data),
        'rxnorm': get_rxnorm(json_data),
        'ukwiki_sitelink' : get_sitelink(json_data,'ukwiki'),
        'frwiki_sitelink' : get_sitelink(json_data,'frwiki'),
        'ruwiki_sitelink' : get_sitelink(json_data,'ruwiki'),
        'enwiki_sitelink' : get_sitelink(json_data,'enwiki'),
        'tallman_name' : get_tallman_name(json_data)
    }
    all_dict_wikidata.append(dict_langdata)

In [30]:
# Turn the collected records into a table: one row per dict, one column per key.
df_all_dict_wikidata = pd.DataFrame(data=all_dict_wikidata)

In [31]:
# Write the table to CSV without the index column, quoting every field.
df_all_dict_wikidata.to_csv(
    path_or_buf="wikidata_names_2024-05-08.csv",
    index=False,
    quoting=csv.QUOTE_ALL,
)