## Analysis of geographical information on the Repertoire


### Setup

In [2]:

from timelink.notebooks import TimelinkNotebook

# Create the notebook helper object for this Timelink project and print its
# settings (project, database and Kleio server information).
tlnb = TimelinkNotebook()
tlnb.print_info()



Timelink version: 1.1.25
Project name: dehergne-repertoire
Project home: /Users/jrc/develop/dehergne-repertoire
Database type: sqlite
Database name: dehergne_repertoire
Kleio image: timelinkserver/kleio-server
Kleio server token: CH4xG...
Kleio server URL: http://127.0.0.1:8088
Kleio server home: /Users/jrc/develop/dehergne-repertoire
Kleio server container: stoic_joliot
Kleio version requested: latest
Kleio server version: 12.7.579 (2025-01-29 17:45:15)
SQLite directory: /Users/jrc/develop/dehergne-repertoire/database/sqlite
Database version: 6ccf1ef385a6
Call print_info(show_token=True) to show the Kleio Server token
Call print_info(show_password=True) to show the Postgres password
TimelinkNotebook(project_name=dehergne-repertoire, project_home=/Users/jrc/develop/dehergne-repertoire, db_type=sqlite, db_name=dehergne_repertoire, kleio_image=timelinkserver/kleio-server, kleio_version=latest, postgres_image=postgres, postgres_version=latest)


## The residences of the Jesuits in China
The Repertoire includes two lists of Jesuit residences in China.

The first refers to 1644: [Planche: Carte des Chrétientés Chinoises de la fin des Ming (1644)](../sources/dehergne-locations-1644.cli),
and the second to 1701: [VII. Carte des résidences de Chine en l'année 1701](../sources/dehergne-locations-1701.cli).


In [3]:
import re

def get_wikidata_id(geo_entity, if_missing=''):
    """Extract the wikidata id annotated on the name of a geoentity.

    The ``comment`` and ``original`` texts of the entity name (read from
    ``geo_entity.extra_info['name']``) are searched for annotations of the
    form ``@wikidata:Q123`` (optional whitespace after the colon).

    Args:
        geo_entity: object with an ``extra_info`` attribute holding a mapping
            (or ``None``) with an optional ``'name'`` entry.
        if_missing: value returned as the id when no annotation is found.

    Returns:
        A tuple ``(text, wikidata_id)``. ``text`` is the name comment followed
        by the original name, both with the wikidata annotations removed.
        ``wikidata_id`` is the first id found in the comment, otherwise the
        first id found in the original name, otherwise ``if_missing``.
    """
    # Tolerate entities without extra information or without name details.
    name_info = (geo_entity.extra_info or {}).get('name') or {}
    name_comment = name_info.get('comment') or ''
    name_original = name_info.get('original') or ''

    # At least one digit is required: a bare "Q" is not a wikidata id.
    pattern = r'@wikidata:\s*(Q[0-9]+)'

    # Ids from the comment take precedence; sometimes the id is only in the
    # original name.
    found_ids = re.findall(pattern, name_comment) + re.findall(pattern, name_original)
    cleaned_text = re.sub(pattern, '', name_comment) + re.sub(pattern, '', name_original)

    return cleaned_text, found_ids[0] if found_ids else if_missing



## List of residences in 1644

In [4]:
import pandas as pd
from sqlalchemy import select

# Models for the three nested place groups, and the query for the top-level
# places (geo1) whose `inside` is the 1644 list.
geo1, geo2, geo3 = tlnb.db.get_model(['geo1','geo2','geo3'])
stmt = select(geo1).where(geo1.inside == 'deh-chre-1644')

# One dict per place, in traversal order.
place_list = []

with tlnb.db.session() as session:
    # Walk the hierarchy top-down: geo1 -> geo2 -> geo3, following `inside`.
    for province in session.execute(stmt).scalars().all():
        note, qid = get_wikidata_id(province, if_missing='No wikidata')
        print(province.id, province.name, qid, note)
        place_list.append(dict(province=province.name, fou='', name=province.name, wikidata=qid, comment=note))

        fous_stmt = select(geo2).where(geo2.inside == province.id)
        for fou in session.execute(fous_stmt).scalars().all():
            note, qid = get_wikidata_id(fou, if_missing='No wikidata')
            print(' ', fou.name, qid, note)
            place_list.append(dict(province=province.name, fou=fou.name, name=fou.name, wikidata=qid, comment=note))

            hiens_stmt = select(geo3).where(geo3.inside == fou.id)
            for tcheou_hien in session.execute(hiens_stmt).scalars().all():
                note, qid = get_wikidata_id(tcheou_hien, if_missing='No wikidata')
                print('   ', tcheou_hien.name, qid, note)
                place_list.append(dict(province=province.name, fou=fou.name, name=tcheou_hien.name, wikidata=qid, comment=note))


deh-r1644-chekiang Chekiang Q16967 Tche-kiang, hoje:Zhejiang, 浙江,  @dehergne:396
  Hangchou Q4970 Hang-tcheou, hoje: Hangzhou, 杭州, 
    Fuyang Q1011103 Fou-yang, hoje:Fuyang, 富阳, 
    Jenho No wikidata Jen-houo, hoje: Renhe, 仁和县, Historical county name. coordinate: 30.448897N, 120.307504E
  Chüchow Q58235 K'iu-tcheou, hoje:Quzhou, 衢州, , in the Chinese translation it is recognized as “遂州”, which is wrong, both phonetically and geographically. In Dehergne(1957), it is noted as "衢州".
  Huchow Q42664 Hou-tcheou, hoje: Huzhou, 湖州, 
    Tehtsing Q1191987 "Tehtsing du Huchow, Té-ts'ing;Teching, hoje: Deqing, 德清, "
  Kashing Q58178 Kia-hing, hoje:Jiaxing, 嘉兴, 
    Kashan Q1361347 Kia-chan, hoje: Jiashan, 嘉善, Kaosham
    Tangsi Q10931032 "T'ang-k'i Tangchi", hoje:Tangqi, 塘栖 , in the Chinese translation it is recognized as “塘拪”
    Tsungteh Q10270889 Tch'ong-té,hoje: Chongde, 崇德县 , Historical county name, located in the present Chongfu 崇福镇Tsungteh (Shihmen)
    Tungsiang Q1204548 T'ong-hiang, ho

Show one place in Kleio notation

In [5]:
from timelink.api.models import Geoentity

# Id of the place to display in Kleio notation.
place_kleio_id = "deh-r1644-chekiang"

with tlnb.db.session() as session:
    geo_place = session.get(Geoentity, place_kleio_id)
    # session.get() returns None for an unknown id; report that instead of
    # failing with an AttributeError on `None.to_kleio()`.
    if geo_place is None:
        print(f"No geoentity found with id {place_kleio_id!r}")
    else:
        print(geo_place.to_kleio())



geo1$Chekiang#Tche-kiang, hoje:Zhejiang, 浙江, @wikidata:Q16967 @dehergne:396/geo1
  atr$geoentity:name@wikidata/"https://www.wikidata.org/wiki/Q16967"#Tche-kiang, hoje:Zhejiang, 浙江, @wikidata:Q16967 @dehergne:396%Q16967/1644
  atr$geoentity:name@dehergne/"https://archive.org/details/bhsi37/page/n396/mode/1up"#Tche-kiang, hoje:Zhejiang, 浙江, @wikidata:Q16967 @dehergne:396%396/1644


In [6]:
# Raise the pandas display limit so up to 300 rows are shown untruncated.
pd.set_option('display.max_rows', 300)
# Build the 1644 table: one row per dict currently in place_list.
places_1644_df = pd.DataFrame(place_list)
# Show the first 30 rows as the cell output.
places_1644_df.head(30)

Unnamed: 0,province,fou,name,wikidata,comment
0,Chekiang,,Chekiang,Q16967,"Tche-kiang, hoje:Zhejiang, 浙江, @dehergne:396"
1,Chekiang,Hangchou,Hangchou,Q4970,"Hang-tcheou, hoje: Hangzhou, 杭州,"
2,Chekiang,Hangchou,Fuyang,Q1011103,"Fou-yang, hoje:Fuyang, 富阳,"
3,Chekiang,Hangchou,Jenho,No wikidata,"Jen-houo, hoje: Renhe, 仁和县, Historical county ..."
4,Chekiang,Chüchow,Chüchow,Q58235,"K'iu-tcheou, hoje:Quzhou, 衢州, , in the Chinese..."
5,Chekiang,Huchow,Huchow,Q42664,"Hou-tcheou, hoje: Huzhou, 湖州,"
6,Chekiang,Huchow,Tehtsing,Q1191987,"""Tehtsing du Huchow, Té-ts'ing;Teching, hoje: ..."
7,Chekiang,Kashing,Kashing,Q58178,"Kia-hing, hoje:Jiaxing, 嘉兴,"
8,Chekiang,Kashing,Kashan,Q1361347,"Kia-chan, hoje: Jiashan, 嘉善, Kaosham"
9,Chekiang,Kashing,Tangsi,Q10931032,"""T'ang-k'i Tangchi"", hoje:Tangqi, 塘栖 , in the ..."


In [7]:
# Save the 1644 table as an Excel workbook, without the row index.
places_1644_df.to_excel('../inferences/places-1644.xlsx', index=False)

## 1701

In [8]:
# Top-level places (geo1) whose `inside` is the 1701 list.
stmt = select(geo1).where(geo1.inside == 'deh-chre-1701')

# One dict per place, in traversal order (replaces the previous list).
place_list = []

with tlnb.db.session() as session:
    # Walk the hierarchy top-down: geo1 -> geo2 -> geo3, following `inside`.
    for province in session.execute(stmt).scalars().all():
        note, qid = get_wikidata_id(province, if_missing='No wikidata')
        print(province.name, qid, note)
        place_list.append(dict(province=province.name, fou='', name=province.name, wikidata=qid, comment=note))

        fous_stmt = select(geo2).where(geo2.inside == province.id)
        for fou in session.execute(fous_stmt).scalars().all():
            note, qid = get_wikidata_id(fou, if_missing='No wikidata')
            print(' ', fou.name, qid, note)
            place_list.append(dict(province=province.name, fou=fou.name, name=fou.name, wikidata=qid, comment=note))

            hiens_stmt = select(geo3).where(geo3.inside == fou.id)
            for tcheou_hien in session.execute(hiens_stmt).scalars().all():
                note, qid = get_wikidata_id(tcheou_hien, if_missing='No wikidata')
                print('   ', tcheou_hien.name, qid, note)
                place_list.append(dict(province=province.name, fou=fou.name, name=tcheou_hien.name, wikidata=qid, comment=note))


Chekiang Q16967 Zhejiang 浙江， CHEKIANG (actuel ZHEJIANG)
  Hangchou Q4970 Hangzhou 杭州, 
    Haining Q286266 海宁 
  Kashing Q58178 Jiaxing 嘉兴, 
  Kinhwa Q58210 Jinhua 金华, 
    Lanchi Q1023793 Lanxi 兰溪, 
  Ningpo Q42780 Ningbo 宁波, 
    Yenchow Q1334217 Yinzhou 鄞州, 
Fukien Q41705 Fujian 福建, 
  Foochow Q68481 Fuzhou 福州, 
    Lienkong Q204827 Lianjiang 连江, 
  Changchow Q68814 Zhangzhou 漳州, 
  Chüanchow Q68695 Quanzhou 泉州, 
    Amoy Q68744 Xiamen 厦门, 
  Funing Q241877 (alors tcheou indépendant)Funing, , hoje Xiapu 霞浦
    Fuan Q1374581 Fu'an 福安, 
  Hinghwa Q17498990 Xinghua 兴化, , 兴化县，the wikidata code of it dose not contain coordinate information, which can be substituted by Q68579，the present-day Putian 莆田
  Kienning Q11065314 Kienning fou, Jianning 建寧,  the wikidata code of it does not contain coordinate information, which can be substituted by Kien-yang today Jianyang Q639862 see 1644
    Pucheng Q1338032 浦城 
  Shaowu Q1025451 Shaowu 邵武, 
    Kienning hien Q781559 Jianning Xian 建宁, 
  Tingch

In [9]:
# Raise the pandas display limit so up to 300 rows are shown untruncated.
pd.set_option('display.max_rows', 300)
# Build the 1701 table: one row per dict currently in place_list.
places_1701_df = pd.DataFrame(place_list)
# Show the first 30 rows as the cell output.
places_1701_df.head(30)

Unnamed: 0,province,fou,name,wikidata,comment
0,Chekiang,,Chekiang,Q16967,Zhejiang 浙江， CHEKIANG (actuel ZHEJIANG)
1,Chekiang,Hangchou,Hangchou,Q4970,"Hangzhou 杭州,"
2,Chekiang,Hangchou,Haining,Q286266,海宁
3,Chekiang,Kashing,Kashing,Q58178,"Jiaxing 嘉兴,"
4,Chekiang,Kinhwa,Kinhwa,Q58210,"Jinhua 金华,"
5,Chekiang,Kinhwa,Lanchi,Q1023793,"Lanxi 兰溪,"
6,Chekiang,Ningpo,Ningpo,Q42780,"Ningbo 宁波,"
7,Chekiang,Ningpo,Yenchow,Q1334217,"Yinzhou 鄞州,"
8,Fukien,,Fukien,Q41705,"Fujian 福建,"
9,Fukien,Foochow,Foochow,Q68481,"Fuzhou 福州,"


In [10]:
# Save the 1701 table as an Excel workbook, without the row index.
places_1701_df.to_excel('../inferences/places-1701.xlsx', index=False)

## Join the two lists

In [11]:
# Tag each table with the year of the list it came from.
places_1644_df['year'] = 1644
places_1701_df['year'] = 1701

# Column order shared by the exported workbook and the displayed table.
output_columns = ['year', 'province', 'fou', 'name', 'wikidata', 'comment']

# Stack both tables and sort so the same place in both years ends up adjacent.
places_df = (
    pd.concat([places_1644_df, places_1701_df], ignore_index=True)
    .sort_values(by=['province', 'fou', 'name', 'year'])
)
places_df[output_columns].to_excel('../inferences/places-1644-1701.xlsx', index=False)
places_df[output_columns]

Unnamed: 0,year,province,fou,name,wikidata,comment
88,1644,Anhwei,,Anhwei,Q40956,"Anhui, Ngon-hoei, hoje:Anhui, 安徽,"
89,1644,Anhwei,Chuchow,Chuchow,Q114045,"Chuchow, hoje:Chuzhou, 滁州,"
90,1644,Anhwei,Hweichow,Hweichow,Q4358404,"Hoei-tcheou, hoje:Huizhou, 徽州,"
92,1644,Anhwei,Hweichow,Tungmen,No wikidata,
91,1644,Anhwei,Hweichow,Wuyan hien,Q1357710,"Ou-yuen, hoje:Wuyuan Xian, 婺源县, , Wuyuan histo..."
...,...,...,...,...,...,...
356,1701,Szechwan,Chungking,Chungking,Q11725,"Chongqing 重庆,"
213,1644,Szechwan,Paoning,Paoning,Q10887586,"Pao-ning, hoje: Baoning, 保宁, , Q1200170, 保宁府hi..."
214,1644,Yunnan,,Yunnan,Q43194,"hoje: Yunnan, 云南,"
357,1701,Yunnan,,Yunnan,Q43194,"Yunnan 云南,"


## List all the occurrences of a place in the biographies

REDO:

- first link wikidata id geoentity:name@wikidata with groupname and level. The higher the level, the higher the geographical hierarchy
- then loop 

In [12]:
# Inspect the columns exposed by the 'eattributes' view.
list(tlnb.db.get_view('eattributes').columns)

[Column('id', String(), table=<eattributes>, primary_key=True, nullable=False),
 Column('the_line', Integer(), table=<eattributes>),
 Column('the_level', Integer(), table=<eattributes>),
 Column('the_order', Integer(), table=<eattributes>),
 Column('groupname', String(), table=<eattributes>),
 Column('updated', DateTime(), table=<eattributes>),
 Column('indexed', DateTime(), table=<eattributes>),
 Column('e_extra_info', JSON(), table=<eattributes>),
 Column('attr_id', String(), table=<eattributes>, primary_key=True, nullable=False),
 Column('entity', String(), table=<eattributes>),
 Column('the_type', String(), table=<eattributes>),
 Column('the_value', String(), table=<eattributes>),
 Column('the_date', String(), table=<eattributes>),
 Column('aobs', String(), table=<eattributes>),
 Column('a_extra_info', JSON(), table=<eattributes>)]

In [20]:
import re
import pandas as pd
from sqlalchemy import select

from timelink.kleio.utilities import convert_timelink_date, format_timelink_date
from timelink.api.models import Geoentity
# Models for the three place levels and the 'nattributes' view queried by
# show_nattributes_for_wikidata().
geo1, geo2, geo3 = tlnb.db.get_model(['geo1','geo2','geo3'])
nattributes = tlnb.db.get_view('nattributes')
# NOTE(review): this filters and orders geo1 by `the_type` and `the_source`;
# confirm that the geo1 model exposes these columns and that the literal
# (including its trailing "/") matches stored values. The earlier cells
# selected provinces with `geo1.inside == <list id>` instead.
stmt = select(geo1).where(geo1.the_type == "geoentity:name@wikidata/").order_by(geo1.name,geo1.the_source)

# One dict per visited place, in traversal order.
place_list = []

def show_nattributes_for_wikidata(wikidata, ident='', session=None):
    """Print the rows of the ``nattributes`` view that refer to a wikidata id.

    Rows are selected where ``the_value`` equals the wikidata page URL built
    from the id, and are printed in ``the_date`` order.

    Args:
        wikidata: wikidata id, e.g. ``'Q16967'``.
        ident: text printed at the start of every line (used as indentation).
        session: open database session used to run the query. It is required,
            despite the ``None`` default kept for backward compatibility.

    Raises:
        ValueError: if ``session`` is not given.
    """
    if session is None:
        raise ValueError('show_nattributes_for_wikidata() needs an open database session')

    wikidata_uri = 'https://www.wikidata.org/wiki/' + wikidata
    stmt = (
        select(nattributes)
        .where(nattributes.c.the_value == wikidata_uri)
        .order_by(nattributes.c.the_date)
    )
    for result in session.execute(stmt).fetchall():
        # Only the formatted date is displayed; the raw value is shown at the
        # end of the line for reference.
        fdate = format_timelink_date(result.the_date)
        print(f"{ident}{fdate} {result.name} ({result.the_type}) {result.id} {result.pobs} ({result.the_date})")

# Value returned by get_wikidata_id() (passed as `if_missing`) for places
# without a wikidata annotation. Such places cannot match any attribute, so
# the lookup query is skipped for them.
no_wikidata = 'No wikidata'

with tlnb.db.session() as session:
    result = session.execute(stmt).fetchall()
    for province, in result:
        print()
        comment, wikidata = get_wikidata_id(province, if_missing=no_wikidata)
        print(province.name, wikidata, comment)
        place_list.append({'province': province.name,'fou':'',  'name': province.name, 'wikidata': wikidata, 'comment': comment})

        # search all the entities that have this value in one of the attributes
        if wikidata != no_wikidata:
            show_nattributes_for_wikidata(wikidata, '  ', session=session)

        fous = session.execute(select(geo2).where(geo2.inside == province.id)).fetchall()
        for fou, in fous:
            print()
            comment, wikidata = get_wikidata_id(fou, if_missing=no_wikidata)
            print(' ', fou.name,  wikidata, comment)
            place_list.append({'province': province.name, 'fou':fou.name,  'name': fou.name, 'wikidata': wikidata, 'comment': comment})
            if wikidata != no_wikidata:
                show_nattributes_for_wikidata(wikidata, '   ', session=session)

            geo3s = session.execute(select(geo3).where(geo3.inside == fou.id)).fetchall()
            for tcheou_hien, in geo3s:
                print()
                comment, wikidata = get_wikidata_id(tcheou_hien, if_missing=no_wikidata)
                print('   ', tcheou_hien.name, wikidata,comment,  )
                place_list.append({'province': province.name, 'fou':fou.name,  'name': tcheou_hien.name, 'wikidata': wikidata, 'comment': comment})
                if wikidata != no_wikidata:
                    show_nattributes_for_wikidata(wikidata, '     ', session=session)


KeyboardInterrupt: 