## 0. Setting

In [1]:
import copy
import os
import pprint
from collections import Counter, defaultdict

import schedule
import tqdm
from bson import DBRef, ObjectId

pp = pprint.PrettyPrinter(indent=2)

In [2]:
import sys

codefolder = "C:/ProjectCollections/Programs/Australia_Cultural_Data_Engine/codes"

sys.path.append(codefolder)
from acde import MongoDBManipulation as acde_manip
from daao import MongoDBManipulation as daao_manip
from general import GeneralFunctions as gen_gf
from general import JsonProcessing as gen_jp
from general import MongoDBManipulation as gen_manip

## 1. DAAO DB Meta Collection

1. get all collections and corresponding classes
2. get field information of each collection (Removed, All, Date Objects)

In [3]:
daao_meta = daao_manip.DAAO_MetaCollection()

### 1.1 DAAO Class Extraction

In [4]:
class_names = daao_meta.extract_class()

### 1.2 DAAO Field Name Extraction

In [5]:
daao_objs_info = daao_meta.extract_fieldName()

person: 100%|███████████████████████████████████████████████████████████████████| 30693/30693 [01:01<00:00, 495.42it/s]
personGroup: 100%|████████████████████████████████████████████████████████████████| 4660/4660 [00:05<00:00, 882.80it/s]
event: 100%|███████████████████████████████████████████████████████████████████| 21906/21906 [00:10<00:00, 2162.64it/s]
eventGroup: 100%|████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 2528.67it/s]
recognition: 100%|███████████████████████████████████████████████████████████████| 5481/5481 [00:01<00:00, 3093.36it/s]
work: 100%|████████████████████████████████████████████████████████████████████| 23780/23780 [00:08<00:00, 2854.54it/s]
collection: 100%|██████████████████████████████████████████████████████████████| 11747/11747 [00:02<00:00, 4093.87it/s]
ansicOcc_menu: 100%|██████████████████████████████████████████████████████████████| 825/825 [00:00<00:00, 33539.80it/s]
eventType_menu: 100%|███████████████████

### 1.3 Lookup Project Pipeline Setting

In [6]:
lookupProj_info = daao_meta.construct_LookUpPipelines_relation(daao_objs_info)

## 2. Extract Relationships


### Brief Description

To Comment

1. [Set Up Searching Conditions](#set-up-searching-cond)

2. [Drop Existing "relationship" Collection](#drop-relationship-coll)

3. [Set Up Searching Conditions](#set-up-searching-cond)

4. [Set Up Searching Conditions](#set-up-searching-cond)

5. [Set Up Searching Conditions](#set-up-searching-cond)

### <a name="set-up-searching-cond">Set Up Searching Conditions</a>


In [7]:
search_colls = [
    ("xparty", {"is_deleted": False, "is_shadow": False, "is_locked": False}),
    ("xwork", {"is_deleted": False, "is_locked": False}),
    ("xactivity", {"is_deleted": False, "is_locked": False}),
]
relation_coll_name = "relationship"

### <a name="drop-relationship-coll">Drop Existing "relationship" Collection</a>

In [8]:
daao_meta.daao_db[relation_coll_name].drop()

### <a name="insert-related-resource">Insert Related Resource Relationships</a>

In [9]:
rsc_predicates = {
    "see_alsos": {"term": "has reference", "reverse_term": "is reference of",},
    "references": {"term": "has reference", "reverse_term": "is reference of",},
    "images": {"term": "has image", "reverse_term": "is image of",},
    "portrait_images": {"term": "has portrait", "reverse_term": "is portrait of",},
}

# biography.references is not included
for search_coll, coll_filter in search_colls:
# for search_coll, coll_filter in [("xactivity", {"is_deleted": False, "is_locked": False})]:

    # get all resource related fields
    rsc_fields = set()
    for k in daao_objs_info.keys():
        if daao_objs_info[k].get("collection_name") == search_coll:
            rsc_fields = rsc_fields | set(
                f
                for f in daao_objs_info[k].get("all_fields")
                # need an annotation here
                # how should I adjust portrait_images and images
                if "." not in f
                and "_stub_" not in f
                and f.startswith(("ref", "see_", "portrait", "images"))
            )
    rsc_filter = {"$or": []}
    for f in rsc_fields:
        rsc_filter["$or"].append({f: {"$exists": 1, "$ne": []}})
    rsc_project_fields = {f: 1 for f in rsc_fields}
    rsc_project_fields.update({"_cls": 1})

    # extract and look up the "resource" attributes
    refresh_time = 1
    with tqdm.tqdm(
        total=daao_meta.daao_db[search_coll].count_documents(rsc_filter),
        desc=f"Resources: {search_coll}_relationships_update (Refreshed Time: {refresh_time})",
    ) as pbar, daao_meta.localclient.start_session() as session:
        for r in daao_meta.daao_db[search_coll].find(
            rsc_filter, rsc_project_fields, no_cursor_timeout=True, session=session
        ):
            r_class = r["_cls"].split(".")[-1]
            if r_class == "PersonGroup":
                r_class = "Organization"
            for f in rsc_fields:
                related_obj_relations = r.get(f) if r.get(f) else []
                for related_obj_r in related_obj_relations:
                    # construct relationship document structure
                    relation = {
                        "subject": DBRef(search_coll, r["_id"]),
                        "object": related_obj_r,
                        "_class": "relationship",
                        "relation_class": f"{r_class}_RelatedResource",
                        "predicate": rsc_predicates[f],
                    }
                    # relation_ori = copy.copy(relation)
                    relation = daao_manip.daao_get_dbrefInfo(
                        relation, lookupProj_info, db=daao_meta.daao_db
                    )
                    if relation.get("subject") and relation.get("object"):
                        daao_meta.daao_db[relation_coll_name].insert_one(relation)
            pbar.update(1)
            # if pbar.n > 5:
            #     break

            #####
            # refresh the session for insert
            #####
            if pbar.format_dict["elapsed"] / 1200 > refresh_time:
                daao_meta.localclient.admin.command(
                    "refreshSessions", [session.session_id], session=session
                )
                refresh_time += 1
                pbar.set_description(
                    f"Resources: {search_coll}_relationships_update (Refreshed Time: {refresh_time})"
                )
                pbar.refresh()  # to show the update immediately

Resources: xparty_relationships_update (Refreshed Time: 1): 100%|████████████████| 10778/10778 [02:08<00:00, 83.64it/s]
Resources: xwork_relationships_update (Refreshed Time: 1): 100%|██████████████████| 3035/3035 [00:14<00:00, 203.63it/s]
Resources: xactivity_relationships_update (Refreshed Time: 1): 100%|████████████████| 271/271 [00:01<00:00, 139.57it/s]


### <a name="insert-related-place">Insert Related Place Relationships</a>

#### Set

In [10]:
related_p_fields_mapping = {
    "xparty": {
        "birth.coverage.place",
        "death.burial_place",
        "death.coverage.place",
        "other_occupations.coverage_range.place",
        "residences.coverage_range.place",
        "trainings.coverage_range.place",
    },
    "xactivity": {"coverages.place",},
    "xwork": {"manufacturers.place",},
    "externalresource": {"place"},
}

related_p_predicates = {
    "birth_place": {"term": "was born in"},
    "death_place": {"term": "died in"},
    "trainings": {"term": "was educated/trained in"},
    "other_occupations": {"term": "worked in"},
    "residences": {
        "VersionedDocument.XParty.Person": {"term": "lived in"},
        "VersionedDocument.XParty.PersonGroup": {"term": "was located in"},
    },
    "coverages": {"term": "was held in"},
    "manufacturers": {"term": "was manufactured in"},
    "place": {"term": "is created in"},
}

#### xparty Related Places

In [11]:
#####
# Set xparty related place pipelines
#####

curr_level = "person"
curr_search_filter = [
    s_c[1]
    for s_c in search_colls
    if s_c[0] == daao_objs_info[curr_level]["collection_name"]
][0]

# get name of first level of field having 'coverage' prefix
fields_levelUp_root = list(
    set(
        ".".join(f.split(".")[:2])
        for f in daao_objs_info[curr_level]["date_objects"]
        if "coverage" in f
    )
)
# get name of first level of ARRAY field having 'coverage' prefix
fields_arrayDates = set(
    f.split(".", 1)[0] for f in fields_levelUp_root if f.split(".", 1)[0].endswith("s")
)
InitStage_Match = gen_manip.construct_MatchStage(bool_filters=curr_search_filter)
InitStage_Match[0]["$match"].update(
    {
        "$or": [
            {f"{f}.$id": {"$type": "objectId"},}
            for f in related_p_fields_mapping[
                daao_objs_info[curr_level]["collection_name"]
            ]
        ]
    }
)
InitStage_Project = gen_manip.construct_FinalProjectionStage(
    keep_list=fields_levelUp_root + ["_cls"]
)
InitStage_Remove = gen_manip.construct_FinalProjectionStage(
    remove_list=set(daao_objs_info[curr_level]["remove_fields"]) - set(["_cls"])
)
Stage2_Unwinds = gen_manip.construct_unwindStage(fields_arrayDates)
Stage3_LevelUp = [
    {
        "$addFields": {
            "birth_date": "$birth.coverage.date._date",
            "birth_place": "$birth.coverage.place",
            "birth_note": "$birth.note",
            "death_date": "$death.coverage.date._date",
            "death_place": "$death.coverage.place",
            "death_note": "$death.note",
            "death_burial_place": "$death.burial_place",
            "other_occupations.date_end": "$other_occupations.coverage_range.date_range.end._date",
            "other_occupations.date_start": "$other_occupations.coverage_range.date_range.start._date",
            "other_occupations.place": "$other_occupations.coverage_range.place",
            "trainings.date_end": "$trainings.coverage_range.date_range.end._date",
            "trainings.date_start": "$trainings.coverage_range.date_range.start._date",
            "trainings.place": "$trainings.coverage_range.place",
            "residences.date_end": "$residences.coverage_range.date_range.end._date",
            "residences.date_start": "$residences.coverage_range.date_range.start._date",
            "residences.place": "$residences.coverage_range.place",
        }
    },
    {
        "$project": {
            "trainings.coverage_range": 0,
            "death": 0,
            "birth": 0,
            "other_occupations.coverage_range": 0,
            "residences.coverage_range": 0,
        }
    },
]
Stage4_Group = daao_manip.daao_construct_groupStage(
    tuple(
        list(set(f.split(".")[0] for f in Stage3_LevelUp[0]["$addFields"].keys()))
        + ["_cls"]
    ),
    tuple(fields_arrayDates),
)

Stage5_Unset = []
Stage5_Unset_set1 = {}
Stage5_Unset_set2 = {}

for f in Stage3_LevelUp[0]["$addFields"].keys():
    if "." in f:
        f_none_value = [{}]
        f_name = f.split(".")[0]
    elif not f.endswith(("note", "date")):
        f_none_value = None
        f_name = f
    else:
        f_name = None
    if f_name:
        Stage5_Unset_set1.update(
            {
                f"{f_name}": {
                    "$cond": {
                        "if": {"$eq": [f"${f_name}", f_none_value]},
                        "then": "$$REMOVE",
                        "else": f"${f_name}",
                    },
                }
            }
        )
    if f != f_name and "." not in f:
        Stage5_Unset_set2.update(
            {
                f: {
                    "$cond": {
                        "if": {
                            "$or": [
                                {"$eq": [f, None]},
                                {"$eq": [f, ""]},
                                {"$not": [f"${f.split('_')[0]}_place"]},
                            ]
                        },
                        "else": f"${f}",
                        "then": "$$REMOVE",
                    },
                }
            }
        )
Stage5_Unset = [{"$set": Stage5_Unset_set1}, {"$set": Stage5_Unset_set2}]

rp_pipelines_xparty = (
    InitStage_Match
    + InitStage_Project
    + InitStage_Remove
    + Stage2_Unwinds
    + Stage3_LevelUp
    + Stage4_Group
    + Stage5_Unset
)

In [12]:
#####
# Extract xparty related places
#####

curr_search_coll = "xparty"
curr_rp_pipelines = rp_pipelines_xparty
refresh_time = 1

with tqdm.tqdm(
    total=daao_meta.daao_db[curr_search_coll].count_documents(
        curr_rp_pipelines[0]["$match"]
    ),
    desc=f"Fetch Related Places: {curr_search_coll}_relationships_update (Refreshed Time: {refresh_time})",
) as pbar:
    tmp_rp_relations = []
    for r in daao_meta.daao_db[curr_search_coll].aggregate(curr_rp_pipelines,):
        r_class = r["_cls"].split(".")[-1]
        if r_class == "PersonGroup":
            r_class = "Organization"
        curr_rp_fields = [
            f
            for f in r.keys() - set(["_id", "_cls"])
            if not f.endswith(("note", "date"))
        ]
        for f in curr_rp_fields:
            if f.startswith(("birth", "death")):
                if r.get(f):
                    object_dbref = r.get(f)
                else:
                    continue
                f_prefix = f.split("_")[0]
                relation = {
                    "subject": DBRef(curr_search_coll, r["_id"]),
                    "object": object_dbref,
                    "note": r.get(f"{f_prefix}_note"),
                    "relation_class": f"{r_class}_RelatedPlace",
                    "time": {
                        "date_start": r.get(f"{f_prefix}_date"),
                        "date_end": None,
                    },
                    "predicate": related_p_predicates[f],
                }
                tmp_rp_relations.append(relation)
            else:
                related_p_obj_relations = r.get(f) if r.get(f) else []
                if f == "residences":
                    rp_predicate = related_p_predicates[f][r["_cls"]]
                else:
                    rp_predicate = related_p_predicates[f]
                for related_p_obj_r in related_p_obj_relations:
                    if related_p_obj_r.get("place"):
                        object_dbref = related_p_obj_r.get("place")
                    else:
                        continue
                    relation = {
                        "subject": DBRef(curr_search_coll, r["_id"]),
                        "object": object_dbref,
                        "relation_class": f"{r_class}_RelatedPlace",
                        "time": {
                            "date_start": related_p_obj_r.get("date_start"),
                            "date_end": related_p_obj_r.get("date_end"),
                        },
                        "predicate": rp_predicate,
                    }
                    tmp_rp_relations.append(relation)
        pbar.update(1)
with tqdm.tqdm(
    total=len(tmp_rp_relations),
    desc=f"Insert Related Place: {curr_search_coll}_relationships_update (Refreshed Time: {refresh_time})",
) as pbar, daao_meta.localclient.start_session() as session:
    for r in tmp_rp_relations:
        relation = daao_manip.daao_get_dbrefInfo(
            copy.copy(r), lookupProj_info, db=daao_meta.daao_db
        )
        if relation.get("subject") and relation.get("object"):
            daao_meta.daao_db[relation_coll_name].insert_one(relation)
            pbar.update(1)
    #####
    # refresh the session for insert
    #####
    if pbar.format_dict["elapsed"] / 1200 > refresh_time:
        daao_meta.localclient.admin.command(
            "refreshSessions", [session.session_id], session=session
        )
        refresh_time += 1
        pbar.set_description(
            f"Related: {search_coll}_relationships_update (Refreshed Time: {refresh_time})"
        )
        pbar.refresh()  # to show immediately the update

Fetch Related Places: xparty_relationships_update (Refreshed Time: 1): 100%|███| 10109/10109 [00:02<00:00, 4200.63it/s]
Insert Related Place: xparty_relationships_update (Refreshed Time: 1): 100%|████| 30638/30638 [02:14<00:00, 228.11it/s]


#### xactivity Related Places

In [13]:
#####
# Set xactivity related place pipelines
#####

curr_level = "event"
curr_search_filter = [
    s_c[1]
    for s_c in search_colls
    if s_c[0] == daao_objs_info[curr_level]["collection_name"]
][0]

# get name of first level of field having 'coverage' prefix
fields_levelUp_root = list(
    set(
        ".".join(f.split(".")[:2])
        for f in daao_objs_info[curr_level]["date_objects"]
        if "coverage" in f
    )
)
# get name of first level of ARRAY field having 'coverage' prefix
fields_arrayDates = set(
    f.split(".", 1)[0] for f in fields_levelUp_root if f.split(".", 1)[0].endswith("s")
)
InitStage_Match = gen_manip.construct_MatchStage(bool_filters=curr_search_filter)
InitStage_Match[0]["$match"].update(
    {
        "$or": [
            {f"{f}.$id": {"$type": "objectId"},}
            for f in related_p_fields_mapping[
                daao_objs_info[curr_level]["collection_name"]
            ]
        ]
    }
)
InitStage_Project = gen_manip.construct_FinalProjectionStage(
    keep_list=list(fields_arrayDates) + ["_cls"]
)
InitStage_Remove = gen_manip.construct_FinalProjectionStage(
    remove_list=set(daao_objs_info[curr_level]["remove_fields"]) - set(["_cls"])
)
Stage2_Unwinds = gen_manip.construct_unwindStage(fields_arrayDates)
Stage3_LevelUp = [
    {
        "$addFields": {
            "coverages.date_end": "$coverages.date_range.end._date",
            "coverages.date_start": "$coverages.date_range.start._date",
        }
    },
    {"$project": {"coverages.date_range": 0,}},
]
Stage4_Group = daao_manip.daao_construct_groupStage(
    tuple(
        list(set(f.split(".")[0] for f in Stage3_LevelUp[0]["$addFields"].keys()))
        + ["_cls"]
    ),
    tuple(fields_arrayDates),
)
Stage5_Unset = [
    {
        "$set": {
            "coverages": {
                "$cond": {
                    "else": "$coverages",
                    "if": {"$eq": ["$coverages", [{}]]},
                    "then": "$$REMOVE",
                }
            },
        }
    }
]

rp_pipelines_xactivity = (
    InitStage_Match
    + InitStage_Project
    + InitStage_Remove
    + Stage2_Unwinds
    + Stage3_LevelUp
    + Stage4_Group
    + Stage5_Unset
)

In [14]:
#####
# Extract xactivity related places
#####

curr_search_coll = "xactivity"
curr_rp_pipelines = rp_pipelines_xactivity
refresh_time = 1

with tqdm.tqdm(
    total=daao_meta.daao_db[curr_search_coll].count_documents(
        curr_rp_pipelines[0]["$match"]
    ),
    desc=f"Fetch Related Places: {curr_search_coll}_relationships_update (Refreshed Time: {refresh_time})",
) as pbar, daao_meta.localclient.start_session() as session:
    for r in daao_meta.daao_db[curr_search_coll].aggregate(curr_rp_pipelines,):
        r_class = r["_cls"].split(".")[-1]
        curr_rp_fields = [f for f in r.keys() - set(["_id", "_cls"])]
        for curr_rp_field in curr_rp_fields:
            related_p_obj_relations = (
                r.get(curr_rp_field) if r.get(curr_rp_field) else []
            )
            for related_p_obj_r in related_p_obj_relations:
                if isinstance(related_p_obj_r.get("place"), DBRef):
                    object_dbref = related_p_obj_r.get("place")
                else:
                    continue
                relation = {
                    "subject": DBRef(curr_search_coll, r["_id"]),
                    "object": object_dbref,
                    "relation_class": f"{r_class}_RelatedPlace",
                    "time": {
                        "date_start": related_p_obj_r.get("date_start"),
                        "date_end": related_p_obj_r.get("date_end"),
                    },
                    "predicate": related_p_predicates[curr_rp_field],
                }
                # relation_ori = copy.copy(relation)
                relation = daao_manip.daao_get_dbrefInfo(
                    relation, lookupProj_info, db=daao_meta.daao_db
                )
                if relation.get("subject") and relation.get("object"):
                    daao_meta.daao_db[relation_coll_name].insert_one(relation)
        pbar.update(1)
        #####
        # refresh the session for insert
        #####
        if pbar.format_dict["elapsed"] / 1200 > refresh_time:
            daao_meta.localclient.admin.command(
                "refreshSessions", [session.session_id], session=session
            )
            refresh_time += 1
            pbar.set_description(
                f"Related: {search_coll}_relationships_update (Refreshed Time: {refresh_time})"
            )
            pbar.refresh()  # to show immediately the update

Fetch Related Places: xactivity_relationships_update (Refreshed Time: 1): 100%|█| 20247/20247 [02:55<00:00, 115.45it/s]


#### xwork Related Places

In [15]:
#####
# Set xwork related place pipelines
#####

curr_level = "work"
curr_search_filter = [
    s_c[1]
    for s_c in search_colls
    if s_c[0] == daao_objs_info[curr_level]["collection_name"]
][0]

# get name of first level of field having 'coverage' prefix
fields_levelUp_root = list(
    set(
        ".".join(f.split(".")[:2])
        for f in daao_objs_info[curr_level]["date_objects"]
        if "coverage" in f
    )
)
# get name of first level of ARRAY field having 'coverage' prefix
InitStage_Match = gen_manip.construct_MatchStage(bool_filters=curr_search_filter)
InitStage_Match[0]["$match"].update(
    {
        "$or": [
            {f"{f}.$id": {"$type": "objectId"},}
            for f in related_p_fields_mapping[
                daao_objs_info[curr_level]["collection_name"]
            ]
        ]
    }
)
InitStage_Project = gen_manip.construct_FinalProjectionStage(
    keep_list=["manufacturers.place", "_cls"]
)
InitStage_Remove = gen_manip.construct_FinalProjectionStage(
    remove_list=set(daao_objs_info[curr_level]["remove_fields"]) - set(["_cls"])
)
# the order of unwind fields matters here
Stage2_Unwinds = gen_manip.construct_unwindStage(
    ["manufacturers", "manufacturers.place"]
)
Stage5_Unset = [
    {
        "$set": {
            "manufacturers": {
                "$cond": {
                    "else": "$manufacturers",
                    "if": {"$eq": ["$manufacturers", [{}]]},
                    "then": "$$REMOVE",
                }
            },
        }
    }
]

rp_pipelines_xwork = (
    InitStage_Match
    + InitStage_Project
    + InitStage_Remove
    + Stage2_Unwinds
    + Stage5_Unset
)

In [16]:
#####
# Extract xwork related places
#####

curr_search_coll = "xwork"
curr_rp_pipelines = rp_pipelines_xwork
refresh_time = 1

with tqdm.tqdm(
    total=daao_meta.daao_db[curr_search_coll].count_documents(
        curr_rp_pipelines[0]["$match"]
    ),
    desc=f"Fetch Related Places: {curr_search_coll}_relationships_update (Refreshed Time: {refresh_time})",
) as pbar, daao_meta.localclient.start_session() as session:
    for r in daao_meta.daao_db[curr_search_coll].aggregate(curr_rp_pipelines,):
        r_class = r["_cls"].split(".")[-1]
        curr_rp_fields = [f for f in r.keys() - set(["_id", "_cls"])]
        for curr_rp_field in curr_rp_fields:
            if r.get(curr_rp_field) and r.get(curr_rp_field).get("place"):
                related_p_obj_r = r.get(curr_rp_field)
                object_dbref = related_p_obj_r.get("place")
            else:
                continue
            relation = {
                "subject": DBRef(curr_search_coll, r["_id"]),
                "object": object_dbref,
                "relation_class": f"{r_class}_RelatedPlace",
                "time": {
                    "date_start": related_p_obj_r.get("date_start"),
                    "date_end": related_p_obj_r.get("date_end"),
                },
                "predicate": related_p_predicates[curr_rp_field],
            }
            relation_ori = copy.copy(relation)
            relation = daao_manip.daao_get_dbrefInfo(
                relation, lookupProj_info, db=daao_meta.daao_db
            )
            if relation.get("subject") and relation.get("object"):
                daao_meta.daao_db[relation_coll_name].insert_one(relation)
        pbar.update(1)
        #####
        # refresh the session for insert
        #####
        if pbar.format_dict["elapsed"] / 1200 > refresh_time:
            daao_meta.localclient.admin.command(
                "refreshSessions", [session.session_id], session=session
            )
            refresh_time += 1
            pbar.set_description(
                f"Related: {search_coll}_relationships_update (Refreshed Time: {refresh_time})"
            )
            pbar.refresh()  # to show immediately the update

Fetch Related Places: xwork_relationships_update (Refreshed Time: 1): 100%|██████████████| 6/6 [00:00<00:00, 79.40it/s]


#### externalresource Related Places

In [17]:
#####
# Set xwork related place pipelines
#####

curr_level = "externalresource"
curr_search_filter = {
    "is_deleted": False,
}

InitStage_Match = gen_manip.construct_MatchStage(bool_filters=curr_search_filter)

FinalStage_Project = gen_manip.construct_FinalProjectionStage(
    keep_list=["place", "_cls"], complex_action_pairs={"date": "$date._date"},
)

rp_pipelines_etlrsc = InitStage_Match + FinalStage_Project

In [18]:
curr_search_coll = "externalresource"
curr_rp_pipelines = rp_pipelines_etlrsc
refresh_time = 1

with tqdm.tqdm(
    total=daao_meta.daao_db[curr_search_coll].count_documents(
        curr_rp_pipelines[0]["$match"]
    ),
    desc=f"Fetch Related Places: {curr_search_coll}_relationships_update (Refreshed Time: {refresh_time})",
) as pbar, daao_meta.localclient.start_session() as session:
    for r in daao_meta.daao_db[curr_search_coll].aggregate(curr_rp_pipelines,):
        if isinstance(r.get("place"), DBRef):
            object_dbref = r.get("place")
        else:
            continue
        relation = {
            "subject": DBRef(curr_search_coll, r["_id"]),
            "object": object_dbref,
            "relation_class": "Resource_RelatedPlace",
            "time": {"date_start": r.get("date"), "date_end": r.get("date"),},
            "predicate": related_p_predicates["place"],
        }
        relation_ori = copy.copy(relation)
        relation = daao_manip.daao_get_dbrefInfo(
            relation, lookupProj_info, db=daao_meta.daao_db
        )
        if relation.get("subject") and relation.get("object"):
            daao_meta.daao_db[relation_coll_name].insert_one(relation)
            pbar.update(1)

Fetch Related Places: externalresource_relationships_update (Refreshed Time: 1):   6%| | 2138/36663 [00:07<02:03, 279.3


### <a name="insert-related-others">Insert Other Related Entities Relationships</a>

In [19]:
relation_class_mapping = {
    "RelatedPerson": "RelatedPerson",
    "RelatedPersonGroup": "RelatedOrganization",
    "RelatedEvent": "RelatedEvent",
    "RelatedCollection": "RelatedCollection",
    "RelatedRecognition": "RelatedRecognition",
    "RelatedWork": "RelatedWork",
    "RelatedPlace": "RelatedPlace",
    "RelatedRecognitionForWork": "RelatedRecognition",
    "RelatedEventGroup": "RelatedEventGroup",
    "RelatedEventForWork": "RelatedEvent",
    "RelatedEventEvent": "RelatedEvent",
}

for search_coll, coll_filter in search_colls:
    # get all (other) related fields
    related_fields = set()
    for k in daao_objs_info.keys():
        if daao_objs_info[k].get("collection_name") == search_coll:
            related_fields = related_fields | set(
                f
                for f in daao_objs_info[k].get("all_fields")
                if "." not in f and "_stub_" not in f and f.startswith("related_")
            )
    related_filter = {"$or": []}
    for f in related_fields:
        related_filter["$or"].append({f: {"$exists": 1, "$ne": []}})
    related_filter.update(coll_filter)
    related_project_fields = {f: 1 for f in related_fields}
    related_project_fields.update({"_cls": 1})

    # extract and look up the "related" attributes
    refresh_time = 1
    with tqdm.tqdm(
        total=daao_meta.daao_db[search_coll].count_documents(related_filter),
        desc=f"Related: {search_coll}_relationships_update (Refreshed Time: {refresh_time})",
    ) as pbar, daao_meta.localclient.start_session() as session:
        for r in daao_meta.daao_db[search_coll].find(
            related_filter,
            related_project_fields,
            no_cursor_timeout=True,
            session=session,
        ):
            r_class = r["_cls"].split(".")[-1]
            if r_class == "PersonGroup":
                r_class = "Organization"
            for f in related_fields:
                related_obj_relations = r.get(f) if r.get(f) else []
                for related_obj_r in related_obj_relations:
                    relation = {
                        "subject": DBRef(search_coll, r["_id"]),
                        "object": related_obj_r.get("target"),
                        "note": related_obj_r.get("note"),
                        "relation_class": f'{r_class}_{relation_class_mapping[related_obj_r["_cls"].split(".")[-1]]}',
                        "predicate": related_obj_r.get("predicate"),
                    }
                    predicate_id = (
                        related_obj_r.get("predicate").id
                        if related_obj_r.get("predicate")
                        else None
                    )
                    object_id = (
                        related_obj_r.get("target").id
                        if related_obj_r.get("target")
                        else None
                    )
                    #####
                    # this part check whether there is any inserting relation exists
                    # in database, if there is, skip it
                    # This should have better method...
                    #####
                    # relation_stored = len(
                    #     list(
                    #         daao_meta.daao_db[relation_coll_name].find(
                    #             {
                    #                 "object._id": object_id,
                    #                 "predicate._id": predicate_id,
                    #                 "subject._id": r["_id"],
                    #             }
                    #         )
                    #     )
                    # )
                    # if relation_stored == 0:
                    #     relation = mdb_qc.daao_get_dbrefInfo(
                    #         relation, lookupProj_info, db=daao_meta.daao_db
                    #     )
                    #     daao_meta.daao_db[relation_coll_name].insert_one(relation)
                    relation = daao_manip.daao_get_dbrefInfo(
                        relation, lookupProj_info, db=daao_meta.daao_db
                    )
                    if relation.get("subject") and relation.get("object"):
                        daao_meta.daao_db[relation_coll_name].insert_one(relation)
            pbar.update(1)
            #####
            # refresh the session for insert
            #####
            if pbar.format_dict["elapsed"] / 1200 > refresh_time:
                daao_meta.localclient.admin.command(
                    "refreshSessions", [session.session_id], session=session
                )
                refresh_time += 1
                pbar.set_description(
                    f"Related: {search_coll}_relationships_update (Refreshed Time: {refresh_time})"
                )
                pbar.refresh()  # to show immediately the update

Related: xparty_relationships_update (Refreshed Time: 1): 100%|██████████████████| 15113/15113 [12:01<00:00, 20.94it/s]
Related: xwork_relationships_update (Refreshed Time: 1): 100%|████████████████████| 3433/3433 [00:33<00:00, 102.78it/s]
Related: xactivity_relationships_update (Refreshed Time: 1): 100%|███████████████████| 752/752 [00:18<00:00, 40.60it/s]


## Learning Notes

1. [no_cursor_timeout not functioning as expected](https://jira.mongodb.org/browse/PYTHON-1879)
2. [Schedule a repeating event in Python 3](https://stackoverflow.com/questions/2398661/schedule-a-repeating-event-in-python-3)
