# Run this pipeline to update the knowledge graph (KG) JSON files in '00_Current Versions' and '01_Archived Versions'

In [1]:
import pandas as pd
import json
import datetime
import os
import shutil


Read current entities and relations

In [2]:
# Load the entity metadata records. JSON is read as UTF-8 explicitly so the
# result does not depend on the platform's default encoding, and the `with`
# block closes the file on exit, so no explicit close() is needed.
with open('../03_Output/00_GPT KGs/Entities.json', "r", encoding="utf-8") as file:
    entities = json.load(file)

In [4]:
# Load the relation rows; the CSV uses ';' as its field separator.
# Later cells read the Subject, Relation, Object, Description and Importance columns.
file_path = '../03_Output/00_GPT KGs/Relations_replaced.csv'
relations = pd.read_csv(file_path, sep=';')


1. Merge the entities and relations into JSON objects

In [5]:
# Build one knowledge-graph record per entity:
#   {'metadata': <entity>,
#    'knowledge graph': {'entities': [<distinct objects>],
#                        'relations': {<entity name>: [<relation dicts>]}}}
kg_list = []
for entity in entities:
    entity_name = entity['Entity']

    # Rows whose Subject is this entity.
    records = relations.loc[relations['Subject'] == entity_name]

    # Round-trip through JSON so pandas/numpy scalars become plain Python
    # values and missing cells become None (to_json writes NaN as null).
    filtered_relations = json.loads(records.to_json(orient='records'))

    entity_relations = [
        {
            'Relation': rel['Relation'],
            'Object': rel['Object'],
            'Description': rel['Description'],
            'Importance': rel['Importance'],
        }
        for rel in filtered_relations
    ]

    # De-duplicate the objects while keeping first-seen order. A plain set
    # would order strings differently on every kernel start, making the
    # written JSON (and the archive history) change for no reason.
    object_lst = list(dict.fromkeys(rel['Object'] for rel in entity_relations))

    kg_list.append({
        'metadata': entity,
        'knowledge graph': {
            'entities': object_lst,
            'relations': {entity_name: entity_relations},
        },
    })


2. Update the current KG and save a timestamped copy in the archive for the record

In [41]:
# Serialise the merged graph once and write the same payload twice: as the
# current version, and as a timestamped copy in the archive folder.
# NOTE(review): the timestamp contains ':' characters, which are not allowed in
# Windows file names; the format is kept as-is to match existing archive names.
timestamp = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
kg_json = json.dumps(kg_list, indent = 2)

# The `with` block closes each file on exit, so no explicit close() is needed.
with open('../03_Output/01_Auto KGs/00_Current Versions/knowledge_graph.json', 'w') as file:
    file.write(kg_json)

with open('../03_Output/01_Auto KGs/01_Archived Versions/' + timestamp + '.json', 'w') as file:
    file.write(kg_json)

In [12]:
# Write one JSON file per entity into the current-versions folder; the file
# is named after the entity.
# NOTE(review): the entity name is used verbatim as the file name - confirm no
# entity name contains a path separator.
for item in kg_list:
    name = item['metadata']['Entity']
    json_ = json.dumps(item, indent = 2)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open('../03_Output/01_Auto KGs/00_Current Versions/' + name + '.json', 'w') as file:
        file.write(json_)


3. Run the cells below to push the JSON files from the '03_Output/01_Auto KGs/00_Current Versions' folder to the '00_API' folder.

In [3]:

def update_files_in_API(src_dir, dest_dir):
    """
    Replace the files in the destination directory with the files from the
    source directory.

    Every regular file directly inside ``dest_dir`` is deleted, then every
    regular file directly inside ``src_dir`` is copied over with
    ``shutil.copy2``. Sub-directories on either side are left untouched.

    Args:
        src_dir (str): The path to the source directory.
        dest_dir (str): The path to the destination directory. It is created
            if it does not exist.

    Raises:
        FileNotFoundError: If the source directory does not exist.
        ValueError: If ``src_dir`` and ``dest_dir`` are the same directory.
            Proceeding would delete every file and leave nothing to copy.
    """
    # Check if the source directory exists
    if not os.path.exists(src_dir):
        raise FileNotFoundError(f"Source directory '{src_dir}' does not exist.")

    # Refuse to run when both paths point at the same directory: the delete
    # step would wipe the very files that are supposed to be copied.
    if os.path.exists(dest_dir) and os.path.samefile(src_dir, dest_dir):
        raise ValueError(
            f"Source and destination are the same directory ('{src_dir}'); "
            "refusing to delete its files."
        )

    # Snapshot the source files BEFORE touching the destination, so a source
    # that cannot be listed fails here instead of after the destination has
    # already been emptied.
    files = [
        file_name
        for file_name in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, file_name))
    ]

    # Check if the destination directory exists, if not, create it
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
        print(f"Destination directory '{dest_dir}' created.")
    else:
        # Delete all files in the destination directory
        for file_name in os.listdir(dest_dir):
            file_path = os.path.join(dest_dir, file_name)

            # Delete only if it is a file
            if os.path.isfile(file_path):
                os.remove(file_path)
                print(f"Deleted '{file_path}'.")

    # Copy each file to the destination directory
    for file_name in files:
        src_file = os.path.join(src_dir, file_name)
        dest_file = os.path.join(dest_dir, file_name)
        shutil.copy2(src_file, dest_file)
        print(f"Copied '{src_file}' to '{dest_file}'.")

    print("All files deleted in destination and copied from source successfully.")

# Push the current KG files to the API folder.
# Every file already in the destination is deleted before the copy.
source_directory = '../03_Output/01_Auto KGs/00_Current Versions'
destination_directory = '../00_API'

update_files_in_API(src_dir=source_directory, dest_dir=destination_directory)


Deleted '../03_Output/01_Auto KGs/00_Current Versions/blue economy.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/internet of energy.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/decentralized energy systems.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/Energy For Development.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/free prior and informed consent.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/technology transfer.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/multi-stakeholder partnerships.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/pumped hydro storage.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/digital twins.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/land use planning.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/reliance of fuel imports.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/energy infrastructure services

In [4]:
def flatten_relations_list(json_list):
    """
    Flatten the nested ``relations`` mapping of every record, in place.

    Each record's ``data["knowledge graph"]["relations"]`` is expected to be a
    dict keyed by level name. The ``"level 1"`` entry becomes the new
    ``"relations"`` value; every other level ``<name>`` is stored under
    ``"relations-<name>"`` with spaces replaced by hyphens (for example
    ``"level 2"`` becomes ``"relations-level-2"``).

    Records whose ``"relations"`` is missing or is not a dict are left
    unchanged, so calling the function again on an already flattened list
    (for example when re-running the cell) is a no-op instead of an error.

    Args:
        json_list (list[dict]): Records with a ``"knowledge graph"`` dict.

    Returns:
        list[dict]: The same list object that was passed in, mutated in place.
    """
    for data in json_list:
        graph = data["knowledge graph"]
        relations = graph.get("relations")

        # Already flattened (or nothing to flatten): leave the record alone.
        if not isinstance(relations, dict):
            continue

        # Remove the nested mapping before re-adding its levels as flat keys.
        del graph["relations"]

        # Flatten relations into separate keys
        for level, rel_list in relations.items():
            if level == "level 1":
                graph["relations"] = rel_list
            else:
                graph[f"relations-{level.replace(' ', '-')}"] = rel_list

    return json_list


In [13]:
# Load the knowledge graph whose relations are nested by level. JSON is read
# as UTF-8 explicitly so the result does not depend on the platform's default
# encoding, and the `with` block closes the file on exit.
with open('../03_Output/01_Auto KGs/02_Replaced Relations/Nested Relations.json', "r", encoding="utf-8") as file:
    kg = json.load(file)

In [15]:
# Flatten the per-level relations. `kg` is modified in place and `kg_new`
# refers to the same list object.
kg_new = flatten_relations_list(json_list=kg)

In [18]:
# Write one JSON file per flattened entity record straight into the API
# folder; the file is named after the entity.
# NOTE(review): the entity name is used verbatim as the file name - confirm no
# entity name contains a path separator.
for item in kg_new:
    name = item['metadata']['Entity']
    json_ = json.dumps(item, indent = 2)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open('../00_API/' + name + '.json', 'w') as file:
        file.write(json_)
