# Run this pipeline to update the JSON files for the KG in '00_Current Versions' and '01_Archived Versions'

In [45]:
import pandas as pd
import json
import datetime
import os
import shutil


Read current entities and relations

In [2]:
# Load the entity metadata records. The `with` block closes the file on exit,
# so no explicit close() call is needed.
with open('../03_Output/00_GPT KGs/Entities.json', "r") as file:
    entities = json.load(file)

In [4]:
# The relations table is a semicolon-separated CSV; the merge step below reads
# its Subject, Relation, Object, Description and Importance columns.
file_path = '../03_Output/00_GPT KGs/Relations_replaced.csv'
relations = pd.read_csv(file_path, sep=';')


1. Merge the entities and their relations into JSON objects (one object per entity)

In [5]:
# Build one knowledge-graph object per entity:
#   {'metadata': <entity record>,
#    'knowledge graph': {'entities': [<object names>],
#                        'relations': {<entity name>: [<relation dicts>]}}}
relation_fields = ('Relation', 'Object', 'Description', 'Importance')

kg_list = []
for entity in entities:
    entity_name = entity['Entity']

    # Rows of the relations table whose subject is this entity.
    records = relations.loc[relations['Subject'] == entity_name]

    # Round-trip through JSON so the rows become plain Python dicts
    # (pandas serialises NaN as null, which comes back as None).
    filtered_relations = json.loads(records.to_json(orient='records'))

    entity_relations = [
        {field: rel[field] for field in relation_fields}
        for rel in filtered_relations
    ]

    # De-duplicate object names while keeping first-seen order. A plain set
    # would give an arbitrary order that can change between runs, making the
    # generated JSON files differ for no real reason.
    object_lst = list(dict.fromkeys(rel['Object'] for rel in filtered_relations))

    kg_list.append({
        'metadata': entity,
        'knowledge graph': {
            'entities': object_lst,
            'relations': {entity_name: entity_relations},
        },
    })


2. Update the current KG and save a timestamped copy in the archive for the record

In [41]:
# Serialise the merged graph once and write it to two places: the "current"
# file (overwritten on every run) and a timestamp-named archive copy.
timestamp = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
kg_json = json.dumps(kg_list, indent = 2)

# NOTE(review): the timestamp contains ':' characters, which are not valid in
# file names on Windows. Confirm the target OS before relying on the archive
# write, or switch to a separator-free format.
output_paths = [
    '../03_Output/01_Auto KGs/00_Current Versions/knowledge_graph.json',
    '../03_Output/01_Auto KGs/01_Archived Versions/' + timestamp + '.json',
]

for output_path in output_paths:
    # The context manager closes the file; no explicit close() is required.
    with open(output_path, 'w') as file:
        file.write(kg_json)

In [12]:
# Write each entity's knowledge graph to its own '<Entity>.json' file in the
# current-versions folder.
for item in kg_list:
    # NOTE(review): the entity name is used verbatim as the file name; this
    # assumes names never contain path separators. Confirm against the data.
    name = item['metadata']['Entity']
    # Serialise before opening so a serialisation error cannot leave a
    # truncated file behind.
    json_ = json.dumps(item, indent = 2)
    with open('../03_Output/01_Auto KGs/00_Current Versions/' + name + '.json', 'w') as file:
        file.write(json_)


3. Run the cells below to push the JSON files from the '03_Output/01_Auto KGs/00_Current Versions' folder to the '00_API' folder.

In [3]:

def update_files_in_API(src_dir, dest_dir):
    """
    Replace the files in the destination directory with the files from the
    source directory.

    All regular files directly inside ``dest_dir`` are deleted, then every
    regular file directly inside ``src_dir`` is copied over. Sub-directories
    are neither deleted nor copied. The destination is created if missing.

    Args:
        src_dir (str): The path to the source directory.
        dest_dir (str): The path to the destination directory.

    Raises:
        FileNotFoundError: If the source directory does not exist.
        ValueError: If the source and destination are the same directory.
            Without this check the delete step would remove every source
            file and nothing would be left to copy.
    """
    # Check if the source directory exists
    if not os.path.exists(src_dir):
        raise FileNotFoundError(f"Source directory '{src_dir}' does not exist.")

    # Check if the destination directory exists, if not, create it
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
        print(f"Destination directory '{dest_dir}' created.")
    else:
        # Refuse to run when both paths resolve to the same directory:
        # deleting the destination would wipe the source.
        if os.path.samefile(src_dir, dest_dir):
            raise ValueError(
                f"Source and destination are the same directory ('{src_dir}'); "
                "refusing to delete the source files."
            )

        # Delete all files in the destination directory
        for file_name in os.listdir(dest_dir):
            file_path = os.path.join(dest_dir, file_name)

            # Delete only if it is a file
            if os.path.isfile(file_path):
                os.remove(file_path)
                print(f"Deleted '{file_path}'.")

    # Copy each file to the destination directory
    for file_name in os.listdir(src_dir):
        src_file = os.path.join(src_dir, file_name)
        dest_file = os.path.join(dest_dir, file_name)

        # Copy only if it is a file
        if os.path.isfile(src_file):
            shutil.copy2(src_file, dest_file)
            print(f"Copied '{src_file}' to '{dest_file}'.")

    print("All files deleted in destination and copied from source successfully.")

# Push the current per-entity JSON files into the API folder.
# This is a live call, not just an illustration: it runs when the cell executes.
source_directory = '../03_Output/01_Auto KGs/00_Current Versions'
destination_directory = '../00_API'

update_files_in_API(src_dir=source_directory, dest_dir=destination_directory)


Deleted '../03_Output/01_Auto KGs/00_Current Versions/blue economy.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/internet of energy.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/decentralized energy systems.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/Energy For Development.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/free prior and informed consent.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/technology transfer.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/multi-stakeholder partnerships.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/pumped hydro storage.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/digital twins.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/land use planning.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/reliance of fuel imports.json'.
Deleted '../03_Output/01_Auto KGs/00_Current Versions/energy infrastructure services

In [57]:

def flatten_all_relations(json_list, folder_path):
    """
    Flatten the nested relations of every record and expand entity names
    into metadata dicts. Records are modified in place.

    NOTE: this notebook defines `flatten_all_relations` again in later cells;
    when all cells are run in order, the last definition is the one in effect.

    For each record:
      * metadata key "Entity" is replaced by "Entity Title" and "Entity Code"
        (the lower-cased title);
      * the per-level "relations" dict and the "subelement_relations" list are
        merged into one flat "relations" list, with "Subject" set to the
        entity title on "level 1" relations;
      * each name in "entities" is replaced by the metadata found in
        '<folder_path>/<name>.json', or by a placeholder dict with an "Error"
        key when the file or its "metadata" section is missing.

    Args:
        json_list (list): Records with "metadata" and "knowledge graph" keys.
        folder_path (str): Folder containing the per-entity JSON files.

    Returns:
        list: The same `json_list` object, after in-place modification.
    """
    for record in json_list:
        meta = record["metadata"]
        graph = record["knowledge graph"]

        # Rename "Entity" to title/code fields
        if "Entity" in meta:
            title = meta.pop("Entity")
            meta["Entity Title"] = title
            meta["Entity Code"] = title.lower()

        nested_relations = graph.pop("relations")
        sub_relations = graph.pop("subelement_relations")
        subject_title = meta["Entity Title"]

        # Flatten: level relations first, sub-element relations afterwards
        flat_relations = []
        for level, rel_list in nested_relations.items():
            is_first_level = level == "level 1"
            for relation in rel_list:
                if is_first_level:
                    relation["Subject"] = subject_title
                flat_relations.append(relation)
        flat_relations.extend(sub_relations)

        graph["relations"] = flat_relations

        # Swap every entity name for its metadata (or an error placeholder)
        enriched = []
        for entity_name in graph["entities"]:
            candidate = os.path.join(folder_path, f"{entity_name}.json")

            if not os.path.isfile(candidate):
                enriched.append({"Entity Title": entity_name, "Entity Code": entity_name.lower(), "Error": "File not found"})
                continue

            with open(candidate, 'r') as handle:
                payload = json.load(handle)

            if "metadata" not in payload:
                enriched.append({"Entity Title": entity_name, "Entity Code": entity_name.lower(), "Error": "Metadata not found"})
                continue

            entity_meta = payload["metadata"]
            if "Entity" in entity_meta:
                found_title = entity_meta.pop("Entity")
                entity_meta["Entity Title"] = found_title
                entity_meta["Entity Code"] = found_title.lower()
            enriched.append(entity_meta)

        graph["entities"] = enriched

    return json_list

In [69]:
def _entity_record_from_file(entity_name, folder_path):
    """Return the metadata stored in '<folder_path>/<entity_name>.json', or a placeholder dict with an "Error" key."""
    entity_file_path = os.path.join(folder_path, f"{entity_name}.json")
    if not os.path.isfile(entity_file_path):
        return {"Entity Title": entity_name, "Entity Code": entity_name.lower(), "Error": "File not found"}

    with open(entity_file_path, 'r') as entity_file:
        entity_data = json.load(entity_file)

    if "metadata" not in entity_data:
        return {"Entity Title": entity_name, "Entity Code": entity_name.lower(), "Error": "Metadata not found"}

    metadata = entity_data["metadata"]
    if "Entity" in metadata:
        title = metadata.pop("Entity")
        metadata["Entity Title"] = title
        metadata["Entity Code"] = title.lower()
    return metadata


def flatten_all_relations(json_list, folder_path):
    """
    Flatten the nested relations of every record and expand entity names,
    plus the objects of all relations, into metadata dicts. Records are
    modified in place.

    NOTE: this notebook defines `flatten_all_relations` again in a later
    cell; when all cells are run in order, that later definition is the one
    in effect.

    For each record:
      * metadata key "Entity" is replaced by "Entity Title" and "Entity Code"
        (the lower-cased title);
      * the per-level "relations" dict and the "subelement_relations" list are
        merged into one flat "relations" list, with "Subject" set to the
        entity title on "level 1" relations;
      * "entities" becomes a list of metadata dicts: first one per listed
        entity name, then one per relation "Object" not already covered.
        Each name is looked up once, so no entity appears twice.

    Args:
        json_list (list): Records with "metadata" and "knowledge graph" keys.
        folder_path (str): Folder containing the per-entity JSON files.

    Returns:
        list: The same `json_list` object, after in-place modification.
    """
    for data in json_list:
        # Update metadata
        if "Entity" in data["metadata"]:
            entity_title = data["metadata"].pop("Entity")
            data["metadata"]["Entity Title"] = entity_title
            data["metadata"]["Entity Code"] = entity_title.lower()

        relations = data["knowledge graph"].pop("relations")
        sub_relations = data["knowledge graph"].pop("subelement_relations")

        all_relations = []
        entity_title = data["metadata"]["Entity Title"]

        for level, rel_list in relations.items():
            for relation in rel_list:
                if level == "level 1":
                    relation["Subject"] = entity_title
                all_relations.append(relation)

        all_relations.extend(sub_relations)

        data["knowledge graph"]["relations"] = all_relations

        # Names already resolved for this record; prevents duplicate entries
        # when a relation object is also in the entities list.
        seen = set()
        updated_entities = []

        # Process entities listed in the knowledge graph
        for entity_name in data["knowledge graph"]["entities"]:
            if entity_name in seen:
                continue
            seen.add(entity_name)
            updated_entities.append(_entity_record_from_file(entity_name, folder_path))

        # Add objects from all relations (relations without an object are skipped)
        for relation in all_relations:
            object_name = relation.get("Object")
            if not object_name or object_name in seen:
                continue
            seen.add(object_name)
            updated_entities.append(_entity_record_from_file(object_name, folder_path))

        data["knowledge graph"]["entities"] = updated_entities

    return json_list

In [126]:
import json
import os

def format_entity_title(title):
    """
    Format an entity name as a display title.

    The first word is always capitalised. Every later word is capitalised
    unless it exactly matches one of the lower-case minor words (articles,
    conjunctions, short prepositions), which are left unchanged. Runs of
    whitespace collapse to single spaces.

    Args:
        title (str): The raw entity name.

    Returns:
        str: The formatted title; an empty string if `title` has no words.
    """
    minor_words = {"a", "an", "the", "and", "or", "but", "nor", "for", "so", "yet", "at", "by", "in", "of", "on", "to", "with"}
    words = title.split()
    # An empty or whitespace-only title has no first word to capitalise.
    if not words:
        return ''
    formatted_words = [words[0].capitalize()]
    formatted_words.extend(
        word if word in minor_words else word.capitalize()
        for word in words[1:]
    )
    return ' '.join(formatted_words)

def flatten_all_relations(json_list, folder_path):
    """
    Flatten the nested relations of every record, normalise relation
    subjects/objects to lower case, and expand entity names into metadata
    dicts. Records are modified in place.

    For each record:
      * metadata key "Entity" is replaced by "Entity Title" (formatted with
        `format_entity_title`) and "Entity Code" (the lower-cased name);
      * the per-level "relations" dict and the "subelement_relations" list are
        merged into one flat "relations" list. "level 1" relations get the
        entity title as "Subject"; in all other relations "Sub-element" is
        renamed to "Subject" and "Parent" to "Object";
      * string "Subject"/"Object" values are lower-cased;
      * "entities" becomes a list of metadata dicts, one per distinct name
        taken from the original entities list and from the relation objects,
        in first-seen order. Names without a file in `folder_path` get a
        placeholder with "Category": "Missing"; files without a "metadata"
        section get a placeholder with an "Error" key.

    Args:
        json_list (list): Records with "metadata" and "knowledge graph" keys.
        folder_path (str): Folder containing the per-entity JSON files.

    Returns:
        list: The same `json_list` object, after in-place modification.
    """
    for data in json_list:
        # Update metadata
        if "Entity" in data["metadata"]:
            entity_title = data["metadata"].pop("Entity")
            formatted_entity_title = format_entity_title(entity_title)
            data["metadata"]["Entity Title"] = formatted_entity_title
            data["metadata"]["Entity Code"] = entity_title.lower()

        relations = data["knowledge graph"].pop("relations")
        sub_relations = data["knowledge graph"].pop("subelement_relations")
        # Treat the sub-element relations as one more "level" so a single
        # loop handles everything.
        relations['sub_relations'] = sub_relations

        all_relations = []
        entity_title = data["metadata"]["Entity Title"]
        # A dict used as an ordered set: avoids duplicates while keeping a
        # stable, first-seen order (a plain set iterates in arbitrary order).
        entity_names = dict.fromkeys(data["knowledge graph"]["entities"])

        for level, rel_list in relations.items():
            for relation in rel_list:
                if level == "level 1":
                    relation["Subject"] = entity_title
                else:
                    if "Sub-element" in relation:
                        relation["Subject"] = relation.pop("Sub-element")
                    if "Parent" in relation:
                        relation["Object"] = relation.pop("Parent")

                # Convert Subject and Object values to lower case. Non-string
                # values (e.g. None from missing data) are left untouched
                # rather than raising.
                for key in ("Subject", "Object"):
                    if isinstance(relation.get(key), str):
                        relation[key] = relation[key].lower()

                all_relations.append(relation)

                # Only relations that actually carry an object contribute an entity.
                object_name = relation.get("Object")
                if isinstance(object_name, str) and object_name:
                    entity_names.setdefault(object_name)

        data["knowledge graph"]["relations"] = all_relations

        # Update entities with metadata from files
        updated_entities = []
        for entity_name in entity_names:
            entity_file_path = os.path.join(folder_path, f"{entity_name}.json")
            if not os.path.isfile(entity_file_path):
                updated_entities.append({"Entity Title": format_entity_title(entity_name), "Entity Code": entity_name.lower(), "Category": "Missing"})
                continue

            with open(entity_file_path, 'r') as entity_file:
                entity_data = json.load(entity_file)

            if "metadata" not in entity_data:
                updated_entities.append({"Entity Title": format_entity_title(entity_name), "Entity Code": entity_name.lower(), "Error": "Metadata not found"})
                continue

            metadata = entity_data["metadata"]
            if "Entity" in metadata:
                found_title = metadata.pop("Entity")
                metadata["Entity Title"] = format_entity_title(found_title)
                metadata["Entity Code"] = found_title.lower()
            updated_entities.append(metadata)

        data["knowledge graph"]["entities"] = updated_entities

    return json_list


In [134]:
# Load the nested-relations knowledge graph. The `with` block closes the file
# on exit, so no explicit close() call is needed.
with open('../03_Output/01_Auto KGs/02_Replaced Relations/Nested Relations.json', "r") as file:
    kg = json.load(file)

In [135]:
# Per-entity JSON files in this folder supply the metadata for each entity.
folder_path = '../03_Output/01_Auto KGs/00_Current Versions/'
# flatten_all_relations modifies the records in place and returns the same
# list, so `kg_new` and `kg` refer to one object.
kg_new = flatten_all_relations(json_list=kg, folder_path=folder_path)

In [136]:
# Spot-check one flattened record (index 1) to eyeball the resulting structure.
kg_new[1]

{'metadata': {'Description': 'Refers to the interconnected network of individuals, organizations, and systems that share and utilize data.',
  'Category': 'Technology',
  'Tags': ['Data Sharing', 'Data Collaboration', 'Data Governance'],
  'Dimension': 'Technology',
  'Acronym': None,
  'Importance': 2.0,
  'Synonyms': None,
  'Entity Title': 'Data Ecosystems',
  'Entity Code': 'data ecosystems'},
 'knowledge graph': {'entities': [{'Entity Title': 'Digital Transformation',
    'Entity Code': 'digital transformation',
    'Category': 'Missing'},
   {'Entity Title': 'Government Agencies',
    'Entity Code': 'government agencies',
    'Category': 'Missing'},
   {'Description': 'Markets where electricity, natural gas, and other energy commodities are bought and sold.',
    'Category': 'Economic',
    'Tags': ['Energy', 'Markets', 'Commodities'],
    'Dimension': 'Economic',
    'Acronym': None,
    'Importance': 4.0,
    'Synonyms': ['energy startups',
     'energy access',
     'energy au

In [137]:
def update_api_folder(kg_new, api_folder_path):
    """
    Replace the contents of the API folder with one JSON file per record.

    All regular files directly inside `api_folder_path` are deleted
    (sub-directories are left alone), then each record is written to
    '<Entity Code>.json' with 2-space indentation. The folder is created
    if it does not exist yet.

    Args:
        kg_new (list): Records whose "metadata" dict has an "Entity Code" key.
        api_folder_path (str): Path of the API folder.
    """
    # Create the folder on first use instead of failing in os.listdir().
    os.makedirs(api_folder_path, exist_ok=True)

    # Delete all files in the API folder
    for filename in os.listdir(api_folder_path):
        file_path = os.path.join(api_folder_path, filename)
        if os.path.isfile(file_path):
            os.remove(file_path)

    # Add new files to the API folder
    for item in kg_new:
        # NOTE(review): the entity code is used verbatim as the file name;
        # this assumes codes never contain path separators. Confirm against the data.
        name = item['metadata']['Entity Code']
        json_ = json.dumps(item, indent=2)
        with open(os.path.join(api_folder_path, name + '.json'), 'w') as file:
            file.write(json_)

In [138]:
# Publish the flattened records: one '<Entity Code>.json' file per record.
api_folder_path = '../00_API/'
update_api_folder(kg_new=kg_new, api_folder_path=api_folder_path)
