In [2]:
import os
import openai
from string import Template
import json
from timeit import default_timer as timer
from dotenv import load_dotenv
from time import sleep


### Load environment files

In [3]:
# Load environment variables (python-dotenv) so that the os.getenv(...) lookups
# in the OpenAI configuration below can find the API key, endpoint and version.
load_dotenv()

True

In [4]:
# OpenAI API configuration (Azure OpenAI through the module-level openai>=1.0 client,
# which is the interface used by `openai.chat.completions.create` in this notebook).
openai.api_type = "azure"
openai.api_key = os.getenv("api_key_azure")
# The 1.x client reads `azure_endpoint`; the legacy 0.x `api_base` attribute is
# not consulted by it, so the endpoint is set on the attribute that is actually used.
openai.azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.getenv("api_version")
# Azure deployment name; passed as `model=` when requesting chat completions.
openai_deployment = "sdgi-gpt-35-turbo-16k"

# openai.api_key = os.getenv("OPENAI_KEY")


# print(openai.api_key)
# print(openai.api_base)
# print(openai.api_version)


In [8]:
#Helper functions

def fix_json_structure(data):
    """Fold stray top-level graph data into ``data["knowledge graph"]``.

    When ``data`` carries both a top-level ``"entities"`` list and a top-level
    ``"relations"`` mapping, they are removed from the top level and merged
    (without duplicates) into the nested ``"knowledge graph"`` section, which
    is created if absent. If either key is missing, ``data`` is left untouched.

    Args:
        data: Dictionary to repair. It is modified in place.

    Returns:
        The same ``data`` object.
    """
    # Nothing to repair unless both stray sections are present.
    if not ("entities" in data and "relations" in data):
        return data

    stray_entities = data.pop("entities")
    stray_relations = data.pop("relations")

    graph = data.get("knowledge graph", {})
    graph_entities = graph.get("entities", [])
    graph_relations = graph.get("relations", {})

    # Append only entities that the nested section does not already contain.
    for item in stray_entities:
        if item in graph_entities:
            continue
        graph_entities.append(item)

    # Merge relations per subject; unseen subjects take over the incoming list as is.
    for subject, triples in stray_relations.items():
        if subject not in graph_relations:
            graph_relations[subject] = triples
            continue
        bucket = graph_relations[subject]
        for triple in triples:
            if triple not in bucket:
                bucket.append(triple)

    graph["entities"] = graph_entities
    graph["relations"] = graph_relations
    data["knowledge graph"] = graph
    return data

In [27]:

def generate_KGOld(initial_entity, iterations, n):
    """Earlier variant of the graph builder: expand each seed entity once.

    Starts from three seed entities (``initial_entity``, "UNDP", "UNESCO"),
    asks ``generate_subgraph`` for 10 related entities per unprocessed seed and
    merges every answer into the graph with ``updateKG``.

    Args:
        initial_entity: Name of the entity to start from (concatenated as a string).
        iterations: Number of passes over the nested entity list.
        n: Accepted for signature compatibility; not used by this variant.

    Returns:
        The knowledge-graph dictionary as returned by the last ``updateKG`` call
        (or the initial graph if nothing was processed).
    """
    seed_entities = [
        {"entity": "" + initial_entity + "", "category": ""},
        {"entity": "UNDP", "category": "Organization"},
        {"entity": "UNESCO", "category": "Organization"},
    ]
    knowledge_graph = {
        "metadata": {"Data": "Knowledge Graph"},
        "knowledge graph": {"entities": seed_entities, "relations": {}},
    }

    # Names that have already been expanded, so no entity is queried twice.
    seen_names = set()
    for _ in range(iterations):
        for node in knowledge_graph["knowledge graph"]["entities"]:
            if node["entity"] in seen_names:
                continue
            # Build the neighbourhood of this entity, then fold it into the graph.
            subgraph = generate_subgraph(node, 10)
            seen_names.add(node["entity"])
            knowledge_graph = updateKG(knowledge_graph, subgraph)

    return knowledge_graph

def generate_KG(initial_entity, iterations, n, count=10):
    """Build a knowledge graph around ``initial_entity`` from LLM-generated subgraphs.

    The graph is seeded with ``initial_entity`` plus "UNDP" and "UNESCO". Every
    unprocessed seed is expanded with ``generate_subgraph`` and merged with
    ``updateKG``; when ``n > 0`` each entity returned for a seed is expanded one
    level further as well.

    Each model response is parsed exactly once. A response that is not a JSON
    object is reported with ``print`` and skipped instead of aborting the run,
    so one bad reply no longer throws away the graph collected so far. Inner
    entities whose subgraph was already requested are not requested again.

    NOTE(review): ``updateKG`` stores merged entities under the top-level
    ``"entities"`` key, while this loop reads
    ``knowledge_graph["knowledge graph"]["entities"]``. Passes after the first
    therefore find no unprocessed entity. Confirm the intended depth before
    changing this, because deeper expansion multiplies the number of API calls.

    Args:
        initial_entity: Name of the entity the graph is built around.
        iterations: Number of passes over the nested entity list.
        n: Inner expansion is performed only when ``n > 0``; the magnitude is
            not otherwise used.
        count: Number of related entities requested per subgraph (default 10,
            the value that was previously hard-coded).

    Returns:
        The knowledge-graph dictionary; merged data sits in the top-level
        ``"entities"``/``"relations"`` keys written by ``updateKG``.
    """
    knowledge_graph = {
        "metadata": {
            "Data": "Knowledge Graph",
        },
        "knowledge graph": {
            "entities": [
                {
                    "entity": initial_entity,
                    "category": ""
                },
                {
                    "entity": "UNDP",
                    "category": "Organization"
                },
                {
                    "entity": "UNESCO",
                    "category": "Organization"
                }
            ],
            "relations": {}
        }
    }

    processed_entities = set()   # seeds already expanded by the outer loop
    requested_entities = set()   # every name a subgraph was requested for

    for _ in range(iterations):
        current_entities = knowledge_graph["knowledge graph"]["entities"]
        for entity in current_entities:
            name = entity["entity"]
            if name in processed_entities:
                continue
            processed_entities.add(name)
            requested_entities.add(name)

            subgraph = _load_subgraph(entity, count)
            if subgraph is None:
                continue
            knowledge_graph = updateKG(knowledge_graph, subgraph)

            # Add inner entities based on n
            if n <= 0:
                continue
            for inner_entity in subgraph.get("entities", []):
                inner_name = inner_entity.get("entity") if isinstance(inner_entity, dict) else None
                # Skip malformed entries and names whose subgraph was already fetched.
                if not isinstance(inner_name, str) or inner_name in requested_entities:
                    continue
                requested_entities.add(inner_name)

                inner_subgraph = _load_subgraph(inner_entity, count)
                if inner_subgraph is not None:
                    knowledge_graph = updateKG(knowledge_graph, inner_subgraph)

    return knowledge_graph


def _load_subgraph(entity, count):
    # Request the subgraph for `entity` and parse it once; None when the reply is unusable.
    raw = generate_subgraph(entity, count)
    try:
        parsed = raw if isinstance(raw, dict) else json.loads(raw)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers an empty (None) reply.
        print(f"Skipping {entity['entity']!r}: response is not valid JSON ({exc})")
        return None
    if not isinstance(parsed, dict):
        print(f"Skipping {entity['entity']!r}: response is not a JSON object")
        return None
    return parsed

def generate_subgraph(entity, count):
    """Ask the chat model for entities related to ``entity`` and return its reply.

    The prompt shows the model the JSON layout expected downstream. The example
    uses straight ASCII quotes and no trailing comma, so that it is itself
    valid JSON; the earlier typographic quotes and trailing comma invited
    replies that ``json.loads`` rejects.

    Args:
        entity: Mapping with an ``"entity"`` key holding the entity name.
        count: Number of related entities to request.

    Returns:
        The message content of the first completion choice, with surrounding
        whitespace and an enclosing Markdown code fence (if any) removed. The
        value is returned unchanged when the content is not a string.
    """
    # Use the openai api to generate a list (of length 'count') of the most related entities
    name = entity['entity']

    prompt = f"""
    The entity is {name}
    Give me {count} entities and their relationship to {name}
    Output should be in example json format:
        {{
          "entities": [
            {{
                "entity": "ENTITY2",
                "category": "Category"
            }}
            ],
        "relations": {{
                    "{name}": [
                        {{
                            "Relation": "RELATIONSHIP",
                            "Object": "ENTITY2",
                            "Description": "CONTENT DESCRIPTION"
                        }}
                    ]
        }}
        }}
    Any ENTITY2 used in the relations must be part of the entities array section
    """
    response_entities = openai.chat.completions.create(
        model=openai_deployment,
        temperature=0,
        messages=[
            {"role": "user", "content": prompt},
        ]
    )
    new_subgraph = response_entities.choices[0].message.content

    return _strip_code_fence(new_subgraph)


def _strip_code_fence(text):
    # Drop a surrounding Markdown ``` fence so the payload can be handed to json.loads.
    if not isinstance(text, str):
        return text
    stripped = text.strip()
    if stripped.startswith("```"):
        lines = stripped.splitlines()[1:]  # first line is the opening fence (e.g. ```json)
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        stripped = "\n".join(lines).strip()
    return stripped


# Function to Update the Knowledge Graph
def updateKG(knowledge_graph, new_subgraph_str):
    """Merge a subgraph into ``knowledge_graph`` without creating duplicates.

    The merge targets the top-level ``"entities"`` list and ``"relations"``
    mapping of ``knowledge_graph``; both are created when missing.

    Args:
        knowledge_graph: Dictionary to merge into. It is modified in place.
        new_subgraph_str: The subgraph, either as a dict or as a JSON string
            that decodes to one.

    Returns:
        The same ``knowledge_graph`` object.
    """
    subgraph = (
        new_subgraph_str
        if isinstance(new_subgraph_str, dict)
        else json.loads(new_subgraph_str)
    )

    # Entities: keep what is there and append anything not seen before.
    merged_entities = knowledge_graph.get("entities", [])
    for candidate in subgraph.get("entities", []):
        if candidate in merged_entities:
            continue
        merged_entities.append(candidate)

    # Relations: unseen subjects adopt the incoming list, known ones are extended.
    merged_relations = knowledge_graph.get("relations", {})
    for subject, incoming in subgraph.get("relations", {}).items():
        if subject not in merged_relations:
            merged_relations[subject] = incoming
            continue
        known = merged_relations[subject]
        for relation in incoming:
            if relation not in known:
                known.append(relation)

    # Write both sections back (this creates them on a graph that had none).
    knowledge_graph["entities"] = merged_entities
    knowledge_graph["relations"] = merged_relations
    return knowledge_graph



In [None]:
# Example usage
initial_entity = "sustainable energy"
output_file = '../03_Output/01_Auto KGs/00_Current Versions/knowledge_graph_nth.json'

# Create the output folder before the generation run, so a missing directory
# cannot make the final write fail after all the API calls have been made.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

knowledge_graph = generate_KG(initial_entity,iterations=4, n=2)
# Move the merged top-level "entities"/"relations" into the "knowledge graph" section.
fixed_knowledge_graph_output = fix_json_structure(knowledge_graph)

# Write the knowledge graph to a JSON file (the repaired structure, explicitly)
with open(output_file, 'w') as json_file:
    json.dump(fixed_knowledge_graph_output, json_file, indent=4)

print(f"Knowledge graph saved to {output_file}")
