# In [None]:
import os
import random
from unstructured.partition.pdf import partition_pdf
from unstructured.partition.docx import partition_docx
from unstructured.partition.pptx import partition_pptx
from graphrag_sdk.source import Source
from falkordb import FalkorDB
from graphrag_sdk import KnowledgeGraph, Ontology
from graphrag_sdk.models.openai import OpenAiGenerativeModel
from graphrag_sdk.model_config import (
    KnowledgeGraphModelConfig,
)  # Import the missing config

# Provide the OpenAI API key. Prefer exporting OPENAI_API_KEY in your shell:
# setdefault() only installs the placeholder below when the variable is not
# already set, so a real key present in the environment is never overwritten.
os.environ.setdefault("OPENAI_API_KEY", "Your_OpenAi_Keys")

# Directories scanned for input documents, one per supported file type.
pdf_directory = "Data/PDFS"
docx_directory = "Data/DOCX"
pptx_directory = "Data/PPTX"


def _extract_text_sources(directory: str, extension: str, partition) -> list:
    """Extract the text of every matching document in *directory*.

    Each file whose name ends with *extension* is parsed with *partition*,
    the text of its non-empty elements is written (one element per line) to a
    ``.txt`` file with the same base name in the same directory, and that
    text file is wrapped in a ``Source``.

    Args:
        directory: Folder to scan for documents.
        extension: File-name suffix to match, including the dot (e.g. ".pdf").
        partition: Callable invoked as ``partition(filename=path)`` that
            returns an iterable of elements exposing a ``text`` attribute.

    Returns:
        A list with one ``Source`` per processed document, ordered by file
        name.
    """
    sources = []
    # os.listdir() yields entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(extension):
            continue

        elements = partition(filename=os.path.join(directory, filename))

        # Save the extracted text next to the original document.
        txt_path = os.path.join(directory, os.path.splitext(filename)[0] + ".txt")
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.writelines(
                element.text + "\n" for element in elements if element.text
            )

        sources.append(Source(txt_path))
    return sources


pdf_sources = _extract_text_sources(pdf_directory, ".pdf", partition_pdf)
print("PDF text extraction complete.")

docx_sources = _extract_text_sources(docx_directory, ".docx", partition_docx)
print("DOCX text extraction complete.")

pptx_sources = _extract_text_sources(pptx_directory, ".pptx", partition_pptx)
print("PPTX text extraction complete.")


# Gather every extracted text source into a single list; the same list is
# used for ontology generation here and for graph construction later.
all_sources = [*pdf_sources, *docx_sources, *pptx_sources]

# Generative model used to derive the ontology.
model = OpenAiGenerativeModel(model_name="gpt-4o")

# Derive the ontology from the combined sources.
ontology = Ontology.from_sources(sources=all_sources, model=model)


# Connection settings for the graph database.
# Replace these placeholder values with your own before running.
_connection_settings = {
    "host": " your Host ",
    "username": "username",
    "password": "password",
    "port": "port_number",
}

# Build the knowledge graph from the generated ontology and the chosen model.
kg = KnowledgeGraph(
    name="K_G_A",
    model_config=KnowledgeGraphModelConfig.with_model(model),
    ontology=ontology,
    **_connection_settings,
)

# Feed every extracted source into the knowledge graph.
kg.process_sources(all_sources)

# Report the graph's name, then the graphs listed by the database connection.
print("Knowledge Graph created:", kg._name, sep="\n")
print(kg.db.list_graphs())



# Open a chat session on the knowledge graph, send one question and print
# whatever the session returns.
chat = kg.chat_session()
question = "Enter your question Based on Data."
response = chat.send_message(question)
print(response)
