## Fetch and Structure the ISO 15288 Process Data from PostgreSQL

In [None]:
import psycopg2
import os
# Connect to PostgreSQL. Connection settings come from environment variables;
# os.environ.get() yields None for any variable that is not set.
conn = psycopg2.connect(
    host=os.environ.get("POSTGRES_HOST"),
    database=os.environ.get("POSTGRES_DB"),
    user=os.environ.get("POSTGRES_USER"),
    password=os.environ.get("POSTGRES_PASSWORD")
)

# try/finally guarantees the connection is closed even if the query or the
# fetch raises; the `with` block does the same for the cursor.
try:
    with conn.cursor() as cursor:
        # One row per (process, activity, task) combination. The LEFT JOINs keep
        # processes without activities and activities without tasks, in which
        # case activity_name and/or task_name come back as NULL (None).
        cursor.execute("""
            SELECT 
                p.id AS process_id,
                p.name AS process_name,
                p.description AS process_description,
                a.name AS activity_name,
                t.name AS task_name
            FROM 
                iso_processes p
            LEFT JOIN 
                iso_activities a ON p.id = a.process_id
            LEFT JOIN 
                iso_tasks t ON a.id = t.activity_id
            ORDER BY 
                p.id, a.id, t.id;
        """)

        # Materialise the whole result set before the connection is released.
        rows = cursor.fetchall()
finally:
    conn.close()

# Nested view of the query result, keyed by process name:
#   {process_name: {'description': ..., 'activities': {activity_name: {'Tasks': [task_name, ...]}}}}
process_data = {}

for _process_id, name, description, activity, task in rows:
    # First row seen for a process creates its entry; later rows reuse it
    # (so the description from the first row is the one that is kept).
    entry = process_data.setdefault(name, {'description': description, 'activities': {}})

    # Rows without an activity name contribute nothing beyond the process entry.
    if not activity:
        continue

    # Same create-or-reuse pattern for the activity under this process.
    activity_entry = entry['activities'].setdefault(activity, {'Tasks': []})

    # Rows without a task name still register the activity, with no task added.
    if task:
        activity_entry['Tasks'].append(task)

# Print the nested dictionary as an indented outline:
# process -> description -> activities -> comma-separated tasks.
for proc_name, proc in process_data.items():
    print(f"Process: {proc_name}")
    print(f"  Description: {proc['description']}")
    for act_name, act in proc['activities'].items():
        task_list = ', '.join(act['Tasks'])
        print(f"    Activity: {act_name}")
        print(f"      Tasks: {task_list}")

Process: acquisition process
  Description: Used by organizations for acquiring products or services. The purpose of the acquisition process is to obtain a product or service in accordance with the acquirer'srequirements.
    Activity: Prepare for the acquisition
      Tasks: Define a strategy for how the acquisition will be conducted, Prepare a request for the supply of a product or service that includes the requirements
    Activity: Advertise the acquisition and select the supplier
      Tasks: Communicate the request for the supply of a product or service to potential suppliers., Select one or more suppliers.
    Activity: Establish and maintain an agreement(acquisition process)
      Tasks: Develop and approve an agreement with the supplier that includes acceptance criteria, Identify necessary changes to the agreement., Evaluate impact of changes on the agreement, Update the agreement with the supplier, as necessary.
    Activity: Monitor the agreement
      Tasks: Assess the exec

In [27]:
# Bare expression as the cell's last line: the notebook shows the repr of the
# whole process_data dictionary so its nesting can be inspected.
process_data

{'acquisition process': {'description': "Used by organizations for acquiring products or services. The purpose of the acquisition process is to obtain a product or service in accordance with the acquirer'srequirements.",
  'activities': {'Prepare for the acquisition': {'Tasks': ['Define a strategy for how the acquisition will be conducted',
     'Prepare a request for the supply of a product or service that includes the requirements']},
   'Advertise the acquisition and select the supplier': {'Tasks': ['Communicate the request for the supply of a product or service to potential suppliers.',
     'Select one or more suppliers.']},
   'Establish and maintain an agreement(acquisition process)': {'Tasks': ['Develop and approve an agreement with the supplier that includes acceptance criteria',
     'Identify necessary changes to the agreement.',
     'Evaluate impact of changes on the agreement',
     'Update the agreement with the supplier, as necessary.']},
   'Monitor the agreement': {'T

## Combine everything into a single chunk for each process

In [28]:
# One text chunk per process, in the iteration order of process_data.
process_chunks = []

for name, info in process_data.items():
    # Every chunk opens with the process name and its description.
    parts = [
        f"Process: {name}\n",
        f"Process Description: {info['description']}\n",
    ]

    # Then each activity, followed by its tasks indented one level deeper.
    for act_name, act_info in info['activities'].items():
        parts.append(f"  Activity: {act_name}\n")
        parts.extend(f"    Task: {task}\n" for task in act_info['Tasks'])

    # Every part already ends in a newline, so a plain concatenation is enough.
    process_chunks.append("".join(parts))

# Preview every chunk (numbered from 1), truncated to its first 500 characters.
for number, text in enumerate(process_chunks, start=1):
    print(f"Process Chunk {number}:\n")
    print(text[:500])
    print("-----\n")


Process Chunk 1:

Process: acquisition process
Process Description: Used by organizations for acquiring products or services. The purpose of the acquisition process is to obtain a product or service in accordance with the acquirer'srequirements.
  Activity: Prepare for the acquisition
    Task: Define a strategy for how the acquisition will be conducted
    Task: Prepare a request for the supply of a product or service that includes the requirements
  Activity: Advertise the acquisition and select the supplier
  
-----

Process Chunk 2:

Process: supply process
Process Description: Used by organizations for supplying products or services. The purpose of the supply process is to provide an acquirer with a product or service that meets agreedrequirements.
  Activity: Prepare for the supply
    Task: Determine the existence and identity of an acquirer who has a need for a product or service.
    Task: Define a supply strategy.
  Activity: Respond to a request for supply of products or ser

## Embed the Chunks Using Azure OpenAI Embeddings

In [32]:
from langchain_openai import AzureOpenAIEmbeddings

In [None]:
# Azure OpenAI settings. The key and endpoint are read from the environment
# (None when the variable is not set); the remaining values are fixed here.
api_key = os.getenv("AZURE_API_KEY")
api_base = os.getenv("AZURE_API_BASE")  # passed below as azure_endpoint
api_version = "2024-02-01"
azure_embedding_deployment_name = "text-embedding-ada-002"  # passed below as `model`
azure_llm_deployment_name = "gpt-4"  # defined for later use; not referenced in this cell

# Embeddings client used to embed the process chunks.
azure_embeddings = AzureOpenAIEmbeddings(
    model=azure_embedding_deployment_name,
    openai_api_version=api_version,
    azure_endpoint=api_base,
    api_key=api_key,
)

In [37]:
# Embed all process chunks with the Azure OpenAI embeddings client.
# The result is later stacked into a 2-D array and matched to process_chunks by
# position, so it is expected to hold one vector per chunk, in input order
# -- TODO confirm against the embed_documents contract.
process_embeddings = azure_embeddings.embed_documents(process_chunks)

## Store the embeddings in FAISS (Facebook AI Similarity Search)

In [40]:
import faiss
import numpy as np

# Stack the embeddings into a float32 matrix with one row per chunk.
embedding_matrix = np.asarray(process_embeddings, dtype='float32')

# Fail early with a readable message: an empty embedding list produces a 1-D
# array, and reading shape[1] from it would raise an opaque IndexError.
if embedding_matrix.ndim != 2 or embedding_matrix.shape[0] == 0:
    raise ValueError(
        "process_embeddings must be a non-empty list of equal-length vectors; "
        f"got an array of shape {embedding_matrix.shape}"
    )

# Flat index using L2 (Euclidean) distance; the vector length is taken from the data.
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)

# Row i of the matrix becomes vector i of the index, which is what the
# position-based docstore ids built from process_chunks rely on.
index.add(embedding_matrix)

# Persist the raw FAISS index on its own.
faiss.write_index(index, "faiss_index.index")

In [46]:
from langchain.docstore import InMemoryDocstore
from langchain.schema import Document
# Document ids are the stringified positions of the chunks in process_chunks.
chunk_ids = [str(position) for position in range(len(process_chunks))]

# Index position -> document id.
index_to_docstore_id = dict(enumerate(chunk_ids))

# Document id -> Document wrapping the original chunk text.
documents = {
    chunk_id: Document(page_content=text)
    for chunk_id, text in zip(chunk_ids, process_chunks)
}


In [48]:
# Wrap the id -> Document mapping in an in-memory docstore; it is handed to the
# FAISS vector store together with index_to_docstore_id.
docstore = InMemoryDocstore(documents)

In [49]:
from langchain.vectorstores import FAISS
# Assemble LangChain's FAISS vector store from the pieces built so far:
# the embeddings client, the populated FAISS index, the docstore holding the
# chunk texts, and the index-position -> document-id mapping.
vector_store = FAISS(
    embedding_function=azure_embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
)

### Saving the Vector Store (FAISS Index, Document Store, and Index-to-Docstore Mapping)

In [85]:
# Save the FAISS vector store locally under the path "faiss_index".
# This is separate from the raw index file "faiss_index.index" written earlier
# with faiss.write_index.
# NOTE(review): presumably this persists the docstore and id mapping alongside
# the index -- confirm against the save_local documentation.
vector_store.save_local("faiss_index")