In [None]:
import pandas as pd
from langchain_openai import AzureChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
import os
# Load the Excel file
df = pd.read_excel('final_test.xlsx')

# Initialize the LLM
llm = AzureChatOpenAI(
    model="gpt-4o-mini",
    openai_api_key=os.environ.get("AZURE_API_KEY"),
    azure_endpoint=os.environ.get("AZURE_ENDPOINT"),
    api_version="2024-02-15-preview",
    temperature=0.5
)

# Define the structured output format
# Define the structured output format
response_schemas = [
    ResponseSchema(name="responsible", description="Comma-separated process names the user is responsible for"),
    ResponseSchema(name="supporting", description="Comma-separated process names the user is supporting"),
    ResponseSchema(name="designing", description="Comma-separated process names the user is designing")
]

parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Define the LLM prompt
template = ChatPromptTemplate.from_messages([
    ("system", "You are an AI assistant that extracts structured information from text."),
    ("human", "Extract and format the userâ€™s process involvements from the given structured data.\n\n"
              "### INPUT DATA:\n{llm_output}\n\n"
              "### OUTPUT FORMAT:\n"
              "- responsible: Comma-separated list of processes the user is responsible for\n"
              "- supporting: Comma-separated list of processes the user is supporting\n"
              "- designing: Comma-separated list of processes the user is designing\n\n"
              "Return the output as a valid JSON object where keys are exactly: 'responsible', 'supporting', 'designing'.")
])

# Function to process each LLM output
def process_llm_output(llm_output):
    try:
        prompt = template.format(llm_output=llm_output)
        structured_output = llm.invoke(prompt)
        parsed_result = parser.parse(structured_output.content)

        # Convert keys to lowercase to handle case differences
        parsed_result = {k.lower(): v for k, v in parsed_result.items()}

        # Formatting output as required
        formatted_output = (
            f"User is Responsible For ISO Processes: {parsed_result.get('responsible', 'None')}\n"
            f"User is Supporting: {parsed_result.get('supporting', 'None')}\n"
            f"User is Designing: {parsed_result.get('designing', 'None')}"
        )
        return formatted_output

    except Exception as e:
        return f"Error processing: {str(e)}"

# Apply function to the LLM_Output column
df['Parsed_LLM_Output'] = df['LLM_Output'].astype(str).apply(process_llm_output)

# Save the processed DataFrame
df.to_excel('processed_final_test.xlsx', index=False)

In [5]:
df

Unnamed: 0,Gold_Input,LLM_Output,Parsed_LLM_Output
0,User is Responsible For ISO Processes: verific...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
1,User is Responsible For ISO Processes: Human r...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
2,User is Responsible For ISO Processes: Validat...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
3,User is Responsible For ISO Processes: Verific...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
4,User is Responsible For ISO Processes: Informa...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
5,User is Responsible For ISO Processes: Life cy...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
6,User is Responsible For ISO Processes: Integra...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
7,User is Responsible For ISO Processes: Design ...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
8,User is Responsible For ISO Processes: system ...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...
9,User is Responsible For ISO Processes: system ...,{'status': 'User Performs the following ISO Pr...,Error processing: Got invalid return object. E...


In [7]:
# Save the updated DataFrame to a new Excel file
df.to_excel('processed_final_test_bkp.xlsx', index=False)