In [2]:
import sys
import os
import asyncio
import json
import flatdict
import time
from typing import Any, Dict, List, Optional, Union, Tuple
import pandas as pd
from pathlib import Path
from string import Formatter

# LangChain imports
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnablePassthrough

In [3]:
## Updated PromptProcessor (LLama)

"""
A utility for processing natural language prompts in parallel using multiple Ollama instances.

This class handles the end-to-end workflow of:
1. Loading input data from various file formats (CSV, Excel, JSON)
2. Formatting prompts with variables from the input data
3. Distributing prompt processing across multiple Ollama instances
4. Tracking progress with visual indicators
5. Handling errors and retries
6. Processing and flattening JSON responses
7. Saving results to Excel files

The processor supports optional RAG (Retrieval Augmented Generation) functionality
by incorporating relevant context from PDF documents when generating responses.

Example usage:
    processor = PromptProcessor(
        input_file="./data.xlsx",
        output_dir="./results",
        model="llama3.1",
        model_kwargs={"temperature": 0.3}
    )
    
    results = await processor.run_prompt_batch(
        system_message="You are a helpful assistant.",
        user_message_template="Analyze this data: {data}",
        prompt_name="data-analysis",
        items=items,
        ids=ids
    )
"""
    
class PromptProcessor:
    """Process prompts in parallel using multiple Ollama instances."""

    def __init__(self, 
                input_file: Optional[str] = None, 
                output_dir: str = "./output", 
                model: str = "llama3", 
                pdf_directory: Optional[str] = None, 
                use_rag: bool = False,
                input_df: Optional[pd.DataFrame] = None,
                model_kwargs: Optional[Dict[str, Any]] = None):
        """
        Initialize the prompt processor.
        
        Args:
            input_file: Path to the input file (CSV, Excel, etc.)
            output_dir: Directory to save output results
            model: LLM model to use for processing
            pdf_directory: Directory containing PDF files for RAG (optional)
            use_rag: Whether to use RAG functionality
            input_df: Optional dataframe to use directly instead of loading from file
            model_kwargs: Additional kwargs for the LLM model
        
        Raises:
            ValueError: If neither input_file nor input_df is provided
        """
        self.input_file = input_file
        self.output_dir = output_dir
        self.model = model
        self.pdf_directory = pdf_directory
        self.rag_processor = None
        self.input_df = input_df
        self.model_kwargs = model_kwargs or {
            "temperature": 0.3,
            "format": "json",
            "keep_alive": "1h"
        }
        self.last_port = None
        # Initialize RAG if requested
        if use_rag:
            self.rag_processor = RAGProcessor(pdf_directory, model)
            
        # Load dataframe if input_file is provided and input_df is not
        if input_file and input_df is None:
            self.load_input_data()
        elif input_df is not None:
            self.input_df = input_df
        elif not input_file and input_df is None:
            raise ValueError("Either input_file or input_df must be provided")

    def df_to_prompt_items(self, df: pd.DataFrame, columns: List[str] = None) -> List[Dict[str, Any]]:
        """
        Convert dataframe rows to a list of dictionaries for prompt variables.
        
        Args:
            df: Input dataframe
            columns: List of column names to include (None for all columns)
            
        Returns:
            List of dictionaries, each representing variables for a prompt
            
        Raises:
            ValueError: If specified columns don't exist in the dataframe
        """
        if columns is None:
            columns = df.columns.tolist()
        else:
            # Ensure all specified columns exist in the dataframe
            missing = [col for col in columns if col not in df.columns]
            if missing:
                raise ValueError(f"Columns not found in dataframe: {missing}")
        
        # Convert each row to a dictionary with only the specified columns
        return df[columns].to_dict(orient='records')
        
    async def process_json_responses(self, 
                              responses: List[Dict], 
                              ids: List[Any], 
                              prompt_type: str, 
                              json_key: str = None) -> List[Dict]:
        """
        Process responses and flatten extracted JSON structures.
        
        Args:
            responses: List of responses from the LLM
            ids: List of identifiers corresponding to each response
            prompt_type: Type of prompt used (for tracking)
            json_key: Optional key to extract from JSON response
            
        Returns:
            List of processed and flattened dictionaries
        """
        processed = []

        for i, response in enumerate(responses):
            output = {}
            
            # Handle None responses (failed prompts)
            if response is None:
                output = {
                    "item_id": ids[i],
                    "prompt_type": prompt_type,
                    "error": "Prompt failed after retry"
                }
                processed.append(output)
                continue
                
            try:
                # Extract content from response
                if isinstance(response, str):
                    content = response
                elif isinstance(response, dict) and "response" in response:
                    content = response["response"].content
                else:
                    content = str(response)
                    
                try:
                    response_json = json.loads(content)
                    if json_key and json_key in response_json:
                        nested_dicts = response_json[json_key]
                        if isinstance(nested_dicts, list):
                            flat_dicts = [flatdict.FlatDict(d, delimiter=".") for d in nested_dicts]
                            for d in flat_dicts:
                                output.update(d)
                        elif isinstance(nested_dicts, dict):
                            flat_dict = flatdict.FlatDict(nested_dicts, delimiter=".")
                            output.update(flat_dict)
                    else:
                        # If no json_key specified or not found, use the whole response
                        flat_dict = flatdict.FlatDict(response_json, delimiter=".")
                        output.update(flat_dict)
                except (json.JSONDecodeError, TypeError):
                    output["json_parse_error"] = content
            except Exception as e:
                output["processing_error"] = str(e)
                output["raw_response"] = str(response)
                
            # Add metadata
            output.update({
                "item_id": ids[i],
                "prompt_type": prompt_type,
            })
            
            processed.append(output)
        return processed
        
    ## Updated run_prompt_batch method
    async def run_prompt_batch(self, 
                llm,  # Original parameter kept for compatibility
                system_message: str, 
                user_message_template: str, 
                prompt_name: str, 
                items: List[Dict[str, Any]], 
                ids: List[Any] = None, 
                json_key: str = None,
                start_port: int = 11434,
                num_ports: int = 1) -> List[Dict]:
        """
        Execute prompts in parallel using multiple Ollama instances with progress tracking.
        
        Args:
            llm: Ignored (kept for compatibility)
            system_message: System message for the LLM
            user_message_template: Template with {variable} placeholders
            prompt_name: Name of the prompt for tracking
            items: List of dictionaries with template variables
            ids: Optional identifiers for each item
            json_key: Optional key to extract from JSON response
            num_ports: Number of Ollama instances to use
            
        Returns:
            List of processed responses
        """
        # Use sequential IDs if none provided
        if ids is None:
            ids = list(range(len(items)))
        
        # Get retriever for RAG if enabled
        retriever = self.rag_processor.get_retriever() if self.rag_processor else None
        
        # Format all user messages
        formatted_messages = []
        for item in items:
            user_msg = user_message_template
            for key, value in item.items():
                placeholder = f"{{{key}}}"
                if placeholder in user_msg:
                    user_msg = user_msg.replace(placeholder, str(value))
            formatted_messages.append(user_msg)
        
        # Configure multiple Ollama instances
        PORTS=[]
        port_range=list(np.arange(0, num_ports, 1))
        for p in port_range:
            PORTS.append(start_port+p)
        self.last_port = PORTS[-1]
        
        models = [
            ChatOllama(
                model=self.model,
                base_url=f"http://localhost:{port}",
                **self.model_kwargs
            )
            for port in PORTS
        ]
        
        # Create a shared counter for overall progress
        total_messages = len(formatted_messages)
        processed_count = 0
        
        # Create a lock for updating the counter
        counter_lock = asyncio.Lock()
        
        async def process_message(model, message):
            """Process a single message with retry logic"""
            nonlocal processed_count
            
            # Try with one retry on failure
            for attempt in range(2):
                try:
                    # Prepare question with RAG context if needed
                    question = message
                    if retriever:
                        docs = retriever.invoke(message)
                        context = "\n\n".join([doc.page_content for doc in docs])
                        question = f"Context information:\n{context}\n\nUser query:\n{message}"
                    
                    # Use SystemMessage and HumanMessage directly to avoid template issues
                    result = await model.ainvoke([
                        SystemMessage(content=system_message),
                        HumanMessage(content=question)
                    ])
                    
                    # Update the counter
                    async with counter_lock:
                        processed_count += 1
                        
                    return result
                except Exception as e:
                    if attempt == 0:
                        print(f"Error: {str(e)}. Retrying...")
                    else:
                        print(f"Retry failed. Skipping this message.")
                        # Update the counter even for failed messages
                        async with counter_lock:
                            processed_count += 1
                        return {}
        
        async def process_distributed(messages, models):
            """Distribute messages across available models with progress tracking"""
            # Calculate chunk size for each model
            num_models = len(models)
            try:
                # Original calculation that might cause ZeroDivisionError
                chunk_size = (len(messages) + num_models - 1) // num_models
            except ZeroDivisionError:
                # If num_models is 0 or division error occurs, set chunk_size to handle all messages
                raise
            
            # Create chunks of messages
            chunks = [
                messages[i:i + chunk_size] 
                for i in range(0, len(messages), chunk_size)
            ]
            
            # Create the main progress bar
            main_progress = tqdm_asyncio(
                total=total_messages,
                desc="Overall Progress",
                position=0,
                leave=True
            )
            
            # Process each chunk with a dedicated model
            async def process_chunk(model, chunk):
                results = []
                for msg in chunk:
                    result = await process_message(model, msg)
                    results.append(result)
                    # Update the main progress bar
                    main_progress.update(1)
                return results
            
            # Run all chunks in parallel
            print(f"Processing {len(messages)} messages using {len(models)} Ollama instances...")
            results_nested = await asyncio.gather(*[
                process_chunk(models[i], chunks[i]) 
                for i in range(min(len(chunks), len(models)))
            ])
            
            # Close the progress bar
            main_progress.close()
            
            # Flatten results
            return [item for sublist in results_nested for item in sublist]
        
        # Process all messages in parallel with progress tracking
        start_time = time.time()
        responses = await process_distributed(formatted_messages, models)
        elapsed = time.time() - start_time
        print(f"Processed {len(responses)} messages in {elapsed:.2f}s")
        
        # Prepare results with IDs
        result_items = []
        for item_id, response in zip(ids, responses):
            if response is not None:
                result_items.append({
                    "id": item_id,
                    "response": response,
                    "prompt_name": prompt_name
                })
            else:
                print(f"Warning: Item with ID {item_id} failed to process")
        
        # Process JSON responses
        return await self.process_json_responses(result_items, ids, prompt_name, json_key)


## main function

async def main(
    system_message: str = "Act as a helpful assistant",
    system_message_file: str = None,
    user_template: str = None,
    user_template_file: str = None,
    input_file: str = None,
    input_df: pd.DataFrame = None,
    prompt_name: str = "default-prompt",
    num_ports: int = 1,
    start_port: int = 11434,
    prompt_vars: List[str] = None,
    model_kwargs: Dict[str, Any] = None,
    json_key: str = None,
    output_dir: str = "./output",
    output_filename: str = "results.xlsx",
    model: str = "llama3.1"
    ):
    """
    Process prompts with the specified parameters.
    
    Args:
        system_message: System message for the LLM
        system_message_file: Path to file containing system message (overrides system_message if provided)
        user_template: User message template
        user_template_file: Path to file containing user template (overrides user_template if provided)
        input_file: Path to input file (CSV, Excel, etc.)
        input_df: DataFrame to use directly instead of loading from file
        prompt_name: Name of the prompt for tracking
        num_ports: Number of Ollama instances to use
        prompt_vars: List of column names to include as variables
        model_kwargs: Additional kwargs for the LLM model
        json_key: Optional key to extract from JSON response
        output_dir: Directory to save output results
        output_filename: Name of the output file
        model: LLM model to use for processing
    
    Returns:
        List of processed responses
    """
    # Load system message from file if provided
    if system_message_file:
        try:
            with open(system_message_file, 'r') as f:
                system_message = f.read().strip()
        except Exception as e:
            print(f"Error loading system message from file: {str(e)}")
            print(f"Using default system message instead.")
    
    # Load user template from file if provided
    if user_template_file:
        try:
            with open(user_template_file, 'r') as f:
                user_template = f.read().strip()
        except Exception as e:
            print(f"Error loading user template from file: {str(e)}")
            if not user_template:
                raise ValueError("No user template provided and failed to load from file.")
    elif not user_template:
        raise ValueError("Either user_template or user_template_file must be provided.")
    
    # Set default model kwargs if not provided
    if model_kwargs is None:
        model_kwargs = {
            "temperature": 0.3,
            "format": "json",
            "keep_alive": "1h"
        }
    
    # Initialize the processor
    processor = PromptProcessor(
        input_file=input_file,
        output_dir=output_dir,
        model=model,
        input_df=input_df,
        model_kwargs=model_kwargs
    )

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Prepare the items and IDs
    if prompt_vars is None:
        prompt_vars = processor.input_df.columns.tolist()
    
    print(processor.input_df)
    items = processor.df_to_prompt_items(processor.input_df, prompt_vars)
    
    # Use documentKey as ID if available, otherwise use index
    if "documentKey" in processor.input_df.columns:
        ids = processor.input_df["documentKey"].tolist()
    else:
        ids = list(range(len(processor.input_df)))

    # Create a placeholder LLM (will be replaced in run_prompt_batch)
    placeholder_llm = None

    # Process the batch directly
    print(f"Starting analysis with prompt: {prompt_name}...")
    start_time = time.time()

    # Call run_prompt_batch directly
    results = await processor.run_prompt_batch(
        llm=placeholder_llm,
        system_message=system_message,
        user_message_template=user_template,
        prompt_name=prompt_name,
        items=items,
        ids=ids,
        json_key=json_key,
        num_ports=num_ports,
        start_port=start_port
    )

    elapsed = time.time() - start_time
    print(f"Processed {len(results)} items in {elapsed:.2f}s")

    # Save the results
    if results:
        results_df = pd.DataFrame(results)
        output_path = os.path.join(output_dir, output_filename)
        results_df.to_excel(output_path, index=False)
        print(f"Results saved to: {output_path}")
        
        # Print a sample of the results
        print("\nSample results:")
        sample_cols = min(5, len(results_df.columns))
        for col in results_df.columns[:sample_cols]:
            print(f"{col}: {results_df[col].iloc[0]}")
    else:
        print("No results were generated")

    return results, processor

In [6]:
def extract_message(input_source, find_message_type):
    
    if find_message_type not in ["system", "user"]:
        raise ValueError("find_message_type must be either 'system' or 'user'") 
    
    text = ""
    try:
        # Try to open as a file
        with open(input_source, 'r', encoding='utf-8') as file:
            text = file.read()
    except (IOError, TypeError):
        # If it's not a valid file path, assume it's already a string
        text = input_source
        
    if find_message_type == "system":
        # Extract text between "SYSTEM MESSAGE:" and "USER MESSAGE:"
        system_start = text.find("SYSTEM MESSAGE:")
        user_start = text.find("USER MESSAGE:")
        if system_start == -1 or user_start == -1:
            return ""
        # Add length of "SYSTEM MESSAGE:" to get the actual start of content
        system_start += len("SYSTEM MESSAGE:")
        return text[system_start:user_start].strip()
    else:  # find_message_type == "user"
        # Extract text from "USER MESSAGE:" to the end
        user_start = text.find("USER MESSAGE:")
        if user_start == -1:
            return ""
        # Add length of "USER MESSAGE:" to get the actual start of content
        user_start += len("USER MESSAGE:")
        return text[user_start:].strip()

In [21]:
from pathlib import Path
import nest_asyncio
import numpy as np
from tqdm.asyncio import tqdm_asyncio
nest_asyncio.apply()

# Example 3: Loading templates from files and using a DataFrame directly
df = pd.read_excel("./ready_for_testcase_summarization_prompt.xlsx")

prompt_template_fp = Path("../src/data/test_case_structure_prompt.txt")

num_ports = 5
prompt_name = prompt_template_fp.stem 
system_message = extract_message(prompt_template_fp, "system")
user_template = extract_message(prompt_template_fp, "user")
prompt_vars = ["testcases"]
output_filename = f"{prompt_name}_results.xlsx"
model_kwargs = {"temperature": 0.1, "format": "json", "keep_alive": "30m"}  

In [22]:
system_message

'You are a Senior Software QA Architect. Your task is to evaluate the structure of a test case against the provided Acceptance Criteria. You will determine whether the acceptance criteria has been appropriately satisfied:\n\nAcceptance Criteria:\n    - Test case steps follow a logical, sequential path that ensures reproducibility.\n    - Actions are documented where data collection is required, and data collection is accurate and complete.\n    - Steps are clear, concise, and free of jargon; absolutes are avoided.\n    - Steps are numbered or ordered for easy execution.\n    - Outcome aligns with the expected results.\n\nYou will provide: assessment_verdict, assessment_rationale, identified_gaps, actionable_recommendations, and test_case_improvements. Propose improvements detecting misalignment between test case objective and actual steps.\n\nResponse Format (produce exactly this JSON structure):\n{\n    "assessment_verdict": "complete|partial|inadequate",\n    "assessment_rationale": 

In [23]:
user_template

'Task: Evaluate the completeness of a test case against the acceptance criteria for Test Case Structure.\n\n## Input Variables:\n- Test Case:\n{testcases}\n\nAcceptance Criteria:\n- Test case steps follow a logical, sequential path that ensures reproducibility.\n- Actions are documented where data collection is required, and data collection is accurate and complete.\n- Steps are clear, concise, and free of jargon; absolutes are avoided.\n- Steps are numbered or ordered for easy execution.\n- Outcome aligns with the expected path defined in the requirement.\n\nProduce output strictly in the Response Format JSON. Do not use Markdown.\n\nNow perform the review on the provided Input Variables and return only the Response Format JSON.'

In [24]:
results, processor = asyncio.run(main(
    system_message=system_message,
    user_template=user_template,
    input_df=df,
    prompt_name=prompt_name,
    num_ports=num_ports,
    prompt_vars=prompt_vars,
    model_kwargs=model_kwargs,
    output_filename=output_filename
))

     Unnamed: 0     documentKey  \
0             5    P1320-TEST-2   
1             6  P1320-TEST-202   
2             7  P1320-TEST-203   
3             8  P1320-TEST-204   
4             9  P1320-TEST-206   
..          ...             ...   
193         198   P1320-TEST-67   
194         199   P1320-TEST-69   
195         200   P1320-TEST-70   
196         201   P1320-TEST-73   
197         202   P1320-TEST-95   

                                           description  \
0     this test case verifies within the local moni...   
1     obsoleting this since it is already covered i...   
2     this test case verifies within the local moni...   
3     this test case verifies within the local moni...   
4     this test case verifies within the local moni...   
..                                                 ...   
193   the anchor shall have labeling which displays...   
194   this test case verifies that on the exterior ...   
195   the staff and asset shall have labeling which...   

Overall Progress:   0%|          | 0/198 [00:00<?, ?it/s]

Processing 198 messages using 5 Ollama instances...


Overall Progress: 100%|██████████| 198/198 [11:40<00:00,  3.54s/it]

Processed 198 messages in 700.83s
Processed 198 items in 700.84s
Results saved to: ./output/test_case_structure_prompt_results.xlsx

Sample results:
assessment_verdict: partial
assessment_rationale: The test case steps follow a logical path, but some actions are not clearly documented where data collection is required. Additionally, some expected results contain absolutes (e.g., 'the create group control shall change to enabled state'). The outcome generally aligns with the expected path defined in the requirement.
identified_gaps: ['Actions for data collection are not clearly documented', 'Expected results contain absolutes']
actionable_recommendations: ['Clearly document actions where data collection is required', 'Rephrase expected results to avoid absolutes']
test_case_improvements: ['Consider breaking down long steps into smaller, more manageable tasks', 'Use a consistent format for documenting expected results']





In [41]:
processor.last_port

11435

In [1]:
from src import utils

all_results_df = utils.concat_matching_dataframes(
    _path="./output",                     # base directory to scan
    _regex=rf"testcase_.*.xlsx$",             # regex applied to filenames
    recursive=False,
    case_sensitive=True,
    match_on="name",
    read_kwargs=None,
    ignore_index=True,
    check_list_like_columns=False,
    axis=0
    )

[PosixPath('output/testcase_objective_review.xlsx'), PosixPath('output/testcase_summaries.xlsx'), PosixPath('output/testcase_structure_prompt_results.xlsx')]


In [2]:
all_results_df.head(5)

Unnamed: 0,assessment_verdict,assessment_rationale,identified_gaps,recommendations,test_case_improvements,item_id,prompt_type,actionable_recommendations,completeness.test_case_steps,completeness.data_collection,...,output,review.status,review.comments,processing_error,raw_response,testcases_summary,status,message,status_code,results
0,complete,The test case objective is clear and specific....,[],['Consider adding more detailed assertions for...,['Update steps as follows: 1. Create group; ca...,P1320-TEST-2,testsuite-review,,,,...,,,,,,,,,,
1,complete,The test case objective is clear and specific ...,[],[],[],P1320-TEST-202,testsuite-review,,,,...,,,,,,,,,,
2,complete,The test case meets its intended objective and...,[],[],[],P1320-TEST-203,testsuite-review,,,,...,,,,,,,,,,
3,complete,"The test case objective is clear and specific,...",[],[],[],P1320-TEST-204,testsuite-review,,,,...,,,,,,,,,,
4,complete,The test case objective is clear and specific....,[],[],[],P1320-TEST-206,testsuite-review,,,,...,,,,,,,,,,


In [3]:
all_results_df.to_excel('all_results.xlsx')