In [None]:
#! usr/bin/bash
# Environment setup: run the Ollama install script, refresh the apt index,
# install the pciutils and lshw packages, then start the Ollama server.
! curl -fsSL https://ollama.com/install.sh | sh
! sudo apt update
! sudo apt install pciutils lshw
# NOTE(review): `ollama serve` presumably stays in the foreground, so this cell may
# never finish and would stall "Run All" — confirm, and consider starting the server
# from a separate terminal instead.
! ollama serve 

In [None]:
#! usr/bin/bash
# Download the models into the local Ollama instance.
# The configuration cell further down selects 'llama3.1' as MODEL.
! ollama pull mistral
! ollama pull llama3.1
! ollama pull tinyllama

In [None]:
! pip install -e ..

In [49]:
import os
import asyncio
import json
import flatdict
from typing import Any, Dict, List, Optional
from dotenv import dotenv_values

import pandas as pd
import flatdict
from pydantic import BaseModel, Field, SecretStr
from tqdm.asyncio import tqdm_asyncio  # Import tqdm_asyncio for async progress bars

from src import utils
from aiswre.components import prompteval as pe
from ollama import AsyncClient  # Import AsyncClient from ollama

async def process_json_responses(
    responses, ids, prompt_type, json_key: str = "requirements_review"
    ) -> List[Dict[str, Any]]:
    """Flatten the JSON payload of each chat response into one record per response.

    Args:
        responses: Chat responses supporting ``in`` and ``[]`` lookups. The text
            under ``response["message"]["content"]`` is decoded as JSON.
        ids: Requirement identifiers aligned by position with ``responses``.
        prompt_type: Label stored under ``"prompt_type"`` in every record.
        json_key: Top-level key of the decoded JSON that holds the review entries.

    Returns:
        One dict per response. Review entries are flattened with ``"."``-joined
        keys. ``eval_count``, ``prompt_eval_count`` and ``total_duration`` are
        copied when present. ``requirement_id`` and ``prompt_type`` are always
        set last, so they override same-named keys produced by the model.
        When the content is not valid JSON, or the value under ``json_key`` is
        not a JSON object or an array of objects, the raw content is stored
        under ``"json_parse_error"`` instead of raising.
    """
    processed = []

    for i, response in enumerate(responses):
        output = {}

        # Extract content from ollama response
        if "message" in response and "content" in response["message"]:
            content = response["message"]["content"]
            try:
                response_json = json.loads(content)
                if json_key in response_json:
                    entries = response_json[json_key]
                    # Models sometimes return a single object instead of the
                    # requested array; treat it as a one-element array.
                    if isinstance(entries, dict):
                        entries = [entries]
                    # Validate before flattening so an unexpected shape is
                    # recorded as a parse error rather than aborting the batch.
                    if not isinstance(entries, list) or not all(
                        isinstance(entry, dict) for entry in entries
                    ):
                        raise TypeError(
                            f"'{json_key}' must be an object or an array of objects"
                        )
                    for entry in entries:
                        output.update(flatdict.FlatDict(entry, delimiter="."))
            except (json.JSONDecodeError, TypeError):
                output["json_parse_error"] = content

        # Include usage info if available
        for usage_key in ("eval_count", "prompt_eval_count", "total_duration"):
            if usage_key in response:
                output[usage_key] = response[usage_key]

        output.update(
            {
                "requirement_id": ids[i],
                "prompt_type": prompt_type,
            }
        )
        processed.append(output)
    return processed

async def run_requirement_review(
    ollama_client,
    system_message: str,
    user_message: str,
    prompt_name: str,
    requirements: List[str],
    ids: Optional[List[int]] = None,
    model: str = "llama3",  # Default to llama3 model for ollama
    json_key: str = "requirements_review",
    ) -> List[Dict[str, Any]]:
    """Send one chat request per requirement, await them together, and flatten the replies.

    Args:
        ollama_client: Client whose ``chat(model=..., messages=..., format=...)``
            call returns an awaitable.
        system_message: Content of the system message, sent unchanged.
        user_message: Template for the user message. ``{requirements}`` is
            replaced with ``"<id>: <requirement>"`` and ``{enable_split}`` with
            ``"True"``.
        prompt_name: Label forwarded to ``process_json_responses``.
        requirements: Requirement statements to review.
        ids: Identifiers paired with ``requirements``; defaults to
            ``0 .. len(requirements) - 1``.
        model: Model name passed to the client.
        json_key: Key forwarded to ``process_json_responses``.

    Returns:
        The list of records produced by ``process_json_responses``.
    """
    if ids is None:
        ids = list(range(len(requirements)))

    def _fill_template(req_id, req) -> str:
        # Substitute the requirement first, then the split flag.
        with_requirement = user_message.replace("{requirements}", f"{req_id}: {req}")
        return with_requirement.replace("{enable_split}", "True")

    # One pending chat call per (requirement, id) pair.
    pending = [
        ollama_client.chat(
            model=model,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": _fill_template(req_id, req)},
            ],
            format="json",  # Request JSON format response
        )
        for req, req_id in zip(requirements, ids)
    ]

    # Await every request together while showing a progress bar.
    responses = await tqdm_asyncio.gather(*pending, desc="Processing requirements")

    return await process_json_responses(responses, ids, prompt_name, json_key)

In [50]:
import nest_asyncio
# Presumably required so that the asyncio.run(...) call in a later cell works while
# the notebook's own event loop is running — TODO confirm.
nest_asyncio.apply()

# Instantiate the ollama client and define model
# Settings are read from the ../.env file only (via dotenv_values).
DOT_ENV = dotenv_values("../.env")
OLLAMA_HOST = DOT_ENV.get('OLLAMA_HOST', 'http://localhost:11434')  # Default to localhost if not specified
ollama_client = AsyncClient(host=OLLAMA_HOST)
MODEL = 'llama3.1'  # Use llama3 or another model available in your Ollama instance

# Names of the evaluation functions to run; passed to pe.make_eval_config below
# and, together with eval_weights, to pe.add_weighted_column in a later cell.
eval_funcs = [
    'eval_avoids_vague_terms',
    'eval_definite_articles_usage',
    'eval_has_appropriate_subject_verb',
    'eval_has_common_units_of_measure',
    'eval_has_escape_clauses',
    'eval_has_no_open_ended_clauses',
    'eval_is_active_voice',
]
# One weight per entry of eval_funcs (presumably matched by position — verify
# against pe.add_weighted_column). The seven weights sum to 1.0.
eval_weights = [
    0.35,
    0.05,
    0.15,
    0.05,
    0.10,
    0.10,
    0.20
]
# Make eval config
eval_config = pe.make_eval_config(pe, include_funcs=eval_funcs)

# Define prompt messages
# System message sent unchanged with every request (no placeholder substitution is
# applied to it), so it must not contain template placeholders. The example JSON
# below must itself be well-formed, since the model is asked to reproduce it exactly.
SYSTEM_PROMPT = """
You are a Senior Requirements Quality Analyst and technical editor. 
You specialize in detecting and fixing requirement defects using authoritative quality rules. 
Be rigorous, consistent, and concise. Maintain the author's technical intent while removing ambiguity. 
Do not add new functionality. Ask targeted clarification questions when needed.

Response Format (produce exactly this JSON structure):
{
  "requirements_review": [
    {
      "requirement_id": "<ID>",
      "original": "<original requirement>",
      "checks": {
        "R2": {"status": "pass|fail", "active_voice": ["<issues>"], "explanation": "<brief>"},
        "R3": {"status": "pass|fail", "appropriate_subj_verb": ["<issues>"], "explanation": "<brief>"},
        "R5": {"status": "pass|fail", "definite_articles": ["<issues>"], "explanation": "<brief>"},
        "R6": {"status": "pass|fail", "units": ["<issues>"], "explanation": "<brief>"},
        "R7": {"status": "pass|fail", "vague terms": ["<issues>"], "explanation": "<brief>"},
        "R8": {"status": "pass|fail", "escape_clauses": ["<issues>"], "explanation": "<brief>"},
        "R9": {"status": "pass|fail", "open_ended_clauses": ["<issues>"], "explanation": "<brief>"}
      },
      "proposed_rewrite": "<single improved requirement that resolves all detected issues>",
      "split_recommendation": {
        "needed": true|false,
        "because": "<why>",
        "split_into": ["<Req A>", "<Req B>"]
      }
    }
  ]
}

Evaluation method:
1) Parse inputs and normalize IDs. 
2) For each requirement, test R2, R3, R5, R6, R7, R8, R9. 
3) Explain each failure succinctly. 
4) Rewrite to a single, verifiable sentence unless a split is recommended. 
5) Apply glossary rules for abbreviations; on first use of allowed abbreviations, prefer the expanded form with abbreviation in parentheses. 
6) If required numbers are missing and no defaults are provided, use TBD placeholders and ask explicit questions to resolve them. 
7) Summarize compliance.

Important: If no requirements are provided, respond with a single clarifying question requesting requirements to review and stop.
"""

# User message template. run_requirement_review fills the {requirements} and
# {enable_split} placeholders with str.replace before sending each request.
USER_PROMPT = """
Task: Review and improve the following requirement statements using the provided variables.

Variables:
- Requirements (list or newline-separated; may include IDs):
  {requirements}
- Enable split recommendations (true|false; default true): {enable_split}

Produce output strictly in the Response Format JSON. Do not use Markdown.

Now perform the review on the provided inputs and return only the Response Format JSON.
"""

# Label recorded in the "prompt_type" field of every result record.
PROMPT_NAME = 'basic-incose'

# Define the requirements to be revised
# The same statement appears three times, so the three results can be compared
# against an identical input.
requirements = [
    "If projected the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30",
    "If projected the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30",
    "If projected the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30",
]
# NOTE(review): `df` is not referenced by the later cells shown in this notebook —
# confirm whether it is still needed.
df = pd.DataFrame({'requirements': requirements})

In [51]:
# Run the review for every requirement and load the flattened records into a DataFrame.
# asyncio.run() is used from a notebook cell; presumably this depends on the
# nest_asyncio.apply() call in the configuration cell — TODO confirm.
revisions = asyncio.run(run_requirement_review(
    ollama_client=ollama_client,  # AsyncClient created in the configuration cell
    system_message=SYSTEM_PROMPT,
    user_message=USER_PROMPT,
    prompt_name=PROMPT_NAME,
    requirements=requirements,
    ids=None,  # None -> ids 0..len(requirements)-1 are generated
    model=MODEL,  # Use the MODEL variable defined above
    json_key="requirements_review"
    )
)
final_df = pd.DataFrame(revisions)

Processing requirements: 100%|██████████| 3/3 [00:34<00:00, 11.57s/it]


In [41]:
final_df

Unnamed: 0,requirement_id,original,checks.R2.status,checks.R2.active_voice,checks.R2.explanation,checks.R3.status,checks.R3.appropriate_subj_verb,checks.R3.explanation,checks.R5.status,checks.R6.status,...,prompt_type,checks.R5.definite_articles,checks.R5.explanation,checks.R6.explanation,checks.R7.vague terms,checks.R7.explanation,checks.R8.escape_clauses,checks.R8.explanation,checks.R9.open_ended_clauses,checks.R9.explanation
0,0,If projected the data must be readable. On a 1...,fail,"[The sentence starts in passive voice, recomme...",Active voice is recommended for clarity and co...,fail,[The verb 'must' should be used with a singula...,Maintaining subject-verb agreement enhances gr...,pass,fail,...,basic-incose,,,,,,,,,
1,1,If projected the data must be readable. On a 1...,fail,[The requirement should be stated in active vo...,Rewrite using an active verb,pass,[],,fail,pass,...,basic-incose,[The requirement should use 'a' instead of 'th...,Rewrite to avoid unnecessary definite articles,,[The term 'readable' is vague. Consider using ...,Clarify what 'readable' means in this context,[],,[The requirement contains an open-ended clause...,Specify the minimum percentage of viewers that...
2,2,If projected the data must be readable. On a ...,fail,"[Rewrite in active voice, e.g., 'The system sh...",Rewritten in active voice for clarity and conc...,pass,[],,fail,fail,...,basic-incose,"[Use 'a' instead of 'an'], ]",Replace 'On a 10x10 projection screen' with 'O...,Use 'a viewing distance of 30 meters' instead ...,[],,"[Remove the phrase 'must be able to read'], ]",Replace with 'shall display' or similar verb f...,[],


In [42]:
# Get pre-revision Accuracy Score
# NOTE(review): the 'original' column is the requirement text echoed back by the model
# in its JSON reply (process_json_responses does not add the input text itself), so it
# may differ from the strings in `requirements` — confirm this is the intended baseline.
final_df = pe.call_evals(final_df, col='original', eval_config=eval_config)
final_df = pe.get_failed_evals(final_df)
# The return value is not captured; presumably the helper adds the named column to
# final_df in place — TODO confirm against pe.add_weighted_column.
pe.add_weighted_column(final_df, eval_funcs, eval_weights, "initial_weighted_value")
# Get post-revision Accuracy Score
# Same three steps, this time scoring the model's 'proposed_rewrite' text.
final_df = pe.call_evals(final_df, col='proposed_rewrite', eval_config=eval_config)
final_df = pe.get_failed_evals(final_df)
pe.add_weighted_column(final_df, eval_funcs, eval_weights, "weighted_value")

# View original and rewritten requirement statements
print(final_df[['original', 'proposed_rewrite', 'initial_weighted_value', 'weighted_value']])

                                            original  \
0  If projected the data must be readable. On a 1...   
1  If projected the data must be readable. On a 1...   
2  If projected the data must be readable.  On a ...   

                                    proposed_rewrite  initial_weighted_value  \
0  The data must be readable on a 10x10 projectio...                    0.65   
1  A 10x10 projection screen must ensure that at ...                    0.65   
2  The system shall ensure that on a 10x-projecti...                    0.65   

   weighted_value  
0            0.65  
1            0.85  
2            1.00  


[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# PyQt app

In [53]:
! pip install PyQt5 pandas ollama flatdict

[0mCollecting PyQt5
  Downloading PyQt5-5.15.11-cp38-abi3-manylinux_2_17_x86_64.whl (8.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting PyQt5-Qt5<5.16.0,>=5.15.2
  Downloading PyQt5_Qt5-5.15.17-py3-none-manylinux2014_x86_64.whl (61.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting PyQt5-sip<13,>=12.15
  Downloading pyqt5_sip-12.17.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.whl (271 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m271.4/271.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[0mInstalling collected packages: PyQt5-Qt5, PyQt5-sip, PyQt5
[0mSuccessfully installed PyQt5-5.15.11 PyQt5-Qt5-5.15.17 PyQt5-sip-12.17.1
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m

In [54]:
import sys
import os
import asyncio
import json
import flatdict
from typing import Any, Dict, List, Optional
import pandas as pd
from pathlib import Path

from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, 
                            QPushButton, QProgressBar, QLabel, QFileDialog, 
                            QLineEdit, QHBoxLayout, QMessageBox)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMimeData
from PyQt5.QtGui import QDragEnterEvent, QDropEvent

from ollama import AsyncClient

class RequirementsProcessorThread(QThread):
    """Background worker that reviews the requirements of an Excel file with an Ollama model.

    The input workbook must contain a ``requirement_text`` column and may contain
    a ``requirement_id`` column. Results are written to
    ``requirements_analysis_results.xlsx`` inside ``output_dir``.

    Signals:
        progress_updated(int, int): number of finished requests and total requests.
        processing_complete(pd.DataFrame): the result table, emitted after it was saved.
        error_occurred(str): message of any exception raised while running.
    """
    progress_updated = pyqtSignal(int, int)
    processing_complete = pyqtSignal(pd.DataFrame)
    error_occurred = pyqtSignal(str)
    
    def __init__(self, excel_file, output_dir, model="llama3"):
        """Store the input workbook path, the output directory and the model name."""
        super().__init__()
        self.excel_file = excel_file
        self.output_dir = output_dir
        self.model = model
        
    async def process_json_responses(self, responses, ids, prompt_type, json_key="requirements_review"):
        """Flatten the JSON payload of each chat response into one record per response.

        Args:
            responses: Chat responses supporting ``in`` and ``[]`` lookups.
            ids: Requirement identifiers aligned by position with ``responses``.
            prompt_type: Label stored under ``"prompt_type"`` in every record.
            json_key: Top-level key of the decoded JSON holding the review entries.

        Returns:
            One dict per response. When the content is not valid JSON, or the
            value under ``json_key`` is neither an object nor an array of
            objects, the raw content is kept under ``"json_parse_error"``.
            ``requirement_id`` and ``prompt_type`` are set last and therefore
            override same-named keys produced by the model.
        """
        processed = []

        for i, response in enumerate(responses):
            output = {}
            
            # Extract content from ollama response
            if "message" in response and "content" in response["message"]:
                content = response["message"]["content"]
                try:
                    response_json = json.loads(content)
                    if json_key in response_json:
                        entries = response_json[json_key]
                        # A single object is accepted as a one-element array.
                        if isinstance(entries, dict):
                            entries = [entries]
                        # Validate before flattening so an unexpected shape is
                        # recorded instead of aborting the whole run.
                        if not isinstance(entries, list) or not all(
                            isinstance(entry, dict) for entry in entries
                        ):
                            raise TypeError(
                                f"'{json_key}' must be an object or an array of objects"
                            )
                        for entry in entries:
                            output.update(flatdict.FlatDict(entry, delimiter="."))
                except (json.JSONDecodeError, TypeError):
                    output["json_parse_error"] = content
            
            # Include usage info if available
            for usage_key in ("eval_count", "prompt_eval_count", "total_duration"):
                if usage_key in response:
                    output[usage_key] = response[usage_key]
                
            output.update(
                {
                    "requirement_id": ids[i],
                    "prompt_type": prompt_type,
                }
            )
            processed.append(output)
        return processed

    async def run_requirement_review(self, ollama_client, system_message, user_message, 
                                    prompt_name, requirements, ids=None, json_key="requirements_review"):
        """Send one chat request per requirement, sequentially, reporting progress.

        ``{requirements}`` in ``user_message`` is replaced with
        ``"<id>: <requirement>"`` and ``{enable_split}`` with ``"True"``.
        ``progress_updated`` is emitted after every finished request.

        Returns:
            The records produced by ``process_json_responses``.
        """
        if ids is None:
            ids = list(range(len(requirements)))
        
        pairs = list(zip(requirements, ids))
        total = len(pairs)
        responses = []

        # Process requirements one by one with progress updates. Each request is
        # created right before it is awaited, so a failing request does not leave
        # already-created, never-awaited coroutines behind.
        for done, (req, req_id) in enumerate(pairs, start=1):
            response = await ollama_client.chat(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user",
                        "content": user_message
                        .replace("{requirements}", f"{req_id}: {req}")
                        .replace("{enable_split}", "True"),
                    },
                ],
                format="json",  # Request JSON format response
            )
            responses.append(response)
            self.progress_updated.emit(done, total)
        
        # Process structured JSON responses
        return await self.process_json_responses(responses, ids, prompt_name, json_key)

    async def process_requirements(self):
        """Load the workbook, review every requirement and return the results as a DataFrame.

        Raises:
            Exception: Any failure, re-raised with the prefix
                ``"Error processing requirements: "`` and chained to its cause.
        """
        try:
            # Load Excel file
            df = pd.read_excel(self.excel_file)

            # Fail with a readable message instead of a bare KeyError.
            if 'requirement_text' not in df.columns:
                raise ValueError("input file has no 'requirement_text' column")
            
            # Extract requirements and IDs
            requirements = df['requirement_text'].tolist()
            ids = df['requirement_id'].tolist() if 'requirement_id' in df.columns else None
            
            # Initialize Ollama client
            client = AsyncClient()
            
            # Define system and user messages (these would typically come from a config)
            system_message = """You are a requirements analysis expert. Analyze the given requirement 
                              and provide structured feedback in JSON format."""
            
            user_message = """Please analyze the following requirement and provide a detailed review:
                           {requirements}
                           
                           Return your analysis in JSON format with the key 'requirements_review' 
                           containing an array of objects with your findings."""
            
            # Process requirements
            results = await self.run_requirement_review(
                client, 
                system_message, 
                user_message, 
                "requirement_review", 
                requirements, 
                ids
            )
            
            # Convert results to DataFrame
            return pd.DataFrame(results)
            
        except Exception as e:
            # Chain the cause so the original traceback stays available.
            raise Exception(f"Error processing requirements: {str(e)}") from e

    @staticmethod
    def _excel_safe(frame):
        """Return a copy of ``frame`` whose list/tuple/dict cells are JSON strings.

        Flattened model output can still contain lists, which the Excel writer
        cannot store in a cell.
        """
        def _to_cell(value):
            if isinstance(value, (list, tuple, dict)):
                return json.dumps(value, default=str)
            return value

        safe = frame.copy()
        for column in safe.columns:
            # Only object columns can hold containers; leave numeric dtypes untouched.
            if safe[column].dtype == object:
                safe[column] = safe[column].map(_to_cell)
        return safe

    def run(self):
        """Thread entry point: run the review on a private event loop and save the results.

        Emits ``processing_complete`` on success and ``error_occurred`` on any
        exception. The event loop is always closed before the thread exits.
        """
        loop = None
        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            results_df = loop.run_until_complete(self.process_requirements())
            
            # Save results to output directory
            output_path = os.path.join(self.output_dir, 'requirements_analysis_results.xlsx')
            self._excel_safe(results_df).to_excel(output_path, index=False)
            
            self.processing_complete.emit(results_df)
            
        except Exception as e:
            self.error_occurred.emit(str(e))
        finally:
            # Release the loop's resources; each run() creates a fresh loop.
            if loop is not None:
                asyncio.set_event_loop(None)
                loop.close()


class DropArea(QWidget):
    """Drop target for a single Excel workbook.

    Accepts drags whose first URL is a local path ending in ``.xlsx`` or ``.xls``
    (compared case-insensitively) and emits ``file_dropped(str)`` with that path
    when the file is dropped.
    """
    file_dropped = pyqtSignal(str)

    # File name suffixes accepted by the drop area (lower case).
    _EXCEL_SUFFIXES = ('.xlsx', '.xls')

    # Shared stylesheet; only the border line style and the colours vary per state.
    _STYLE_TEMPLATE = """
            QWidget {{
                border: 2px {line} {accent};
                border-radius: 5px;
                background-color: {background};
            }}
            QLabel {{
                font-size: 16px;
                color: {text};
            }}
        """
    
    def __init__(self):
        """Build the label and apply the idle styling."""
        super().__init__()
        self.setAcceptDrops(True)
        self.setMinimumSize(400, 200)
        
        layout = QVBoxLayout()
        self.label = QLabel("Drop Excel file here")
        self.label.setAlignment(Qt.AlignCenter)
        layout.addWidget(self.label)
        self.setLayout(layout)
        
        # Set styling
        self._show_idle()

    def _apply_style(self, line, accent, background, text):
        """Render the shared stylesheet with the given border style and colours."""
        self.setStyleSheet(self._STYLE_TEMPLATE.format(
            line=line, accent=accent, background=background, text=text
        ))

    def _show_idle(self):
        """Grey dashed border: nothing is being dragged over the widget."""
        self._apply_style("dashed", "#aaa", "#f8f8f8", "#555")

    def _excel_path(self, mime_data):
        """Return the first URL's local path if it names an Excel file, else None."""
        if not mime_data.hasUrls():
            return None
        urls = mime_data.urls()
        if not urls:
            return None
        path = urls[0].toLocalFile()
        # Lower-case the path so ".XLSX" and ".Xls" are accepted as well.
        if path.lower().endswith(self._EXCEL_SUFFIXES):
            return path
        return None
        
    def dragEnterEvent(self, event: QDragEnterEvent):
        """Accept the drag and highlight the widget when an Excel file hovers over it."""
        if self._excel_path(event.mimeData()) is not None:
            event.acceptProposedAction()
            self._apply_style("dashed", "#3498db", "#e8f4fc", "#3498db")
        
    def dragLeaveEvent(self, event):
        """Restore the idle styling when the drag leaves the widget."""
        self._show_idle()
        
    def dropEvent(self, event: QDropEvent):
        """Show the dropped file's name, emit ``file_dropped`` and switch to the success styling."""
        file_path = self._excel_path(event.mimeData())
        if file_path is None:
            event.ignore()
            return
        # Tell the drag source that the drop was handled.
        event.acceptProposedAction()
        self.label.setText(f"File: {os.path.basename(file_path)}")
        self.file_dropped.emit(file_path)
        self._apply_style("solid", "#2ecc71", "#eafaf1", "#2ecc71")


class MainWindow(QMainWindow):
    """Main window: pick an Excel file, an output directory and a model, then run the analysis.

    The analysis itself runs in a ``RequirementsProcessorThread``; this class only
    wires its signals to the progress bar, the status label and message boxes.
    """

    # File name the worker thread writes inside the chosen output directory.
    _RESULTS_FILENAME = 'requirements_analysis_results.xlsx'

    def __init__(self):
        """Initialise state and build the widgets."""
        super().__init__()
        self.setWindowTitle("Requirements Analyzer")
        self.setMinimumSize(600, 400)
        
        self.excel_file_path = None
        self.output_dir = os.path.expanduser("~/Documents")
        # Worker of the current/last run and the path its results are saved to.
        self.processor_thread = None
        self._results_path = None
        
        self.init_ui()
        
    def init_ui(self):
        """Create the drop area, directory/model inputs, progress bar, status label and run button."""
        central_widget = QWidget()
        main_layout = QVBoxLayout(central_widget)
        
        # Drop area for Excel file
        self.drop_area = DropArea()
        self.drop_area.file_dropped.connect(self.set_excel_file)
        main_layout.addWidget(self.drop_area)
        
        # Output directory selection
        dir_layout = QHBoxLayout()
        dir_layout.addWidget(QLabel("Output Directory:"))
        
        self.output_dir_edit = QLineEdit(self.output_dir)
        dir_layout.addWidget(self.output_dir_edit)
        
        browse_btn = QPushButton("Browse...")
        browse_btn.clicked.connect(self.browse_output_dir)
        dir_layout.addWidget(browse_btn)
        
        main_layout.addLayout(dir_layout)
        
        # Model selection
        model_layout = QHBoxLayout()
        model_layout.addWidget(QLabel("Model:"))
        
        self.model_edit = QLineEdit("llama3")
        model_layout.addWidget(self.model_edit)
        
        main_layout.addLayout(model_layout)
        
        # Progress bar
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        main_layout.addWidget(self.progress_bar)
        
        # Status label
        self.status_label = QLabel("Ready")
        self.status_label.setAlignment(Qt.AlignCenter)
        main_layout.addWidget(self.status_label)
        
        # Run button (enabled once a file has been dropped)
        self.run_button = QPushButton("Run Analysis")
        self.run_button.clicked.connect(self.run_analysis)
        self.run_button.setEnabled(False)
        main_layout.addWidget(self.run_button)
        
        self.setCentralWidget(central_widget)
        
    def set_excel_file(self, file_path):
        """Remember the dropped workbook and enable the run button."""
        self.excel_file_path = file_path
        self.run_button.setEnabled(True)
        self.status_label.setText(f"Ready to analyze: {os.path.basename(file_path)}")
        
    def browse_output_dir(self):
        """Let the user pick the output directory and mirror it in the line edit."""
        dir_path = QFileDialog.getExistingDirectory(
            self, "Select Output Directory", self.output_dir
        )
        if dir_path:
            self.output_dir = dir_path
            self.output_dir_edit.setText(dir_path)
            
    def run_analysis(self):
        """Validate the inputs and start the worker thread."""
        if not self.excel_file_path:
            QMessageBox.warning(self, "Error", "Please select an Excel file first.")
            return
            
        output_dir = self.output_dir_edit.text()
        if not os.path.isdir(output_dir):
            QMessageBox.warning(self, "Error", "Please select a valid output directory.")
            return
            
        model = self.model_edit.text().strip()
        if not model:
            QMessageBox.warning(self, "Error", "Please specify a model name.")
            return
            
        # Disable UI elements during processing
        self.run_button.setEnabled(False)
        self.status_label.setText("Processing requirements...")
        # Clear the bar left at 100% (or mid-way) by a previous run.
        self.progress_bar.setValue(0)

        # Remember where this run saves its results; the directory field can be
        # edited while the worker is still running.
        self._results_path = os.path.join(output_dir, self._RESULTS_FILENAME)
        
        # Start processing thread
        self.processor_thread = RequirementsProcessorThread(
            self.excel_file_path, output_dir, model
        )
        self.processor_thread.progress_updated.connect(self.update_progress)
        self.processor_thread.processing_complete.connect(self.processing_finished)
        self.processor_thread.error_occurred.connect(self.processing_error)
        self.processor_thread.start()
        
    def update_progress(self, current, total):
        """Show ``current`` of ``total`` finished requests as a percentage."""
        # Guard against a zero total so the slot can never divide by zero.
        percentage = int((current / total) * 100) if total else 0
        self.progress_bar.setValue(percentage)
        self.status_label.setText(f"Processing requirement {current} of {total}...")
        
    def processing_finished(self, results_df):
        """Report success and re-enable the run button."""
        # Use the path captured when the run started; fall back to the current
        # field text if this slot is invoked without a preceding run_analysis().
        output_path = self._results_path or os.path.join(
            self.output_dir_edit.text(), self._RESULTS_FILENAME
        )
        self.status_label.setText(f"Analysis complete! Results saved to: {output_path}")
        self.progress_bar.setValue(100)
        self.run_button.setEnabled(True)
        
        QMessageBox.information(
            self, 
            "Processing Complete", 
            f"Requirements analysis complete!\n\nResults saved to:\n{output_path}"
        )
        
    def processing_error(self, error_message):
        """Report a failure, reset the progress bar and re-enable the run button."""
        self.status_label.setText(f"Error: {error_message}")
        self.progress_bar.setValue(0)
        self.run_button.setEnabled(True)
        
        QMessageBox.critical(
            self,
            "Processing Error",
            f"An error occurred during processing:\n\n{error_message}"
        )


if __name__ == "__main__":
    # Only one QApplication may exist per process. Re-running this cell in the
    # same kernel must reuse the existing instance instead of creating another.
    app = QApplication.instance()
    if app is None:
        app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())

: 