In [80]:
import os
import pandas as pd
import tiktoken
from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch
import subprocess
import io
import sys
import util
import prompt_template
from openai import OpenAI
import re
import api

In [81]:
# Remove every non-ASCII character (code point >= 128) from a text file.
def clean_text_file(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` keeping only ASCII characters.

    The input is decoded as UTF-8 with undecodable bytes silently dropped,
    every character whose code point is 128 or above is discarded, and the
    remainder is written to ``output_file`` as UTF-8. The source is read in
    full before the destination is opened, so both paths may name the same
    file.
    """
    with open(input_file, 'r', encoding='utf-8', errors='ignore') as source:
        raw_text = source.read()

    ascii_only = ''.join(filter(lambda ch: ord(ch) < 128, raw_text))

    with open(output_file, 'w', encoding='utf-8') as target:
        target.write(ascii_only)
# Path of the text file that the (currently disabled) cleaning call below
# would rewrite in place.
input_file = "input/articles.txt"
#clean_text_file(input_file, input_file)


In [82]:
def run_graphrag_index():
    """Run ``python -m graphrag.index --root .`` with the current interpreter.

    The child's stdout/stderr are not captured, so indexing progress and any
    error text stream straight to the notebook/console. Failures are reported
    with ``print`` rather than raised.

    Returns:
        None.
    """
    command = [sys.executable, "-m", "graphrag.index", "--root", "."]
    try:
        subprocess.run(command, capture_output=False, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("An error occurred while running the command:")
        # Output is streamed, not captured, so e.stderr is None here; the
        # exit status is the only detail the exception actually carries.
        print("Exit code:", e.returncode)
        if e.stderr:
            print("Error output:", e.stderr)
    except FileNotFoundError:
        # Raised when the interpreter executable itself cannot be started.
        # A missing graphrag package makes the child exit non-zero instead,
        # which is handled by the CalledProcessError branch above.
        print(f"Error: could not launch the Python interpreter at {sys.executable!r}.")
    else:
        print("Command executed successfully.")

#run_graphrag_index()

In [83]:
# Community reports written by the GPT-4-turbo indexing run, shown for inspection.
gpt4_turbo_reports_path = "GPT-4-turbo-indexing/artifacts/create_final_community_reports.parquet"
report_df = pd.read_parquet(gpt4_turbo_reports_path)
report_df

Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,0,# Regulatory Compliance and Relationships in t...,0,7.5,Regulatory Compliance and Relationships in the...,The impact severity rating is high due to the ...,This report examines the intricate network of ...,"[{'explanation': 'Brokers, including broker/de...","{\n ""title"": ""Regulatory Compliance and Rel...",1abd8f20-01b6-4cb5-8dee-eee69fa31ce3
1,1,# Regulatory and Operational Dynamics in the T...,0,8.5,Regulatory and Operational Dynamics in the Tra...,The high impact severity rating reflects the c...,This report delves into the intricate network ...,[{'explanation': 'Broker/Dealers occupy a cent...,"{\n ""title"": ""Regulatory and Operational Dy...",e451bdd5-b8b0-48c3-8ab1-8505357d9ee7
2,2,# Regulatory Framework in Financial Trading\n\...,0,8.5,Regulatory Framework in Financial Trading,The high impact severity rating reflects the c...,This report outlines the intricate relationshi...,[{'explanation': 'The Organization is at the h...,"{\n ""title"": ""Regulatory Framework in Finan...",b7cd43b6-8cd2-4578-8702-cf96dda0890b
3,3,# Online Trading Ecosystem Analysis\n\nThis re...,0,7.5,Online Trading Ecosystem Analysis,The impact severity rating is relatively high ...,This report delves into the structured relatio...,"[{'explanation': 'Customers, both individuals ...","{\n ""title"": ""Online Trading Ecosystem Anal...",0675a3fe-2a43-4f12-9e4f-7ffac697454a


In [84]:
# Community reports written by the GPT-4o-mini indexing run, shown for inspection.
gpt4o_mini_reports_path = "GPT-4o-mini-indexing/artifacts/create_final_community_reports.parquet"
report_df = pd.read_parquet(gpt4o_mini_reports_path)
report_df

Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,66,# Organization and Information Security Manage...,2,8.5,Organization and Information Security Management,The impact severity rating is high due to the ...,The community centers around the concept of an...,[{'explanation': 'Organizations are pivotal in...,"{\n ""title"": ""Organization and Information ...",9a3533a6-0cc3-4fb6-9662-18e6766262fb
1,67,# Information Security Competence and Awarenes...,2,7.5,Information Security Competence and Awareness,The impact severity rating is high due to the ...,This community focuses on the critical aspects...,[{'explanation': 'Competence is a fundamental ...,"{\n ""title"": ""Information Security Competen...",e94da1ed-34fb-4a9d-be5f-f86c56b030c3
2,68,# Information Systems and Records Management\n...,2,7.5,Information Systems and Records Management,The impact severity rating is high due to the ...,The community focuses on the relationship betw...,[{'explanation': 'Information systems are esse...,"{\n ""title"": ""Information Systems and Recor...",c03614a3-841c-42dc-b079-30c2da132c38
3,69,# Information Security Management System Commu...,2,8.0,Information Security Management System Community,The impact severity rating is high due to the ...,The community is centered around the Informati...,[{'explanation': 'The Information Security Man...,"{\n ""title"": ""Information Security Manageme...",0bdc5f02-ea5b-4137-9fa9-c26029978f77
4,70,# Information Security Management System and S...,2,7.5,Information Security Management System and Sta...,The impact severity rating is high due to the ...,The community focuses on the Information Secur...,[{'explanation': 'Interested Parties play a cr...,"{\n ""title"": ""Information Security Manageme...",4f21a029-e29c-4193-b1ed-6c986c6a9fb0
...,...,...,...,...,...,...,...,...,...,...
62,5,# Brokerage and Trading Community\n\nThe commu...,0,8.5,Brokerage and Trading Community,The impact severity rating is high due to the ...,The community encompasses various entities inv...,[{'explanation': 'Brokers serve as essential i...,"{\n ""title"": ""Brokerage and Trading Communi...",c5a7b791-3cb2-4797-ad31-cc1b2eb394c5
63,6,# Information Security Incident Management Com...,0,7.5,Information Security Incident Management Commu...,The impact severity rating is high due to the ...,The community focuses on the management of inf...,[{'explanation': 'Information Security Inciden...,"{\n ""title"": ""Information Security Incident...",fb1dcec7-17bf-44f3-8684-323d99f9e70e
64,7,# Information Security Management Community\n\...,0,7.5,Information Security Management Community,The impact severity rating is high due to the ...,The community focuses on information security ...,[{'explanation': 'Information Classification i...,"{\n ""title"": ""Information Security Manageme...",37120226-8608-4358-a1c5-7afecbbeac48
65,8,# Information Security Community\n\nThe Inform...,0,8.5,Information Security Community,The impact severity rating is high due to the ...,The Information Security Community comprises v...,[{'explanation': 'Information security is the ...,"{\n ""title"": ""Information Security Communit...",41e6583c-fa02-4c8a-aa60-3727de7ba229


In [85]:
# The GraphRAG chat model authenticates with the key stored in the
# GRAPHRAG_API_KEY environment variable; a missing variable raises KeyError here.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = "gpt-4o-mini-2024-07-18"

# Chat client handed to GlobalSearch further down.
llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.OpenAI,  
    max_retries=20,
)

# Tokenizer shared by the context builder and the search engine.
# NOTE(review): the encoding name is hard-coded; confirm cl100k_base is the
# intended encoding for llm_model.
token_encoder = tiktoken.get_encoding("cl100k_base")

In [86]:
# Directory and table names of the parquet artifacts loaded in the next cell
# (each is read as f"{INPUT_DIR}/{<TABLE>}.parquet").
INPUT_DIR = "./GPT-4o-mini-indexing/artifacts"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
# Community level passed to read_indexer_reports / read_indexer_entities.
COMMUNITY_LEVEL = 2

In [87]:
# Load the three indexing artifacts named in the configuration cell.
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

# Convert the raw tables into the report/entity objects used by the context builder.
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
# The count is taken from the raw table, i.e. before any COMMUNITY_LEVEL handling.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 67


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,66,# Organization and Information Security Manage...,2,8.5,Organization and Information Security Management,The impact severity rating is high due to the ...,The community centers around the concept of an...,[{'explanation': 'Organizations are pivotal in...,"{\n ""title"": ""Organization and Information ...",9a3533a6-0cc3-4fb6-9662-18e6766262fb
1,67,# Information Security Competence and Awarenes...,2,7.5,Information Security Competence and Awareness,The impact severity rating is high due to the ...,This community focuses on the critical aspects...,[{'explanation': 'Competence is a fundamental ...,"{\n ""title"": ""Information Security Competen...",e94da1ed-34fb-4a9d-be5f-f86c56b030c3
2,68,# Information Systems and Records Management\n...,2,7.5,Information Systems and Records Management,The impact severity rating is high due to the ...,The community focuses on the relationship betw...,[{'explanation': 'Information systems are esse...,"{\n ""title"": ""Information Systems and Recor...",c03614a3-841c-42dc-b079-30c2da132c38
3,69,# Information Security Management System Commu...,2,8.0,Information Security Management System Community,The impact severity rating is high due to the ...,The community is centered around the Informati...,[{'explanation': 'The Information Security Man...,"{\n ""title"": ""Information Security Manageme...",0bdc5f02-ea5b-4137-9fa9-c26029978f77
4,70,# Information Security Management System and S...,2,7.5,Information Security Management System and Sta...,The impact severity rating is high due to the ...,The community focuses on the Information Secur...,[{'explanation': 'Interested Parties play a cr...,"{\n ""title"": ""Information Security Manageme...",4f21a029-e29c-4193-b1ed-6c986c6a9fb0


In [88]:
# Context builder for global search, fed with the reports and entities loaded
# above and the shared tokenizer.
context_builder = GlobalCommunityContext(
    community_reports=reports,
    entities=entities,
    token_encoder=token_encoder,
)

In [89]:
# Options forwarded to the context builder through GlobalSearch.
# NOTE(review): shuffle_data is enabled and no seed is set in this cell —
# confirm whether run-to-run variation in report order is acceptable.
context_builder_params = {
    "use_community_summary": False,  # False means using full community reports. True means using community short summaries.
    "shuffle_data": True,
    "include_community_rank": True,
    "min_community_rank": 0,
    "community_rank_name": "rank",
    "include_community_weight": True,
    "community_weight_name": "occurrence weight",
    "normalize_community_weight": True,
    "max_tokens": 5000,
    "context_name": "Reports",
}

# LLM parameters for the map stage of the search (JSON object output requested).
map_llm_params = {
    "max_tokens": 1000,
    "temperature": 0.0,
    "response_format": {"type": "json_object"},
}

# LLM parameters for the reduce stage of the search.
reduce_llm_params = {
    "max_tokens": 1000, 
    "temperature": 0.0,
}

In [90]:
# Global search engine wired to the chat model, context builder and parameter
# dictionaries defined in the preceding cells.
# NOTE(review): json_mode is False here while map_llm_params asks for a
# json_object response_format — confirm which of the two is intended.
search_engine = GlobalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    max_data_tokens=5000, 
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    allow_general_knowledge=False,  # set this to True will add instruction to encourage the LLM to incorporate general knowledge in the response, which may increase hallucinations, but could be useful in some use cases.
    json_mode=False,  # set this to False if your LLM model does not support JSON mode.
    context_builder_params=context_builder_params,
    concurrent_coroutines=32,
    response_type="multiple paragraphs",
)

In [91]:
# Report utils: running counters printed and saved at the end of the notebook.
# They live at module level because call_LLM and extract_violations update
# them through `global`; re-run this cell to reset them to zero.
num_of_LLM_calls = 0
num_of_comp_violations = 0
num_of_part_violations = 0
num_of_compliances = 0
# The four counters below are only assigned in the commented-out consistency cells.
num_of_self_redundencies = 0
num_of_redundencies = 0
num_of_self_conflicts = 0
num_of_conflicts = 0

In [92]:
req_num = 42
with open(f"documents/requirements/FR/req_{req_num}.txt", mode="r", encoding="utf-8") as req_file:
    req_content = req_file.read()
    result = await search_engine.asearch(f"I want you to find all the entities that can be related to {req_content}.")
    num_of_LLM_calls += result.llm_calls



##DEBUG##: {
    "points": [
        {"description": "The Broker Back Office System is integral to managing broker operations, including the processing of orders, which encompasses the deletion of completed and canceled orders from the customer order list. This system ensures compliance and operational efficiency in handling such transactions [Data: Reports (65)].", "score": 80},
        {"description": "The User Interaction with Database and Transaction Functionality highlights the importance of user engagement with transaction functionalities, which includes the ability to delete orders based on their status. This interaction is crucial for maintaining an accurate order list for users [Data: Reports (29)].", "score": 75},
        {"description": "The Secure Development Community emphasizes the need for robust error handling mechanisms, such as automatic retries for failed deletion attempts, to ensure a seamless user experience when managing their order lists [Data: Reports (50)].", "

In [93]:

    
# Reports that made it into the search context, heaviest occurrence weight first.
df = pd.DataFrame(result.context_data['reports'])
context_df = df.sort_values('occurrence weight', ascending=False)

# Keep the reports whose occurrence weight is above 0.6.
is_heavily_weighted = context_df['occurrence weight'].gt(0.6)
filtered_df = context_df[is_heavily_weighted]

# Start the saved text from the search engine's final answer.
result_str = result.response
#content_context = context_df["content"][0] + "\n" + result_str

context_df.head(10)

Unnamed: 0,id,title,occurrence weight,content,rank
18,75,Brokerage Community and Regulatory Compliance,0.62069,# Brokerage Community and Regulatory Complianc...,7.5
26,69,Information Security Management System Community,0.551724,# Information Security Management System Commu...,8.0
35,72,User Account Management and Stock Trading Comm...,0.482759,# User Account Management and Stock Trading Co...,7.5
27,40,Trading Compliance and Customer Interaction,0.344828,# Trading Compliance and Customer Interaction\...,7.5
43,29,User Interaction with Database and Transaction...,0.275862,# User Interaction with Database and Transacti...,7.5
19,78,Information Security Community,0.275862,# Information Security Community\n\nThe Inform...,7.5
0,52,Broker Community and Employee Compliance,0.172414,# Broker Community and Employee Compliance\n\n...,7.5
1,73,User Account Management Community,0.172414,# User Account Management Community\n\nThe com...,6.5
44,10,Tehran Financial Market Governance,0.172414,# Tehran Financial Market Governance\n\nThe co...,8.5
9,33,Stock Trading Community,0.172414,# Stock Trading Community\n\nThe Stock Trading...,7.5


In [94]:
# Flatten every map-stage response into one string so the per-point answers
# and their scores can be pulled out of the text representation.
sample_string = "".join(str(resp) for resp in result.map_responses)

# NOTE(review): the pattern only matches answers rendered between single
# quotes; an answer whose repr uses double quotes is skipped — confirm this
# is acceptable or read the structured map responses instead.
pattern = re.compile(r"'answer':\s*'(.*?)',\s*'score':\s*(\d+)")
matches = pattern.findall(sample_string)

df = pd.DataFrame(matches, columns=['Answer', 'Score'])
df['Score'] = pd.to_numeric(df['Score'])
sorted_df = df.sort_values(by='Score', ascending=False)

# Scores are whole numbers on a 0-100 scale (75, 80, 85, ... in the output),
# so the cut-off has to be on that scale too. The previous 0.7 kept every
# answer with a non-zero score, which made the filter a no-op.
min_answer_score = 70
filtered_df = df[df['Score'] >= min_answer_score]
high_score_answer = '\n'.join(filtered_df['Answer'])

# Build from result.response rather than from result_str so that re-running
# this cell does not append the answers a second time.
result_str = result.response + "\n\n" + high_score_answer

sorted_df

Unnamed: 0,Answer,Score
8,The system must ensure that the deletion proce...,85
0,The Account Maintenance Page serves as the cen...,80
10,The role of brokers in managing customer order...,80
3,The process of deleting completed and canceled...,80
15,The Broker Back Office System is integral to m...,80
7,The process of deleting completed and canceled...,80
16,The User Interaction with Database and Transac...,75
13,The order list is a key entity that contains a...,75
11,The online trading infrastructure supports the...,75
9,Error handling is a critical aspect of the del...,75


In [95]:
# Persist the search answer plus the appended map-stage answers for this
# requirement so later cells (or later sessions) can reload it.
with open(f"RAG_OUTPUT/SIMILARITY/sim_res_{req_num}.txt", mode="w", encoding="utf-8") as res_file:
    res_file.write(result_str)

In [96]:
def call_LLM(user_prompt, system_prompt, model="gpt-4o-mini-2024-07-18", temperature=0.2):
    """Send one system + user prompt pair to an OpenAI chat model.

    Args:
        user_prompt: Text sent as the ``user`` message.
        system_prompt: Text sent as the ``system`` message.
        model: Chat model name. Defaults to the gpt-4o-mini snapshot this
            notebook has always called.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The chat-completion response object; callers in this notebook read the
        reply text from ``response.choices[0].message.content``.

    Side effects:
        Increments the module-level ``num_of_LLM_calls`` counter once the
        request has returned.
    """
    global num_of_LLM_calls
    client = OpenAI(api_key=api.API_KEY)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        #max_tokens = 4096,
        temperature=temperature,
    )
    num_of_LLM_calls += 1
    return response


In [116]:
# Reload a previously saved similarity result from disk into result_str.
# NOTE(review): this loads the result for requirement 3, while the next cell
# sets req_num to 45 and checks that requirement against this text — confirm
# the mismatch is intentional.
req_num = 3
with open(f"RAG_OUTPUT/SIMILARITY/sim_res_{req_num}.txt", mode="r", encoding="utf-8") as res_file:
    result_str = res_file.read() 

In [117]:
# Check requirement `req_num` against the retrieved context held in result_str.
req_num = 45
system_prompt = prompt_template.prompt_cross_reference_check(result_str, True)

requirement_path = f"documents/requirements/FR/req_{req_num}.txt"
with open(requirement_path, mode="r", encoding="utf-8") as req_file:
    req = req_file.read()

user_prompt = prompt_template.prompt_user_compliance(req)
result = call_LLM(user_prompt=user_prompt, system_prompt=system_prompt)
comp_resp = result.choices[0].message.content

# Save the verdict for the violation-extraction step, then echo it.
compliance_path = f"RAG_OUTPUT/COMPLIANCE/comp_res_{req_num}.txt"
with open(compliance_path, mode="w", encoding="utf-8") as comp_file:
    comp_file.write(comp_resp)
print(comp_resp)

SUMMARY:
- Total related contents found: 5
- Complete violations: 3
- Partial violations: 2
- Compliant aspects: 0

DETAILED ANALYSIS:

1. 
   Relationship: Complete Violation
   Content Reference: Entities Related to Bank Account Validation
   Reason: The requirement states that the bank account information is checked against an internal database without contacting the bank interface. This violates the established best practice of validating bank accounts through a reliable bank interface to ensure accuracy and legitimacy.
   Suggested Solution: Modify the requirement to include validation through the bank interface to ensure that the account information is verified against the bank's records.

2. 
   Relationship: Complete Violation
   Content Reference: Error Messages
   Reason: The requirement specifies that if the customer's bank account is invalid, the system proceeds without notifying the customer. This is a violation of best practices, as effective error handling and communicat

In [99]:
# The LLM reply lists compliant items as well, so keep only the violation entries.
def extract_violations(file_path):
    """Return the violation entries found in a saved compliance report.

    The file is split on blank lines. An entry containing
    ``Relationship: Partial Violation`` or ``Relationship: Complete Violation``
    is collected (once per label it contains), and entries containing
    ``Relationship: Compliance`` are only counted.

    Side effects:
        Adds the number of matches to the module-level counters
        ``num_of_part_violations``, ``num_of_comp_violations`` and
        ``num_of_compliances``.
    """
    global num_of_comp_violations, num_of_part_violations, num_of_compliances

    with open(file_path, 'r', encoding='utf-8') as report:
        blocks = report.read().split('\n\n')

    violations = []
    for block in blocks:
        is_partial = 'Relationship: Partial Violation' in block
        is_complete = 'Relationship: Complete Violation' in block
        is_compliant = 'Relationship: Compliance' in block

        if is_partial:
            violations.append(block)
            num_of_part_violations += 1
        if is_complete:
            violations.append(block)
            num_of_comp_violations += 1
        if is_compliant:
            num_of_compliances += 1

    return violations

In [100]:
# store extracted violations in a data frame for better analysis
# One list of violation entries per analysed report file (a single file here).
# Calling extract_violations also bumps the module-level violation counters.
data_list = [extract_violations(f"RAG_OUTPUT/COMPLIANCE/comp_res_{req_num}.txt")]
print(data_list)
# Parsed rows; filled by the loop further down in this cell.
data = []
def parse_item(item, counter):
    """Turn one violation entry into a flat record.

    Args:
        item: Text of a single entry as returned by ``extract_violations``.
        counter: Value stored under ``'ID'``.

    Returns:
        A dict with the keys ``ID``, ``Relationship``, ``Content Reference``,
        ``Reason`` and ``Suggested Solution`` (in that order). Each text field
        holds the stripped remainder of the first line carrying its label, or
        ``None`` when the label is absent.
    """
    field_patterns = {
        'Relationship': r'Relationship: ([^\n]+)',
        'Content Reference': r'Content Reference: ([^\n]+)',
        'Reason': r'Reason: ([^\n]+)',
        'Suggested Solution': r'Suggested Solution: ([^\n]+)',
    }

    record = {'ID': counter}
    for field, field_pattern in field_patterns.items():
        found = re.search(field_pattern, item)
        record[field] = found.group(1).strip() if found else None
    return record

# Every entry of a sublist is tagged with that sublist's 1-based position.
counter = 1
for sublist in data_list:
    data.extend(parse_item(item, counter) for item in sublist)
    counter += 1

# Rows are labelled 1..len(data).
row_labels = range(1, len(data) + 1)
df = pd.DataFrame(data, index=row_labels)
df.head(10)


[['7. \n   Relationship: Partial Violation\n   Content Reference: Status of Orders\n   Reason: The requirement states that users can delete orders with the status "completed" or "canceled," but it does not clarify how the system identifies these statuses, which is crucial for effective order management.\n   Suggested Solution: Include a specification on how the system determines the status of orders eligible for deletion.']]


Unnamed: 0,ID,Relationship,Content Reference,Reason,Suggested Solution
1,1,Partial Violation,Status of Orders,The requirement states that users can delete o...,Include a specification on how the system dete...


In [101]:
# Extract suggested solutions for consistency check.
# Solutions are taken batch_size rows at a time and accumulated into
# sol_<counter>.txt files; a new file is started whenever adding the next
# batch would reach the token budget (2048 minus the consistency system prompt).
# NOTE(review): with the cleanup call below disabled, sol_N.txt files left by
# an earlier run that produced more files are not removed.
#util.delete_all_files_in_directory("RAG_OUTPUT/SOLUTIONS/")
batch_size = 10
results = ""
solutions_size = 0
counter = 1
system_prompt_size = util.count_tokens(prompt_template.prompt_consistency(""), "gpt-4o")
token_budget = 2048 - system_prompt_size

for start in range(0, len(df), batch_size):
    end = start + batch_size
    batch = df.iloc[start:end]
    # Each line is "<row label>. <suggested solution>".
    batch_str = "\n".join(f"{index}. {row['Suggested Solution']}" for index, row in batch.iterrows())
    # Count the batch once; it was previously added to solutions_size both
    # before and after the check, which halved the effective budget.
    batch_tokens = util.count_tokens(batch_str, "gpt-4o")
    # Flush only when something is pending, so an oversized first batch no
    # longer produces an empty sol_1.txt.
    if results and solutions_size + batch_tokens >= token_budget:
        with open(f"RAG_OUTPUT/SOLUTIONS/sol_{counter}.txt", 'w', encoding='utf-8') as file:
            file.write(results)
        results = ""
        solutions_size = 0
        counter += 1
    results += batch_str + "\n"
    solutions_size += batch_tokens

# Write whatever is still pending (an empty file when df has no rows).
with open(f"RAG_OUTPUT/SOLUTIONS/sol_{counter}.txt", 'w', encoding='utf-8') as file:
    file.write(results)

In [102]:
# # find redundant or contradicting solutions. (self consistency check)
# directory = 'RAG_OUTPUT/SOLUTIONS/'
# text_files = [f for f in os.listdir(directory) if f.endswith('.txt')]
# result = ""
# if len(text_files) > 1:
#     for i in range(len(text_files)):
#         for j in range(i + 1, len(text_files)):
#             file1_path = os.path.join(directory, text_files[i])
#             file2_path = os.path.join(directory, text_files[j])
#             with open(file1_path, 'r', encoding='utf-8') as file1:
#                 file1_contents = file1.read()

#             with open(file2_path, 'r', encoding='utf-8') as file2:
#                 file2_contents = file2.read()
#             combined_contents = file1_contents + "\n" + file2_contents
#             response = call_LLM(prompt_template.prompt_user_self_consistency(combined_contents), system_prompt = prompt_template.prompt_consistency(""))
#             result += response.choices[0].message.content
# else:
#     file_path = os.path.join(directory, "sol_1.txt")
#     with open(file_path, 'r', encoding='utf-8') as file:
#         file_contents = file.read()
#         response = call_LLM(prompt_template.prompt_user_self_consistency(file_contents), system_prompt = prompt_template.prompt_consistency(""))
#         result = response.choices[0].message.content

# with open(f'RAG_OUTPUT/CONSISTENCY/SELF_CON/self_con_result_{req_num}.txt', 'w', encoding='utf-8') as confile:
#     confile.write(result)

In [103]:
# # df for self consistencies
# user_input = []
# content_from = []
# similarity_score = []
# similarity_number = []

# with open(f'RAG_OUTPUT/CONSISTENCY/SELF_CON/self_con_result_{req_num}.txt', 'r') as file:
#     lines = file.readlines()

# for line in lines:
#     if "Similarity #" in line:
#         sim_number = int(re.search(r'\d+', line).group())
#         similarity_number.append(sim_number)
#     elif "(suggested solution):" in line:
#         user_input.append(int(re.search(r'\d+', line).group()))
#     elif "Content from :" in line:
#         content_from.append(int(re.search(r'\d+', line).group()))
#     elif "Similarity Score:" in line:
#         similarity_score.append(int(re.search(r'\d+', line).group()))

# df = pd.DataFrame({
#     'Similarity Number': similarity_number,
#     'User Input (Suggested Solution)': user_input,
#     'Content From': content_from,
#     'Similarity Score': similarity_score
# })


# df.head(10)


In [104]:
# # remove redundant suggestions. conflicts are manually handled.
# content_from_list = df[df['Similarity Score'] >= 70]['Content From'].tolist()
# num_of_self_conflicts = len(df[df['Similarity Score'] == 0]['Content From'].tolist())
# num_of_self_redundencies = len(content_from_list)

# number_of_sol_files = len([f for f in os.listdir("RAG_OUTPUT/SOLUTIONS") if f.endswith('.txt')])


# for i in range(1,number_of_sol_files + 1):
#     with open(f'RAG_OUTPUT/SOLUTIONS/sol_{i}.txt', 'r') as file:
#         lines = file.readlines()

#         filtered_lines = [line for index, line in enumerate(lines, start=1) if index not in content_from_list]
#         with open(f'RAG_OUTPUT/SOLUTIONS/update/updated_sol_{i}.txt', 'w') as file:
#             file.writelines(filtered_lines)


In [105]:
# # consistency check between remaining solutions and existing requirement document.

# req_directory = 'documents/requirements/'
# sol_directory = 'RAG_OUTPUT/SOLUTIONS/update/'

# sol_files = [f for f in os.listdir(sol_directory) if f.endswith('.txt')]
# req_files = [f for f in os.listdir(req_directory) if f.endswith('.txt')]
# result = ""

# for i in range(len(sol_files)):
#     for j in range(len(req_files)):
#         sol_path = os.path.join(sol_directory, sol_files[i])
#         req_path = os.path.join(req_directory, req_files[j])
#         with open(sol_path, 'r', encoding='utf-8') as file1:
#             sol_contents = file1.read()
#         with open(req_path, 'r', encoding='utf-8') as file2:
#             req_contents = file2.read()
#         response = call_LLM(prompt_template.prompt_user_consistency(sol_contents), system_prompt = prompt_template.prompt_consistency(req_contents))
#         result += response.choices[0].message.content

# with open(f"RAG_OUTPUT/CONSISTENCY/con_res_{req_num}.txt", 'w', encoding='utf-8') as confile:
#     confile.write(result)


In [106]:
# # df for consistencies
# user_input = []
# content_from = []
# similarity_score = []
# similarity_number = []

# with open(f"RAG_OUTPUT/CONSISTENCY/con_res_{req_num}.txt", 'r') as file:
#     lines = file.readlines()

# for line in lines:
#     if "Similarity #" in line or "Contradiction #" in line:
#         sim_number = int(re.search(r'\d+', line).group())
#         similarity_number.append(sim_number)
#     elif "(suggested solution):" in line:
#         user_input.append(int(re.search(r'\d+', line).group()))
#     elif "Content from " in line:
#         content_from.append(int(re.search(r'\d+', line).group()))
#     elif "Similarity Score:" in line:
#         similarity_score.append(int(re.search(r'\d+', line).group()))
# df = pd.DataFrame({
#     'Similarity Number': similarity_number,
#     'User Input (Suggested Solution)': user_input,
#     'Content From': content_from,
#     'Similarity Score': similarity_score
# })

# content_from_list = df[df['Similarity Score'] >= 70]['Content From'].tolist()
# num_of_conflicts = len(df[df['Similarity Score'] == 0]['Content From'].tolist()) - num_of_self_conflicts
# num_of_redundencies = len(content_from_list) - num_of_self_redundencies

# df.head(10)


In [107]:
# Final tally of the counters updated throughout the notebook.
summary_counters = (
    ("Number of LLM calls: ", num_of_LLM_calls),
    ("Number of complete violations: ", num_of_comp_violations),
    ("Number of partial violations: ", num_of_part_violations),
    ("Number of compliances: ", num_of_compliances),
)
for label, value in summary_counters:
    print(label, value)

Number of LLM calls:  8
Number of complete violations:  0
Number of partial violations:  1
Number of compliances:  7


In [108]:
# Append this run's counters as a new last row of the cumulative results table.
# NOTE(review): the value order must match the CSV's column order, which is
# not visible in this notebook — confirm. Running this cell and the save
# below more than once for the same requirement adds duplicate rows.
res_df = pd.read_csv("RAG_OUTPUT/result.csv")
res_df.loc[len(res_df)] = [req_num,num_of_LLM_calls,num_of_part_violations,num_of_comp_violations,num_of_compliances]


In [109]:
# Write the table back over the same file; index=False keeps the row labels
# out of the CSV.
res_df.to_csv("RAG_OUTPUT/result.csv",index = False)