In [1]:
import pandas as pd

# List of AD identifiers
ad_list = [
    'AD_2018-0289R1_1',
    'AD_2019-0056_1',
    'AD_2019-0173_1',
    'AD_2020-0030_1',
    'AD_2020-0053_1',
    'AD_2020-0118_1',
    'AD_2020-0148_1',
    'AD_2020-0219_1',
    'AD_2020-0250_1',
    'AD_2021-0172_2',
    'AD_2021-0236_1',
    'AD_2021-0256_1',
    'AD_2021-0279_2',
    'AD_2022-0032R1_1',
    'AD_2022-0115_2',
    'AD_2022-0185_1'
]

# Create a DataFrame template
df = pd.DataFrame({
    'AD_Number': ad_list,
    'Models': [''] * len(ad_list),
    'Conditions': [''] * len(ad_list),
    'Affected_Parts': [''] * len(ad_list),
    'Total_Extracted': [''] * len(ad_list),
    'Time_Parsed_sec': [''] * len(ad_list)
})

# Display the DataFrame to the user
#import ace_tools as tools; tools.display_dataframe_to_user(name="AD Extraction Metrics", dataframe=df)

# Optionally, save to CSV
df.to_csv(r'C:\Users\zdrop\OneDrive - TU Wien\MASTER THESIS\ADs\A320\directives\sample dataset\golden_data_llm_without\ad_extraction_metrics_template.csv', index=False)


In [20]:
import json
import re

def find_matching_and_missing_models(json_file1_path, json_file2_path):
    """
    Compares aircraft models between two JSON files (specified by paths)
    and identifies matching and missing models.

    Args:
        json_file1_path (str): The path to the first JSON file (ground truth).
        json_file2_path (str): The path to the second JSON file.

    Returns:
        tuple: A tuple containing two lists:
               - matching_models (list): Models found in both files.
               - missing_models (list): Models present in file1 but missing in file2.
    """

    ground_truth_models = set()
    extracted_models_from_file2 = set()

    # Load data from the first JSON file
    try:
        with open(json_file1_path, 'r') as f:
            json_file1_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {json_file1_path}")
        return [], []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from {json_file1_path}")
        return [], []

    # Load data from the second JSON file
    try:
        with open(json_file2_path, 'r') as f:
            json_file2_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {json_file2_path}")
        return [], []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from {json_file2_path}")
        return [], []

    # Extract models from the first JSON file (ground truth)
    if "applicability_groups" in json_file1_data:
        for group in json_file1_data["applicability_groups"]:
            if "models" in group:
                ground_truth_models.update(group["models"])

    # Extract models from the raw_output of the second JSON file
    if "raw_output" in json_file2_data:
        raw_output = json_file2_data["raw_output"]
        # Regex to find aircraft models (e.g., A318-111, A319, A320)
        model_pattern = re.compile(r'\b[A]\d{3}(?:-\d{3})?\b')
        found_models = model_pattern.findall(raw_output)
        extracted_models_from_file2.update(found_models)

    matching_models = sorted(list(ground_truth_models.intersection(extracted_models_from_file2)))
    missing_models = sorted(list(ground_truth_models.difference(extracted_models_from_file2)))

    return matching_models, missing_models

# --- Example Usage ---

# 1. Create dummy JSON files for testing:
#    You would typically have these files already, but for demonstration,
#    we'll create them.

# Content for file1.json
"""
file1_content = {
    "ad_number": "2018-0289R1",
    "status": "active",
    "affected_parts": [
        {
            "raw_text": "Door stop fitting holes at position 1 or 7 at fuselage frame (FR)16 and FR20, on left-hand and right-hand side, respectively.",
            "identifier": None,
            "location": "ATA 53"
        }
    ],
    "applicability_groups": [
        {
            "manufacturer": "Airbus, formerly Airbus Industrie",
            "models": [
                "A318-111", "A318-112", "A318-121", "A318-122", "A319-111",
                "A319-112", "A319-113", "A319-114", "A319-115", "A319-131",
                "A319-132", "A319-133", "A320-211", "A320-212", "A320-214",
                "A320-215", "A320-216", "A320-231", "A320-232", "A320-233",
                "A321-111", "A321-112", "A321-131", "A321-211", "A321-212",
                "A321-213", "A321-231", "A321-232"
            ],
            "serial_numbers": "all manufacturer serial numbers",
            "qualifiers": [
                {
                    "type": "exclusion",
                    "raw_text": "except: - A318 aeroplanes on which Airbus modification (mod) 39195 was embodied in production",
                    "condition": {
                        "identifiers": ["39195"],
                        "requirement": "mod embodied",
                        "applies_to_subset_models": ["A318"]
                    }
                },
                {
                    "type": "exclusion",
                    "raw_text": "Airbus Service Bulletin (SB) A320-00-1219 was embodied in service; and",
                    "condition": {
                        "identifiers": ["A320-00-1219"],
                        "requirement": "SB embodied in service",
                        "applies_to_subset_models": ["A318"]
                    }
                },
                {
                    "type": "exclusion",
                    "raw_text": "- A319 aeroplanes on which Airbus mod 28238, mod 28162 and mod 28342 were embodied in production.",
                    "condition": {
                        "identifiers": ["28238", "28162", "28342"],
                        "requirement": "were embodied",
                        "applies_to_subset_models": ["A319"]
                    }
                }
            ]
        }
    ]
}

# Content for file2.json (slightly modified to show a missing model)
file2_content = {
    "raw_output": "Here's the applicability information extracted from the Airworthiness Directive:\n\n**Aircraft Models:**\n* Airbus A318-111, A318-112, A318-121, A318-122, A319-111, A319-112, A319-113, A319-114, A319-115, A319-131, A319-132, A319-133, A320-211, A320-212, A320-214, A320-215, A320-216, A320-231, A320-232, A320-233, A321-111, A321-112, A321-131, A321-211, A321-212, A321-213, A321-232 aeroplanes.\n\n**Exceptions:**\n\n* A318 aeroplanes on which Airbus modification (mod) 39195 was embodied in production, or Airbus Service Bulletin (SB) A320-00-1219 was embodied in service; and\n* A319 aeroplanes on which Airbus mod 28238, mod 28162 and mod 28342 were embodied in production.\n",
    "processing_time_seconds": 6.89
}
"""
# Define file paths
file1_path = r"C:\Users\zdrop\OneDrive - TU Wien\MASTER THESIS\ADs\A320\directives\sample dataset\llm_without guidance\golden_data_llm_without\AD_2022-0185_1_c.json"
file2_path = r"C:\Users\zdrop\OneDrive - TU Wien\MASTER THESIS\ADs\A320\directives\sample dataset\llm_without guidance\golden_data_llm_without\AD_2022-0185_1_lng.json"


# Call the function with file paths
matching_models, missing_models = find_matching_and_missing_models(file1_path, file2_path)

print("Matching Models:", matching_models)
print("Missing Models:", missing_models)

Matching Models: []
Missing Models: ['A319-151N', 'A319-153N', 'A319-171N', 'A320-251N', 'A320-252N', 'A320-253N', 'A320-271N', 'A320-272N', 'A320-273N', 'A321-251N', 'A321-251NX', 'A321-252N', 'A321-252NX', 'A321-253N', 'A321-253NX', 'A321-271N', 'A321-271NX', 'A321-272N', 'A321-272NX']
