### Libraries

In [1]:
import requests
from pathlib import Path
import json
from langchain_openai import ChatOpenAI
from bs4 import BeautifulSoup
import json

### Scraper

In [2]:
def download_ad(url, ad_name, timeout=30):
    """Download an AD web page and every PDF it links to into ``ad_data/<ad_name>``.

    The page's text is written to ``content.txt``; each linked PDF is saved as
    ``document_<n>.pdf``, numbered in the order the links appear on the page.

    Args:
        url: Address of the AD page to fetch.
        ad_name: Name of the sub-folder of ``ad_data`` that receives the files.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the page or one of its PDFs answers with an
            error status.
    """
    from urllib.parse import urljoin

    folder = Path(f"ad_data/{ad_name}")
    folder.mkdir(parents=True, exist_ok=True)

    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than storing an error page as if it were the AD.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Explicit encoding: the platform default cannot always encode page text.
    with open(folder / "content.txt", "w", encoding="utf-8") as f:
        f.write(soup.get_text())

    pdf_count = 0
    for link in soup.find_all('a', href=True):
        href = link['href']
        if '.pdf' not in href.lower():
            continue

        # Resolve the link against the page that was actually fetched, so
        # relative links work on any site; absolute links pass through as-is.
        pdf_url = urljoin(response.url, href)
        pdf_response = requests.get(pdf_url, timeout=timeout)
        pdf_response.raise_for_status()

        # Count only after a successful download so numbering has no gaps.
        pdf_count += 1
        with open(folder / f"document_{pdf_count}.pdf", "wb") as f:
            f.write(pdf_response.content)

### Parsing

In [None]:
def parse_document(document_path):
    """Upload a document to the OCR endpoint and return the text it reports.

    Args:
        document_path: Path of the file to upload.

    Returns:
        The value of the ``document_text`` field of the endpoint's JSON reply.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    dotsOCR_url = "OCR_URL"
    # The context manager closes the upload handle even if the request fails;
    # previously the handle was opened inline and never closed.
    with open(document_path, "rb") as document_file:
        response = requests.post(dotsOCR_url, files={"file": document_file})
    # Surface HTTP failures directly instead of as a JSON/KeyError further down.
    response.raise_for_status()
    return response.json()['document_text']

### Extract Rules

In [None]:
# Settings read whenever a ChatOpenAI client is built from this config.
# The values are literal placeholders; presumably they are meant to be
# replaced with a real model name, key and endpoint before running (TODO confirm).
LLM_CONFIG = dict(
    model_name="model_name",
    api_key="api_key",
    base_url="base_url",
)

def extract_rules(ad_id, text):
    """Ask the LLM to extract applicability rules from the text of an AD.

    Args:
        ad_id: Identifier of the AD; it is embedded in the prompt and in the
            JSON structure the model is asked to return.
        text: Text of the AD document.

    Returns:
        The model's reply as a string. When the reply contains a ``{...}``
        span, only that span is returned, so code fences or stray prose around
        the JSON object do not break a later ``json.loads``. Otherwise the
        reply is returned unchanged.
    """
    llm = ChatOpenAI(
        model=LLM_CONFIG["model_name"],
        api_key=LLM_CONFIG["api_key"],
        base_url=LLM_CONFIG["base_url"],
        temperature=0
    )

    # MSN constraints are requested as a list of {"start", "end"} objects
    # because that is the shape the rule-based checker iterates over; a
    # free-text range cannot be evaluated there.
    system_prompt = f"""
        
        You are analyzing an Airworthiness Directive (AD) document.

        AD ID: {ad_id}

        Extract the following information from the AD text below:

        1. Applicable Aircraft Models: List all aircraft models mentioned in the applicability section
        2. MSN Constraints: Any manufacturer serial number constraints, as a list of inclusive numeric ranges (null if applies to all MSNs)
        3. Excluded If Modifications: Any modifications or service bulletins that EXCLUDE aircraft from this AD if present
        4. Required Modifications: Any modifications or service bulletins that are REQUIRED for this AD to apply

        Format your response as a JSON object with this structure:
        {{
            "ad_id": "{ad_id}",
            "applicability_rules": {{
                "aircraft_models": ["MD-11", "MD-11F", "DC-10-30F", ...],
                "msn_constraints": null or [{{"start": 1000, "end": 1999}}, ...],
                "excluded_if_modifications": [],
                "required_modifications": []
            }}
        }}

        The text is as follows:
        {text}

        Respond with ONLY the JSON object, no additional text.
    """

    response = llm.invoke([system_prompt])
    content = response.content

    # Keep only the outermost {...} span when there is one.
    start = content.find("{")
    end = content.rfind("}")
    if start != -1 and end > start:
        return content[start:end + 1]
    return content

In [7]:
# Run OCR + rule extraction on the first PDF of every AD folder under ad_data/
# and save the parsed rules to ad_rules.json.
ad_data_dir = Path("ad_data")
results = []

for ad_folder in sorted(ad_data_dir.iterdir()):
    # Stray files sitting next to the AD folders are not ADs.
    if not ad_folder.is_dir():
        continue

    ad_id = ad_folder.name
    # glob() order is not guaranteed; sort so the same PDF is picked each run.
    pdf_files = sorted(ad_folder.glob("*.pdf"))
    if not pdf_files:
        # Previously this case crashed with IndexError on pdf_files[0].
        print(f"Skipping {ad_id}: no PDF found")
        continue
    document_path = pdf_files[0]

    document_text = parse_document(str(document_path))
    response = extract_rules(ad_id, document_text)

    result = json.loads(response)
    results.append(result)

output_file = "ad_rules.json"
with open(output_file, "w") as f:
    json.dump(results, f, indent=2)

In [8]:
# Display the `results` list so the parsed rules can be inspected.
results

[{'ad_id': 'EASA-2025-0254',
  'applicability_rules': {'aircraft_models': ['A320-211',
    'A320-212',
    'A320-214',
    'A320-215',
    'A320-216',
    'A320-231',
    'A320-232',
    'A320-233',
    'A321-111',
    'A321-112',
    'A321-131'],
   'msn_constraints': None,
   'excluded_if_modifications': ['Airbus modification 24591 embodied in production',
    'Airbus Service Bulletin A320-57-1089 Revision 04 accomplished in service',
    'Airbus modification 24977 embodied in production'],
   'required_modifications': ['Airbus Service Bulletin A320-57-1089 Revision 04 (modification)']}},
 {'ad_id': 'FAA-2025-23-53',
  'applicability_rules': {'aircraft_models': ['MD-11',
    'MD-11F',
    'MD-10-10F',
    'MD-10-30F',
    'DC-10-10',
    'DC-10-10F',
    'DC-10-15',
    'DC-10-30',
    'DC-10-30F',
    'KC-10A',
    'KDC-10',
    'DC-10-40',
    'DC-10-40F'],
   'msn_constraints': None,
   'excluded_if_modifications': [],
   'required_modifications': []}}]

### Test with 10 Aircraft without LLM

In [9]:
def load_ad_rules(path="ad_rules.json"):
    """Load AD applicability rules from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``ad_rules.json``.

    Returns:
        The decoded JSON content of the file.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [13]:
def is_aircraft_affected(aircraft, ad):
    """Decide whether an AD applies to an aircraft using the extracted rules.

    Checks run in this order and the first failing one decides the result:
    model, MSN range, excluding modifications, required modifications.

    Args:
        aircraft: Dict with keys ``model``, ``msn`` and ``modifications``.
        ad: Dict with an ``applicability_rules`` dict holding
            ``aircraft_models``, ``msn_constraints``,
            ``excluded_if_modifications`` and ``required_modifications``.
            ``msn_constraints`` may be empty/None, one ``{"start", "end"}``
            dict, or a list of such dicts. A missing or None bound leaves
            that side of the range open.

    Returns:
        Tuple ``(affected, reason)``: a bool and a short explanation.

    Raises:
        ValueError: If ``msn_constraints`` is set but is not a dict or a list
            of dicts (for example a free-text range).

    Note:
        Modifications are compared by exact equality, so an aircraft's
        modification must be written exactly like the rule's entry to match.
    """
    model = aircraft["model"]
    msn = aircraft["msn"]
    mods = aircraft["modifications"]
    rules = ad["applicability_rules"]

    if model not in rules["aircraft_models"]:
        return False, "Model not applicable"

    msn_constraints = rules["msn_constraints"]
    if msn_constraints:
        # A single range may arrive as a bare object instead of a one-item list.
        if isinstance(msn_constraints, dict):
            msn_constraints = [msn_constraints]
        if not isinstance(msn_constraints, (list, tuple)) or not all(
            isinstance(constraint, dict) for constraint in msn_constraints
        ):
            raise ValueError(
                "msn_constraints must be a list of {'start', 'end'} objects, "
                f"got {msn_constraints!r}"
            )

        def _within(constraint):
            # A None bound is open-ended; comparing None with an int would raise.
            start = constraint.get("start")
            end = constraint.get("end")
            return (start is None or start <= msn) and (end is None or msn <= end)

        if not any(_within(constraint) for constraint in msn_constraints):
            return False, f"MSN {msn} not in range"

    for excluded in rules["excluded_if_modifications"]:
        excluded_id = excluded if isinstance(excluded, str) else excluded.get("modification_id")
        if excluded_id in mods:
            return False, f"Has excluded mod: {excluded_id}"

    for required in rules["required_modifications"]:
        required_id = required if isinstance(required, str) else required.get("modification_id")
        if required_id not in mods:
            return False, f"Missing required mod: {required_id}"

    return True, "Applicable"
    
    

In [14]:
# Ten sample aircraft configurations, built from (model, msn, modifications) rows.
test_aircraft = [
    {"model": model, "msn": msn, "modifications": modifications}
    for model, msn, modifications in (
        ("MD-11", 48123, []),
        ("DC-10-30F", 47890, []),
        ("Boeing 737-800", 30123, []),
        ("A320-214", 5234, []),
        ("A320-232", 6789, ["mod 24591"]),
        ("A320-214", 7456, ["SB A320-57-1089 Rev 04"]),
        ("A321-111", 8123, []),
        ("A321-112", 364, ["mod 24977"]),
        ("A319-100", 9234, []),
        ("MD-10-10F", 46234, []),
    )
]

# Read the rules back through load_ad_rules() rather than reusing kernel state.
ad_rules = load_ad_rules()
# Alternative kept for reference: use the in-memory `results` list instead.
# ad_rules = results

In [15]:
# Check every test aircraft against every AD with the rule-based checker.
# The records go into their own list: `results` still holds the extracted AD
# rules, and appending to it would write those rules into results.json too.
rule_check_results = []

for i, aircraft in enumerate(test_aircraft):
    for ad in ad_rules:
        affected, reason = is_aircraft_affected(aircraft, ad)

        rule_check_results.append({
            "aircraft_index": i + 1,
            "model": aircraft["model"],
            "msn": aircraft["msn"],
            "modifications": aircraft["modifications"],
            "ad_id": ad["ad_id"],
            "affected": affected,
            "reason": reason
        })


with open("results.json", "w") as f:
    json.dump(rule_check_results, f, indent=2)

### Test with 10 Aircraft with LLM

In [16]:
def check_with_llm(aircraft, ad_rule):
    """Ask the LLM whether an AD applies to an aircraft configuration.

    Args:
        aircraft: Dict with keys ``model``, ``msn`` and ``modifications``
            (a list of strings).
        ad_rule: JSON-serialisable AD rule; it is embedded in the prompt.

    Returns:
        The JSON object parsed from the model's reply. The prompt asks for the
        keys ``affected`` and ``reason``; their presence is not validated here.

    Raises:
        ValueError: If the reply contains no JSON object or the object cannot
            be decoded (``json.JSONDecodeError`` is a ``ValueError``).
    """
    llm = ChatOpenAI(
        model=LLM_CONFIG["model_name"],
        api_key=LLM_CONFIG["api_key"],
        base_url=LLM_CONFIG["base_url"],
        temperature=0
    )

    system_prompt = f"""
        You are an aviation safety expert. Determine if the following aircraft is affected by this Airworthiness Directive (AD).

        AIRCRAFT CONFIGURATION:
        - Model: {aircraft["model"]}
        - MSN: {aircraft["msn"]}
        - Modifications: {", ".join(aircraft["modifications"]) if aircraft["modifications"] else "None"}

        AIRWORTHINESS DIRECTIVE:
        {json.dumps(ad_rule, indent=2)}

        RULES:
        1. The aircraft model must be in the "aircraft_models" list
        2. If there are MSN constraints, the aircraft MSN must meet them (null = all MSNs)
        3. If the aircraft has any modifications in "excluded_if_modifications", the AD does NOT apply
        4. If there are "required_modifications", the aircraft must have them for the AD to apply

        Answer with a JSON object in this exact format:
        {{
            "affected": true/false,
            "reason": "brief explanation"
        }}

        Respond with ONLY the JSON object, nothing else.
"""
    response = llm.invoke([system_prompt])
    content = response.content

    # Parse only the outermost {...} span so code fences or stray prose
    # around the object do not make json.loads fail.
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"LLM response does not contain a JSON object: {content!r}")
    return json.loads(content[start:end + 1])

In [17]:
# Check every test aircraft against every AD with the LLM-based checker.
# The records go into their own list: `results` already holds data appended
# by earlier cells, which would otherwise end up in results_with_llm.json.
llm_check_results = []

for i, aircraft in enumerate(test_aircraft):
    for ad in ad_rules:
        result = check_with_llm(aircraft, ad)
        affected = result["affected"]
        reason = result["reason"]
        llm_check_results.append({
            "aircraft_index": i + 1,
            "model": aircraft["model"],
            "msn": aircraft["msn"],
            "modifications": aircraft["modifications"],
            "ad_id": ad["ad_id"],
            "affected": affected,
            "reason": reason
        })

with open("results_with_llm.json", "w") as f:
    json.dump(llm_check_results, f, indent=2)