# Augment Aircraft Data

Combines aircraft.csv with:
1. Category/military classification via the Anthropic API (AircraftClassifier)
2. Registration, manufacturer, type, owner, and operator information from the hexdb.io API

Output: modes.csv

In [None]:
import pandas as pd
import os

In [None]:
# Load aircraft.csv (semicolon-separated, no header row).
# Columns by position: 0=hex, 1=Registration, 2=ICAOTypeCode, 3=flag, 4=Type
aircraft_path = os.path.expanduser('aircraft.csv')
# Read every field as text. With type inference, an all-digit hex code such as
# "004001" can be parsed as the integer 4001 (and "3e0123" as a float), which
# corrupts the key used for the hexdb lookup and for the final join.
df = pd.read_csv(aircraft_path, header=None, sep=';', dtype=str)
# Normalise hex codes to lowercase; missing values stay NaN.
df[0] = df[0].str.lower()
# Keep only rows that have an ICAO type code.
df = df.dropna(subset=[2])
df = df.rename(columns={0: 'hex', 1: 'Registration', 2: 'ICAOTypeCode', 4: 'Type'})
# Drop the unused positional columns when the file provides them.
df = df.drop(columns=[3, 5, 6, 7], errors='ignore')
df = df.set_index('hex')
# Initial classification text; rebuilt after the hexdb merge.
df['lookup'] = df['Type'].fillna('') + ' ' + df['ICAOTypeCode'].astype(str)
df['Manufacturer'] = ''
df['RegisteredOwners'] = None  # filled from hexdb
df.head()

## Query hexdb for owner information

In [None]:
import requests
import time
from datetime import datetime

# Distinct index values that are strings and do not contain '~'.
hex_codes = [
    code
    for code in df.index.unique().tolist()
    if isinstance(code, str) and '~' not in code
]
print(f"{len(hex_codes)} hex codes to query")

In [None]:
def fetch_aircraft_data(hex_code, max_retries=5):
    """Look up one aircraft on hexdb.io by its hex code.

    Args:
        hex_code: Identifier appended to the hexdb.io aircraft endpoint URL.
        max_retries: Maximum number of HTTP attempts before giving up.

    Returns:
        On HTTP 200, the decoded JSON payload with a ``'hex'`` key added.
        On HTTP 404, ``{'hex': hex_code, 'error': '404'}``.
        When every attempt failed, ``{'hex': hex_code, 'error': 'Unknown'}``.
    """
    url = f"https://hexdb.io/api/v1/aircraft/{hex_code}"
    headers = {"Accept": "application/json"}
    for attempt in range(1, max_retries + 1):
        try:
            r = requests.get(url, headers=headers, timeout=10)
            if r.status_code == 200:
                data = r.json()
                data['hex'] = hex_code
                return data
            if r.status_code == 404:
                return {'hex': hex_code, 'error': '404'}
            rate_limited = r.status_code == 429
        except Exception:
            # Deliberately broad: this runs once per hex code in a long crawl,
            # so a transport error or malformed payload is retried rather than
            # allowed to abort the whole run.
            rate_limited = False

        # Waiting only makes sense when another attempt follows; previously the
        # function slept (up to 5 minutes) after the final attempt as well.
        if attempt == max_retries:
            break
        if rate_limited:
            print(f"[{hex_code}] Rate limited. Waiting 5 min.")
            time.sleep(300)
        else:
            time.sleep(10)
    return {'hex': hex_code, 'error': 'Unknown'}

# Query hexdb once per hex code, reporting progress every 500 lookups.
hexdb_results = []
total_codes = len(hex_codes)
for position, hex_code in enumerate(hex_codes, start=1):
    hexdb_results.append(fetch_aircraft_data(hex_code))
    if position % 500 == 0:
        print(f"[{datetime.now()}] Queried {position}/{total_codes}")
    time.sleep(0.1)  # polite delay

In [None]:
# Index the hexdb responses by hex code so they can be joined onto df.
hexdb_df = pd.DataFrame(hexdb_results)
if 'hex' not in hexdb_df.columns:
    # No lookups were performed, so the frame has no columns at all.
    hexdb_df['hex'] = pd.Series(dtype=object)
hexdb_df = hexdb_df.set_index('hex')
hexdb_renamed = hexdb_df.rename(columns={
    'Registration': 'registration', 'Manufacturer': 'manufacturer',
    'ICAOTypeCode': 'typecode', 'Type': 'type',
    'RegisteredOwners': 'owner', 'OperatorFlagCode': 'operator'
})
hexdb_fields = ['registration', 'manufacturer', 'typecode', 'type', 'owner', 'operator']
# Error-only responses carry just 'hex' and 'error'. If no lookup returned a
# given field, create it as missing so the selection below cannot raise
# KeyError and the aircraft.csv fallbacks still apply.
for field in hexdb_fields:
    if field not in hexdb_renamed.columns:
        hexdb_renamed[field] = None
# Merge hexdb into df (prefer hexdb when available)
df = df.join(hexdb_renamed[hexdb_fields], how='left')
# Use hexdb values, fall back to aircraft.csv
df['registration'] = df['registration'].fillna(df.get('Registration'))
df['manufacturer'] = df['manufacturer'].fillna(df.get('Manufacturer', pd.Series(dtype=object))).fillna('')
df['typecode'] = df['typecode'].fillna(df['ICAOTypeCode'])
df['type'] = df['type'].fillna(df['Type'])
df['owner'] = df['owner'].fillna(df.get('RegisteredOwners'))
# When hexdb has no operator flag code, the type code is used in its place.
df['operator'] = df['operator'].fillna(df['typecode'])
# Build description for classification: Manufacturer ICAOTypeCode Type
df['description'] = (df['manufacturer'].fillna('') + ' ' + df['typecode'].fillna('').astype(str) + ' ' + df['type'].fillna('').astype(str)).str.strip()
# Prefer the merged description; if it is empty, fall back to the aircraft.csv text.
df['lookup'] = df['description'].where(df['description'].str.len() > 0, df['Type'].fillna('') + ' ' + df['ICAOTypeCode'].astype(str))
df.head()

## Classify aircraft with AircraftClassifier (Anthropic)

In [None]:
import json
from typing import Any, Dict, List, Optional

import anthropic

class AircraftClassifier:
    """Classify aircraft descriptions with the Anthropic Messages API.

    Each description is assigned one category from a fixed civilian/military
    scheme plus a boolean ``military`` flag. The scheme is embedded in the
    system prompt and is also used to validate every response.
    """

    # Model used for every request unless overridden in the constructor. The
    # single-item and batch paths previously hard-coded different model IDs.
    DEFAULT_MODEL = "claude-sonnet-4-20250514"

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """
        Initialize the Aircraft Classifier with Anthropic API key.

        Args:
            api_key: Your Anthropic API key
            model: Model ID used for both single and batch classification
        """
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model

        # Define the classification scheme
        self.classification_scheme = {
            "civilian": [
                "airliner", "business jet", "helicopter", "general aviation",
                "glider", "balloon", "amphibian", "gyrocopter", "airship"
            ],
            "military": [
                "fighter", "bomber", "torpedo bomber", "trainer", "transport",
                "tanker", "reconnaissance", "liaison", "helicopter",
                "maritime patrol", "electronic warfare", "uav"
            ]
        }

        self.system_prompt = """You are an expert aircraft classifier. Your job is to classify aircraft according to a specific scheme.

CLASSIFICATION SCHEME:

Civilian Categories (9):
- airliner: Commercial passenger aircraft
- business jet: Corporate/private jets
- helicopter: Civilian rotorcraft
- general aviation: Light aircraft, private planes, experimental aircraft, ultralights, turboprops
- glider: Unpowered sailplanes and gliders
- balloon: Hot air balloons and gas balloons
- amphibian: Aircraft that operate from both water and land
- gyrocopter: Autogyros and gyroplanes
- airship: Lighter-than-air dirigibles

Military Categories (12):
- fighter: Air-to-air combat aircraft
- bomber: Strategic/tactical bombers
- torpedo bomber: WWII-era torpedo attack aircraft
- trainer: Military training aircraft
- transport: Military cargo/personnel transport
- tanker: Aerial refueling aircraft
- reconnaissance: Surveillance/mapping aircraft
- liaison: Light military utility aircraft
- helicopter: Military rotorcraft
- maritime patrol: Ocean surveillance/ASW aircraft
- electronic warfare: Electronic countermeasures/AWACS
- uav: Unmanned aerial vehicles

EXAMPLES (two for each category):

CIVILIAN:
- airliner:
  * "Boeing B77W 777 312ER" → {"aircraft": "Boeing B77W 777 312ER", "category": "airliner", "military": false}
  * "Airbus A388 A380 861" → {"aircraft": "Airbus A388 A380 861", "category": "airliner", "military": false}
- business jet:
  * "Dassault F2LX Falcon 2000LX" → {"aircraft": "Dassault F2LX Falcon 2000LX", "category": "business jet", "military": false}
  * "Bombardier GL7T Global 7500" → {"aircraft": "Bombardier GL7T Global 7500", "category": "business jet", "military": false}
- helicopter:
  * "Robinson R22 R22 Alpha" → {"aircraft": "Robinson R22 R22 Alpha", "category": "helicopter", "military": false}
  * "Bell B06 206L Long Ranger" → {"aircraft": "Bell B06 206L Long Ranger", "category": "helicopter", "military": false}
- general aviation:
  * "Cessna C172 172R Skyhawk" → {"aircraft": "Cessna C172 172R Skyhawk", "category": "general aviation", "military": false}
  * "Piper P28A PA-28 180 Cherokee B" → {"aircraft": "Piper P28A PA-28 180 Cherokee B", "category": "general aviation", "military": false}
- glider:
  * "Schleicher GLID ASW 27" → {"aircraft": "Schleicher GLID ASW 27", "category": "glider", "military": false}
  * "Schempp-Hirth NIMB Nimbus 4" → {"aircraft": "Schempp-Hirth NIMB Nimbus 4", "category": "glider", "military": false}
- balloon:
  * "Cameron BALL A-160 Hot Air Balloon" → {"aircraft": "Cameron BALL A-160 Hot Air Balloon", "category": "balloon", "military": false}
  * "Ultramagic BALL N-425 Hot Air Balloon" → {"aircraft": "Ultramagic BALL N-425 Hot Air Balloon", "category": "balloon", "military": false}
- amphibian:
  * "Grumman G44 Widgeon G-44A" → {"aircraft": "Grumman G44 Widgeon G-44A", "category": "amphibian", "military": false}
  * "AVIC AG60 AG600 M" → {"aircraft": "AVIC AG60 AG600 M", "category": "amphibian", "military": false}
- gyrocopter:
  * "AutoGyro Europe CLON Cavalon" → {"aircraft": "AutoGyro Europe CLON Cavalon", "category": "gyrocopter", "military": false}
  * "Magni Gyro MM16 M-16 Tandem Trainer" → {"aircraft": "Magni Gyro MM16 M-16 Tandem Trainer", "category": "gyrocopter", "military": false}
- airship:
  * "Zeppelin SHIP LZ N07-101 Airship" → {"aircraft": "Zeppelin SHIP LZ N07-101 Airship", "category": "airship", "military": false}
  * "Goodyear SHIP GZ-20A Blimp" → {"aircraft": "Goodyear SHIP GZ-20A Blimp", "category": "airship", "military": false}

MILITARY:
- fighter:
  * "North American P51 P-51D Mustang 20-NA" → {"aircraft": "North American P51 P-51D Mustang 20-NA", "category": "fighter", "military": true}
  * "General Dynamics F16 F-16BM Fighting Falcon" → {"aircraft": "General Dynamics F16 F-16BM Fighting Falcon", "category": "fighter", "military": true}
- bomber:
  * "North American B25 TB-25J Mitchell" → {"aircraft": "North American B25 TB-25J Mitchell", "category": "bomber", "military": true}
  * "English Electric CNBR Canberra TT.18" → {"aircraft": "English Electric CNBR Canberra TT.18", "category": "bomber", "military": true}
- torpedo bomber:
  * "General Motors TBM TBM-3S Avenger" → {"aircraft": "General Motors TBM TBM-3S Avenger", "category": "torpedo bomber", "military": true}
  * "Fairey SWOR Swordfish II" → {"aircraft": "Fairey SWOR Swordfish II", "category": "torpedo bomber", "military": true}
- trainer:
  * "North American T6 AT-6D Texan" → {"aircraft": "North American T6 AT-6D Texan", "category": "trainer", "military": true}
  * "British Aerospace HAWK Hawk T.1A" → {"aircraft": "British Aerospace HAWK Hawk T.1A", "category": "trainer", "military": true}
- transport:
  * "Lockheed C130 C-130H-30 Hercules" → {"aircraft": "Lockheed C130 C-130H-30 Hercules", "category": "transport", "military": true}
  * "Airbus Military A400 A400M" → {"aircraft": "Airbus Military A400 A400M", "category": "transport", "military": true}
- tanker:
  * "Boeing B762 KC-767J" → {"aircraft": "Boeing B762 KC-767J", "category": "tanker", "military": true}
  * "Ilyushin IL78 IL-78 MKI" → {"aircraft": "Ilyushin IL78 IL-78 MKI", "category": "tanker", "military": true}
- reconnaissance:
  * "Boeing R135 RC-135W Rivet Joint" → {"aircraft": "Boeing R135 RC-135W Rivet Joint", "category": "reconnaissance", "military": true}
  * "Grob EGRT G-520 Egrett" → {"aircraft": "Grob EGRT G-520 Egrett", "category": "reconnaissance", "military": true}
- liaison:
  * "Cessna O1 O-1A Bird Dog" → {"aircraft": "Cessna O1 O-1A Bird Dog", "category": "liaison", "military": true}
  * "Westland LYSA Lysander IIIA" → {"aircraft": "Westland LYSA Lysander IIIA", "category": "liaison", "military": true}
- helicopter:
  * "Bell UH1 UH-1P Iroquois" → {"aircraft": "Bell UH1 UH-1P Iroquois", "category": "helicopter", "military": true}
  * "Boeing-Vertol H47 CH-47D Chinook" → {"aircraft": "Boeing-Vertol H47 CH-47D Chinook", "category": "helicopter", "military": true}
- maritime patrol:
  * "Lockheed P3 P-3AM Orion" → {"aircraft": "Lockheed P3 P-3AM Orion", "category": "maritime patrol", "military": true}
  * "Consolidated P4Y Privateer PB4Y-2" → {"aircraft": "Consolidated P4Y Privateer PB4Y-2", "category": "maritime patrol", "military": true}
- electronic warfare:
  * "Boeing E3TF E-3A Sentry" → {"aircraft": "Boeing E3TF E-3A Sentry", "category": "electronic warfare", "military": true}
  * "Gulfstream Aerospace G550 E-550A CAEW" → {"aircraft": "Gulfstream Aerospace G550 E-550A CAEW", "category": "electronic warfare", "military": true}
- uav:
  * "General Atomics UAV MQ-9A Reaper" → {"aircraft": "General Atomics UAV MQ-9A Reaper", "category": "uav", "military": true}
  * "Northrop Grumman Q4 RQ-4D Phoenix" → {"aircraft": "Northrop Grumman Q4 RQ-4D Phoenix", "category": "uav", "military": true}

RULES:
1. Choose the most specific and accurate category
2. Set military=true for any military aircraft (including trainers, even if used by civilian organizations)
3. Set military=false for civilian aircraft
4. When in doubt between categories, choose the primary operational role
5. Respond ONLY with valid JSON in this exact format:
{"aircraft": "exact aircraft name", "category": "category_name", "military": true/false}"""

    def classify_aircraft(self, aircraft_name: str) -> Dict[str, Any]:
        """
        Classify a single aircraft.

        Args:
            aircraft_name: The name/designation of the aircraft

        Returns:
            Dictionary with aircraft name, category, and military status

        Raises:
            ValueError: If the response is not valid JSON or fails validation
            RuntimeError: If the API request itself fails
        """
        user_message = f"Classify this aircraft: {aircraft_name}"
        response_text = self._send_prompt(
            user_message, max_tokens=150, failure_label="API call failed"
        )
        classification = self._parse_json_response(response_text)

        # Validate the response
        if not self._validate_classification(classification):
            raise ValueError(f"Invalid classification returned: {classification}")
        return classification

    def classify_aircraft_batch(self, aircraft_list: List[str], batch_size: int = 10) -> List[Dict[str, Any]]:
        """
        Classify multiple aircraft efficiently using batch requests.

        A batch that fails for any reason contributes one error entry per
        aircraft (``category == "error"``, ``military is None``), so the
        result always has the same length and order as ``aircraft_list``.

        Args:
            aircraft_list: List of aircraft names/designations
            batch_size: Number of aircraft to classify in each API call (default: 10)

        Returns:
            List of classification dictionaries

        Raises:
            ValueError: If ``batch_size`` is less than 1
        """
        if batch_size < 1:
            # A negative step would silently classify nothing.
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")

        all_results = []

        # Process aircraft in batches
        for start in range(0, len(aircraft_list), batch_size):
            batch = aircraft_list[start:start + batch_size]
            batch_number = start // batch_size + 1

            try:
                batch_results = self._classify_batch_request(batch)
                all_results.extend(batch_results)
                print(f"✓ Classified batch {batch_number}: {len(batch)} aircraft")
            except Exception as e:
                # Best effort: one failed batch must not discard the others.
                print(f"✗ Error classifying batch {batch_number}: {e}")
                # Add error entries for the whole batch
                for aircraft in batch:
                    all_results.append({
                        "aircraft": aircraft,
                        "category": "error",
                        "military": None,
                        "error": str(e)
                    })

        return all_results

    def _classify_batch_request(self, aircraft_batch: List[str]) -> List[Dict[str, Any]]:
        """
        Send a single API request to classify multiple aircraft.

        Args:
            aircraft_batch: List of aircraft names for this batch

        Returns:
            List of classification dictionaries

        Raises:
            ValueError: If the response is not valid JSON, is not an array of
                the expected length, or contains an invalid classification
            RuntimeError: If the API request itself fails
        """
        # Create numbered list for the prompt
        aircraft_list_text = "\n".join([f"{i+1}. {aircraft}" for i, aircraft in enumerate(aircraft_batch)])

        user_message = f"""Classify these {len(aircraft_batch)} aircraft. Return a JSON array with one object per aircraft in the same order:

{aircraft_list_text}

Respond with a JSON array like this:
[
  {{"aircraft": "exact aircraft name 1", "category": "category_name", "military": true/false}},
  {{"aircraft": "exact aircraft name 2", "category": "category_name", "military": true/false}}
]"""

        response_text = self._send_prompt(
            user_message,
            max_tokens=4000,  # Increased for batch responses
            failure_label="Batch API call failed",
        )
        classifications = self._parse_json_response(response_text)

        if not isinstance(classifications, list):
            raise ValueError(
                f"Expected a JSON array of classifications, got {type(classifications).__name__}"
            )

        # Validate that we got the expected number of results
        if len(classifications) != len(aircraft_batch):
            raise ValueError(f"Expected {len(aircraft_batch)} classifications, got {len(classifications)}")

        # Validate each classification
        for position, classification in enumerate(classifications, start=1):
            if not self._validate_classification(classification):
                raise ValueError(f"Invalid classification for aircraft {position}: {classification}")

        return classifications

    def _send_prompt(self, user_message: str, max_tokens: int, failure_label: str) -> str:
        """Send one user message with the system prompt and return the reply text.

        Any failure while calling the API or reading the first content block
        is re-raised as ``RuntimeError`` prefixed with ``failure_label``.
        """
        try:
            message = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=self.system_prompt,
                messages=[
                    {"role": "user", "content": user_message}
                ]
            )
            return message.content[0].text.strip()
        except Exception as e:
            raise RuntimeError(f"{failure_label}: {e}") from e

    @staticmethod
    def _parse_json_response(response_text: str) -> Any:
        """Decode a JSON reply, tolerating a surrounding markdown code fence.

        Raises:
            ValueError: If the text is not valid JSON
        """
        text = response_text.strip()
        # Handle cases where response might have markdown code blocks
        if text.startswith("```"):
            text = text.replace("```json", "").replace("```", "").strip()
        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON response: {e}") from e

    def _validate_classification(self, classification: Dict) -> bool:
        """Validate that the classification follows the expected format and rules."""
        # The model may return something other than a JSON object.
        if not isinstance(classification, dict):
            return False

        # Check required keys exist
        required_keys = {"aircraft", "category", "military"}
        if not required_keys.issubset(classification):
            return False

        # Check category is valid
        all_categories = self.classification_scheme["civilian"] + self.classification_scheme["military"]
        category = classification["category"]
        if not isinstance(category, str) or category not in all_categories:
            return False

        # Check military is boolean
        return isinstance(classification["military"], bool)

    def get_classification_scheme(self) -> Dict:
        """Return the classification scheme for reference."""
        return self.classification_scheme

# Read the key from the environment; an empty string means it is not set.
API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
classifier = AircraftClassifier(api_key=API_KEY)

In [None]:
# Distinct, non-blank lookup strings that need a classification
unique_descriptions = [
    text
    for text in df['lookup'].dropna().unique().tolist()
    if text and str(text).strip()
]
print(f"{len(unique_descriptions)} unique aircraft types to classify")

In [None]:
# Run classifier (requires ANTHROPIC_API_KEY)
# Results are paired with unique_descriptions by position
if not API_KEY:
    print("Set ANTHROPIC_API_KEY to run classification")
    df['category'] = None
    df['military'] = None
    df['aircraft'] = df['description']
else:
    classifications = classifier.classify_aircraft_batch(unique_descriptions, batch_size=50)
    # zip stops at the shorter sequence, like an index loop bounded by min()
    lookup_to_class = dict(zip(unique_descriptions, classifications))
    for column in ('category', 'military'):
        df[column] = df['lookup'].map(
            lambda text, column=column: lookup_to_class.get(text, {}).get(column)
        )
    # Fall back to the lookup text itself when no classification is available
    df['aircraft'] = df['lookup'].map(
        lambda text: lookup_to_class.get(text, {}).get('aircraft', text)
    )

## Save to modes.csv

In [None]:
# Columns written to modes.csv, in output order
output_columns = [
    'hex', 'registration', 'manufacturer', 'typecode', 'type',
    'owner', 'operator', 'aircraft', 'category', 'military',
]
out = df.reset_index()[output_columns]
out.to_csv('modes.csv', index=False)
print(f"Saved {len(out)} rows to modes.csv")
out.head()