<a href="https://colab.research.google.com/github/ianellisjones/usn/blob/main/Auto_Map_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
US Navy "Big Deck" Fleet Tracker (CVN/LHA/LHD)

A scraping utility to track the deployment history of US Navy
Aircraft Carriers (CVN) and Amphibious Assault Ships (LHA/LHD).

This tool bypasses standard browser truncation issues by fetching raw HTML
and using context-aware parsing to determine the latest physical location,
status, and date for the fleet. Source: U.S. Carriers Net
"""

import csv
import re
from pathlib import Path
from typing import List, Tuple, Dict

import requests
from bs4 import BeautifulSoup

# --- Configuration ---

# Comprehensive list of "Big Deck" Flattops
# One history page per ship. main() derives the hull label (e.g. "CVN68")
# from the cvn/lha/lhd + digits portion of each URL, so every entry must
# keep that "<type><number>history.htm" naming.
FLEET_URLS: List[str] = [
    # Aircraft Carriers (CVN)
    "http://uscarriers.net/cvn68history.htm", # USS Nimitz
    "http://uscarriers.net/cvn69history.htm", # USS Eisenhower
    "http://uscarriers.net/cvn70history.htm", # USS Carl Vinson
    "http://uscarriers.net/cvn71history.htm", # USS Theodore Roosevelt
    "http://uscarriers.net/cvn72history.htm", # USS Abraham Lincoln
    "http://uscarriers.net/cvn73history.htm", # USS George Washington
    "http://uscarriers.net/cvn74history.htm", # USS John C. Stennis
    "http://uscarriers.net/cvn75history.htm", # USS Harry S. Truman
    "http://uscarriers.net/cvn76history.htm", # USS Ronald Reagan
    "http://uscarriers.net/cvn77history.htm", # USS George H.W. Bush
    "http://uscarriers.net/cvn78history.htm", # USS Gerald R. Ford

    # Amphibious Assault Ships (LHA/LHD)
    "http://uscarriers.net/lhd1history.htm", # USS Wasp
    "http://uscarriers.net/lhd2history.htm", # USS Essex
    "http://uscarriers.net/lhd3history.htm", # USS Kearsarge
    "http://uscarriers.net/lhd4history.htm", # USS Boxer
    "http://uscarriers.net/lhd5history.htm", # USS Bataan
    "http://uscarriers.net/lhd7history.htm", # USS Iwo Jima
    "http://uscarriers.net/lhd8history.htm", # USS Makin Island
    "http://uscarriers.net/lha6history.htm", # USS America
    "http://uscarriers.net/lha7history.htm", # USS Tripoli
]

# CSV report written by main(); the mapper cell reads a file of the same name.
OUTPUT_FILENAME = "big_deck_status.csv"
# Browser-like User-Agent header sent with every request in fetch_history_text().
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'


def fetch_history_text(url: str, char_limit: int = 50000) -> str:
    """
    Download a history page and return the tail of its visible text.

    The HTML is parsed with BeautifulSoup, tags are dropped, every line is
    stripped, and blank lines are discarded before the text is truncated.

    Args:
        url: Page to download.
        char_limit: Maximum number of trailing characters to keep; the most
                    recent log entries sit at the bottom of the page.

    Returns:
        The cleaned text (at most the last ``char_limit`` characters), or a
        string of the form ``"ERROR: <details>"`` when the request fails.
    """
    request_headers = {'User-Agent': USER_AGENT}

    # Only the network round-trip can raise RequestException.
    try:
        page = requests.get(url, headers=request_headers, timeout=20)
        page.raise_for_status()
        markup = page.content
    except requests.RequestException as exc:
        return f"ERROR: {str(exc)}"

    extracted = BeautifulSoup(markup, 'html.parser').get_text(separator='\n')

    # Collapse inconsistent HTML whitespace: strip each line, drop empties.
    kept_lines = []
    for fragment in extracted.split('\n'):
        trimmed = fragment.strip()
        if trimmed:
            kept_lines.append(trimmed)
    clean_text = '\n'.join(kept_lines)

    # Short pages are returned whole; long ones keep only their tail.
    if len(clean_text) <= char_limit:
        return clean_text
    return clean_text[-char_limit:]


def parse_status_entry(text_block: str) -> Tuple[str, str]:
    """
    Locate the newest status line in a block of history text.

    Every line is tagged with a year: lines inherit the year of the closest
    preceding line that begins with a 2023-2026 year header, and lines before
    any header inherit a block-wide fallback year. The tagged lines are then
    scanned from the bottom up for the first one that mentions an activity
    keyword, belongs to 2024-2026, and is not a "From X - Y" range summary.

    Returns:
        ``(year, line)`` for the matching line, or
        ``(fallback_year, "No status found.")`` when nothing qualifies.
        The fallback year is "Unknown" if the block mentions no 2023-2026 year.
    """
    # Fallback year: last 2024/2025 mention wins over any other year mention,
    # so projected future dates do not become the context year.
    all_years = re.findall(r'(202[3-6])', text_block)
    preferred_years = [yr for yr in all_years if yr in ['2024', '2025']]
    if preferred_years:
        current_year = preferred_years[-1]
    elif all_years:
        current_year = all_years[-1]
    else:
        current_year = "Unknown"

    # Forward pass: attach the active year to each line.
    header_pattern = re.compile(r'^202[3-6]')
    tagged_lines = []
    active_year = current_year
    for raw_line in text_block.split('\n'):
        header = header_pattern.search(raw_line)
        if header is not None:
            active_year = header.group(0)
        tagged_lines.append((active_year, raw_line))

    activity_words = (
        "moored", "anchored", "underway", "arrived", "departed",
        "transited", "operations", "returned", "participated", "conducted",
        "moved to", "visited", "pulled into", "sea trials", "flight deck certification",
    )
    accepted_years = ("2024", "2025", "2026")

    # Backward pass: the latest qualifying entry is the current status.
    for line_year, raw_line in reversed(tagged_lines):
        if line_year not in accepted_years:
            continue
        lowered = raw_line.lower()
        if not any(word in lowered for word in activity_words):
            continue
        # Range summaries (e.g. "From Jan - Mar") are not point-in-time entries.
        if lowered.strip().startswith("from ") and " - " in lowered:
            continue
        return line_year, raw_line

    return current_year, "No status found."


def categorize_location(text: str) -> str:
    """
    Translate a status sentence into a high-level location label.

    Matching is case-insensitive substring search. Departure phrases are
    checked first so a ship that just left port is reported in the adjoining
    ocean rather than at the port. After that, rules are tried in order
    (ports, then specific regions, then broad oceans) and the first rule with
    any matching keyword wins. Returns "Underway / Unknown" if nothing matches.
    """
    lowered = text.lower()

    # --- MOVEMENT OVERRIDES ---
    # A departure places the ship in the body of water, not the origin port.
    departure_overrides = (
        ("departed san diego", "Pacific Ocean"),
        ("departed norfolk", "Atlantic Ocean"),
        ("departed pearl harbor", "Pacific Ocean"),
        ("departed mayport", "Atlantic Ocean"),
    )
    for phrase, ocean in departure_overrides:
        if phrase in lowered:
            return ocean

    # --- LOCATION MAPPING ---
    # Order matters: earlier rules take precedence over later ones.
    ordered_rules = (
        # Ports / Shipyards
        ("Norfolk / Portsmouth", ("norfolk", "portsmouth", "virginia beach", "nassco")),
        ("San Diego", ("san diego", "north island", "camp pendleton")),
        ("Bremerton / Kitsap", ("bremerton", "kitsap")),
        ("Newport News", ("newport news",)),
        ("Yokosuka", ("yokosuka",)),
        ("Pearl Harbor", ("pearl harbor",)),
        ("Mayport", ("mayport",)),
        ("Everett", ("everett",)),
        ("Singapore", ("singapore", "changi")),
        ("Bahrain", ("bahrain", "manama")),
        ("Dubai", ("dubai", "jebel ali")),
        ("Busan", ("busan",)),
        ("Guam", ("guam", "apra")),
        ("Sasebo", ("sasebo", "juliet basin")),
        ("Malaysia", ("malaysia", "klang")),
        ("Philippines", ("philippines", "manila", "subic")),
        ("Pascagoula", ("pascagoula",)),

        # Specific Strategic Regions
        ("South China Sea", ("south china sea", "spratly islands", "luzon")),
        ("Western Pacific (WESTPAC)", ("san bernardino strait", "western pacific", "westpac")),
        ("Red Sea", ("red sea",)),
        ("Persian Gulf", ("persian gulf", "arabian gulf")),
        ("Gulf of Oman", ("gulf of oman",)),
        ("Gulf of Aden", ("gulf of aden",)),
        ("Mediterranean", ("mediterranean",)),
        ("Caribbean Sea", ("caribbean", "st. croix", "trinidad", "tobago", "puerto rico")),
        ("North Sea", ("north sea",)),
        ("Norwegian Sea", ("norwegian sea",)),
        ("Strait of Gibraltar", ("gibraltar",)),
        ("Suez Canal", ("suez",)),
        ("Bab el-Mandeb", ("bab el-mandeb",)),

        # Broad Oceans (Prioritized Last)
        ("Philippine Sea", ("philippine sea", "okinawa")),
        ("Atlantic Ocean", ("atlantic",)),
        ("Pacific Ocean", ("pacific",)),
        ("Indian Ocean", ("indian ocean",)),
    )

    for label, needles in ordered_rules:
        for needle in needles:
            if needle in lowered:
                return label

    return "Underway / Unknown"


# Month names in full or common abbreviated form ("Sep", "Sept", "September").
_MONTH_NAMES = (
    r'Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|'
    r'Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?'
)

# \b stops matches starting mid-word; spelling the months out stops words that
# merely begin with a month prefix ("Marine 11", "Mayport 2") from matching;
# (?!\d) stops a year from being read as a day ("January 2024" -> "January 20").
_DATE_PATTERN = re.compile(
    r'\b(?:' + _MONTH_NAMES + r')\.?\s+\d{1,2}(?!\d)',
    re.IGNORECASE,
)


def extract_date(text: str) -> str:
    """
    Return the last "Month Day" date mentioned in the text.

    Recognises full and abbreviated month names (case-insensitive), with an
    optional trailing period, followed by a one- or two-digit day, e.g.
    "Nov. 18" or "September 16". The matched text is returned exactly as it
    appears in the input.

    Args:
        text: Status sentence to search.

    Returns:
        The last matching date substring, or "Date Unspecified" if none.
    """
    matches = _DATE_PATTERN.findall(text)
    return matches[-1] if matches else "Date Unspecified"


def main():
    """
    Scrape every ship in FLEET_URLS, print one status line per hull, and
    write the collected rows to OUTPUT_FILENAME as CSV.

    For each URL the hull label is taken from the URL itself, the page text
    is fetched, and the latest status sentence is reduced to a location tag
    and a date. A failed fetch produces a row whose location and date are
    "Error" and whose status sentence is the error message.
    """
    print(f"{'='*90}")
    print("US NAVY BIG DECK TRACKER (CVN + LHA/LHD)")
    print(f"{'='*90}\n")

    results: List[Dict[str, str]] = []

    for url in FLEET_URLS:
        # Extract hull number (Supports CVN, LHA, LHD)
        hull_match = re.search(r'((?:cvn|lha|lhd)\d+)', url, re.IGNORECASE)
        hull = hull_match.group(1).upper() if hull_match else "UNK"

        raw_text = fetch_history_text(url)

        # fetch_history_text reports failure as "ERROR: <details>". Test the
        # prefix only: a substring test would misclassify any successfully
        # fetched page that happens to contain the word "ERROR".
        if raw_text.startswith("ERROR:"):
            status, loc_tag, date_str = raw_text, "Error", "Error"
        else:
            year, status = parse_status_entry(raw_text)
            loc_tag = categorize_location(status)
            date_str = extract_date(status)

            # Fallback to year if no specific date found
            if date_str == "Date Unspecified":
                date_str = year

        results.append({
            "Hull": hull,
            "Location": loc_tag,
            "Date": date_str,
            "Status Sentence": status,
            "Source URL": url
        })

        # Console Output Format: [HULL] [LOCATION] [DATE] SENTENCE
        print(f"[{hull}] [{loc_tag}] [{date_str}] {status}")

    # Write results to CSV
    try:
        output_path = Path(OUTPUT_FILENAME)
        # newline='' lets the csv module control line endings itself.
        with output_path.open(mode='w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ["Hull", "Location", "Date", "Status Sentence", "Source URL"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)

        print(f"\n{'='*90}")
        print(f"SUCCESS: Report saved to '{output_path.absolute()}'")
        print(f"{'='*90}")

    except PermissionError:
        print(f"\nERROR: Could not write to {OUTPUT_FILENAME}. Is the file open in Excel?")


if __name__ == "__main__":
    main()

US NAVY BIG DECK TRACKER (CVN + LHA/LHD)

[CVN68] [South China Sea] [Nov. 18] From November 8-11, the Nimitz CSG conducted operations  off the coast of Brunei; Conducted operations northeast of Spratly Islands from Nov. 12-13; Conducted operations off the southwest coast of Luzon from Nov. 14-17; Transited the San Bernardino Strait northbound on Nov. 18.
[CVN69] [Norfolk / Portsmouth] [Jan. 8] September 26, USS Dwight D. Eisenhower moored at Pier 12N on Naval Station Norfolk after a six-day underway for TRACOM-CQ, in the Jacksonville Op. Area; Moved "dead-stick" to Super Pier 5N in Norfolk Naval Shipyard, for a Planned Incremental Availability (PIA), on Jan. 8.
[CVN70] [San Diego] [September 16] September 16, The Carl Vinson moored at Juliet Pier on Naval Air Station North Island after a three-day underway for ammo offload.
[CVN71] [San Diego] [Nov. 10] November  8, USS Theodore Roosevelt moored at Berth Lima on Naval Air Station North Island   after a 12-day underway for Tailored Ship

In [None]:
"""
US Navy Fleet Mapper

Visualizes the output of the 'Big Deck Tracker' on an interactive map.
Reads 'big_deck_status.csv' and generates 'fleet_map.html'.

Uses the Folium library for Leaflet.js mapping.
"""

import pandas as pd
import folium
import os
from folium.plugins import MarkerCluster

# --- CONFIGURATION ---
# CSV produced by the scraper cell (same filename as its output).
INPUT_FILE = "big_deck_status.csv"
# Standalone HTML map written by generate_map().
OUTPUT_FILE = "fleet_map.html"

# --- COORDINATE DICTIONARY ---
# Maps the "High Level Location" tags from the scraper to [Lat, Lon]
# Coordinates are approximate centers of ports or regions.
# Keys must be lowercase: get_coords() lower-cases and strips the tag before
# looking it up. A tag with no entry here (e.g. the scraper's "Error" rows)
# yields no marker, only a console warning.
LOCATION_COORDS = {
    # US Ports
    "norfolk / portsmouth": [36.96, -76.32],
    "san diego": [32.68, -117.18],
    "bremerton / kitsap": [47.55, -122.64],
    "everett": [47.98, -122.22],
    "newport news": [36.98, -76.44],
    "mayport": [30.39, -81.42],
    "pearl harbor": [21.35, -157.97],
    "pascagoula": [30.34, -88.56],

    # Foreign Ports
    "yokosuka": [35.29, 139.66],
    "sasebo": [33.16, 129.71],
    "busan": [35.10, 129.11],
    "guam": [13.44, 144.65],
    "singapore": [1.30, 103.85],
    "bahrain": [26.22, 50.61],
    "dubai": [25.26, 55.30],
    "philippines": [14.66, 120.76], # Subic/Manila general area
    "malaysia": [3.00, 101.38], # Port Klang area

    # Regions / Seas (Approximate Centroids)
    "south china sea": [12.00, 114.00],
    "philippine sea": [20.00, 130.00],
    "western pacific (westpac)": [15.00, 135.00],
    "persian gulf": [27.00, 51.00],
    "red sea": [20.00, 38.00],
    "gulf of oman": [24.00, 58.00],
    "gulf of aden": [12.00, 48.00],
    "mediterranean": [35.00, 18.00],
    "caribbean sea": [15.00, -75.00], # Central Caribbean
    "north sea": [56.00, 3.00],
    "norwegian sea": [66.00, 5.00],
    "strait of gibraltar": [35.95, -5.60],
    "suez canal": [30.60, 32.33],
    "bab el-mandeb": [12.58, 43.33],

    # Broad Oceans (Generic Markers)
    "atlantic ocean": [33.00, -60.00], # Mid-Atlantic
    "pacific ocean": [25.00, -150.00], # Mid-Pacific
    "indian ocean": [-5.00, 80.00],

    # Fallback
    # Ships with no recognised location are plotted at 0N 0E rather than dropped.
    "underway / unknown": [0.0, 0.0] # Null Island
}

def get_coords(location_tag):
    """
    Look up the map position for a location tag.

    The tag is converted to a string, lower-cased and stripped before the
    lookup, so matching is case- and padding-insensitive.

    Returns:
        The ``[lat, lon]`` entry from LOCATION_COORDS, or None when the
        normalised tag has no entry.
    """
    normalized_key = str(location_tag).lower().strip()
    try:
        return LOCATION_COORDS[normalized_key]
    except KeyError:
        return None

def generate_map():
    """
    Build an interactive Folium map from the scraper's CSV report.

    Reads INPUT_FILE, places one marker per row whose location tag has known
    coordinates (blue plane icon for CVN hulls, red anchor icon otherwise),
    groups the markers into two toggleable layers, and saves the result to
    OUTPUT_FILE. Rows with an unrecognised location tag are skipped with a
    console warning. If INPUT_FILE does not exist, an error is printed and
    nothing is written.
    """
    # Local import keeps this block self-contained (stdlib only).
    import html

    if not os.path.exists(INPUT_FILE):
        print(f"ERROR: Could not find {INPUT_FILE}. Run the scraper first!")
        return

    print(f"Reading fleet data from {INPUT_FILE}...")
    df = pd.read_csv(INPUT_FILE)

    # Initialize Map centered on the world
    m = folium.Map(location=[20, 0], zoom_start=2, tiles="CartoDB positron")

    # Create feature groups for easy toggling
    carrier_group = folium.FeatureGroup(name="Aircraft Carriers (CVN)")
    amphib_group = folium.FeatureGroup(name="Amphibious Ships (LHA/LHD)")

    marker_count = 0

    for _, row in df.iterrows():
        # Coerce to str so a blank CSV cell (read by pandas as NaN) cannot
        # raise TypeError in the substring test or in .title() below.
        hull = str(row['Hull'])
        loc_tag = str(row['Location'])
        status = row['Status Sentence']
        date = row['Date']

        coords = get_coords(loc_tag)

        if coords:
            # Define Icon: Blue for CVN, Red for LHA/LHD
            if "CVN" in hull:
                icon_color = "blue"
                icon_type = "plane" # FontAwesome icon
                target_group = carrier_group
            else:
                icon_color = "red"
                icon_type = "anchor"
                target_group = amphib_group

            # The status sentence is scraped from a third-party website and
            # the popup/tooltip strings are embedded as raw HTML, so escape
            # every interpolated value to keep stray markup out of the page.
            safe_hull = html.escape(hull)
            safe_date = html.escape(str(date))
            safe_location = html.escape(loc_tag.title())
            safe_status = html.escape(str(status))

            # Create Popup Content (HTML)
            popup_html = f"""
            <div style="width:300px">
                <h4><b>{safe_hull}</b></h4>
                <p><b>Date:</b> {safe_date}</p>
                <p><b>Location:</b> {safe_location}</p>
                <hr>
                <p><i>"{safe_status}"</i></p>
            </div>
            """

            # Add Marker
            folium.Marker(
                location=coords,
                popup=folium.Popup(popup_html, max_width=320),
                tooltip=f"{safe_hull} - {safe_location}",
                icon=folium.Icon(color=icon_color, icon=icon_type, prefix='fa')
            ).add_to(target_group)

            marker_count += 1
        else:
            print(f"WARNING: No coordinates found for location tag: '{loc_tag}' (Ship: {hull})")

    # Add layers to map
    carrier_group.add_to(m)
    amphib_group.add_to(m)
    folium.LayerControl().add_to(m)

    # Save map
    m.save(OUTPUT_FILE)
    print(f"\n{'='*60}")
    print(f"SUCCESS: Map generated with {marker_count} ships.")
    print(f"Open '{os.path.abspath(OUTPUT_FILE)}' in your web browser.")
    print(f"{'='*60}")


if __name__ == "__main__":
    generate_map()

Reading fleet data from big_deck_status.csv...

SUCCESS: Map generated with 20 ships.
Open '/content/fleet_map.html' in your web browser.
