# Eclipse Chatbot — LLM-Powered Viewing Advisor
Ask where and when to view an eclipse from your location, or query a specific eclipse at a specific place. The chatbot uses your eclipse database and an LLM to generate viewing advice.

In [None]:
import os
import json
import re
import numpy as np
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI
import ipywidgets as widgets
from IPython.display import display, HTML

# Load API key from .env
load_dotenv()
api_key = os.environ.get("ASTRO1221_API_KEY")

if not api_key:
    print("âš   ASTRO1221_API_KEY not found in .env â€” check your file.")
else:
    print(f"âœ“ API key loaded (ends with ...{api_key[-4:]})")

client = OpenAI(api_key=api_key)

✓ API key loaded (ends with ...faow)
✗ OpenAI error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-LT1uy*************faow. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'code': 'invalid_api_key', 'param': None}, 'status': 401}


In [3]:
# ============================================================
# Load eclipse database
# ============================================================
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the file
# is read the same way on every platform; without it open() falls back to the
# locale's default encoding.
with open("eclipse_data.json", encoding="utf-8") as f:
    data = json.load(f)

# `eclipse_list` is the module-level catalog every search function below reads.
eclipse_list = data["eclipse_list"]
if eclipse_list:
    print(f"âœ“ Loaded {len(eclipse_list)} eclipses  ({eclipse_list[0]['date_raw']} â†’ {eclipse_list[-1]['date_raw']})")
else:
    # An empty catalog used to crash on eclipse_list[0]; report it instead.
    print("Loaded 0 eclipses: 'eclipse_list' in eclipse_data.json is empty.")

✓ Loaded 224 eclipses  (2001 Jun 21 → 2100 Sep 04)


In [4]:
# ============================================================
# ECLIPSE SEARCH & GEOMETRY FUNCTIONS
# ============================================================

# Three-letter English month abbreviations, as used in the catalog's
# 'date_raw' field, mapped to month numbers.
MONTH_MAP = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
    "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
    "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}


def parse_eclipse_date(date_str):
    """Convert a catalog date such as '2024 Apr 08' into a ``datetime``.

    The string is split on whitespace into year, month abbreviation and day;
    any tokens after the third are ignored. The month abbreviation is matched
    case-insensitively against ``MONTH_MAP``.

    Args:
        date_str: Date text in 'YYYY Mon DD' form.

    Returns:
        A ``datetime`` at midnight of that day, or ``None`` when the input is
        not a string, has fewer than three tokens, names an unknown month, or
        describes an impossible calendar date.
    """
    try:
        year_txt, month_txt, day_txt = date_str.strip().split()[:3]
        # Look the month up strictly: an unrecognised month must fail rather
        # than silently turn into January.
        month = MONTH_MAP[month_txt.title()]
        return datetime(int(year_txt), month, int(day_txt))
    except (AttributeError, TypeError, ValueError, KeyError):
        # AttributeError/TypeError: non-string input; ValueError: too few
        # tokens, non-numeric year/day or invalid date; KeyError: bad month.
        return None


def parse_coord(coord_str):
    """Parse a hemisphere-tagged coordinate into signed decimal degrees.

    Examples: '11S' -> -11.0, '131W' -> -131.0, '25.3N' -> 25.3.
    Accepts whole or decimal degrees, an optional degree sign and an upper-
    or lower-case hemisphere letter. South and west are negative.

    Args:
        coord_str: Text such as '11S'; may be ``None``, empty or '-'.

    Returns:
        The signed value as a float, or 0.0 when the input is missing or does
        not look like a coordinate. Callers therefore cannot tell "unknown"
        apart from a genuine 0 degrees.
    """
    if not coord_str:
        return 0.0
    m = re.match(
        r"(\d+(?:\.\d+)?)\s*\u00b0?\s*([NSEW])",
        coord_str.strip(),
        re.IGNORECASE,
    )
    if m is None:
        # Covers the catalog's '-' placeholder as well as malformed text.
        return 0.0
    value = float(m.group(1))
    return -value if m.group(2).upper() in ("S", "W") else value


def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Inputs are latitudes/longitudes in degrees; the haversine formula is
    evaluated on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371.0
    phi1, lam1, phi2, lam2 = (
        np.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    hav = np.sin(half_dphi) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam) ** 2
    # Clip before the square root so rounding error cannot push the value
    # outside arcsin's domain.
    central_angle = 2 * np.arcsin(np.sqrt(np.clip(hav, 0, 1)))
    return earth_radius_km * central_angle


def eclipse_visibility_km(eclipse):
    """Estimate how far from greatest eclipse this eclipse can be seen (km).

    Reads ``eclipse["_raw"]["path_width_km"]``. A positive width marks a
    central eclipse: the radius is half the path width plus 3500 km of
    partial-phase visibility. Anything else (missing, '-', non-numeric, zero
    or negative) is treated as a partial eclipse with a flat 2500 km radius.
    """
    width_field = eclipse.get("_raw", {}).get("path_width_km", "-")
    try:
        width_km = float(width_field)
    except (ValueError, TypeError):
        width_km = 0

    if width_km > 0:
        return width_km / 2 + 3500
    return 2500


def is_visible_from(eclipse, lat, lon):
    """Return whether (lat, lon) lies within the eclipse's rough visibility radius.

    The observer's great-circle distance to the point of greatest eclipse
    (``_raw.latitude`` / ``_raw.longitude``) is compared with
    ``eclipse_visibility_km(eclipse)``.
    """
    details = eclipse.get("_raw", {})
    center_lat = parse_coord(details.get("latitude", "0N"))
    center_lon = parse_coord(details.get("longitude", "0E"))
    separation_km = haversine_km(lat, lon, center_lat, center_lon)
    reach_km = eclipse_visibility_km(eclipse)
    return separation_km <= reach_km


def find_next_eclipses(lat, lon, n=3, after_date=None):
    """Find the next ``n`` eclipses roughly visible from (lat, lon).

    Args:
        lat, lon: Observer position in signed decimal degrees.
        n: Maximum number of eclipses to return; ``n <= 0`` yields ``[]``.
        after_date: Earliest calendar day to consider (``datetime`` or
            ``date``). Defaults to today. Only the date part is used, and the
            day itself is included.

    Returns:
        A list of ``(eclipse, eclipse_datetime, distance_km)`` tuples in
        catalog order. The scan stops at the n-th hit, so "next" relies on
        ``eclipse_list`` being chronological.
    """
    if n <= 0:
        return []
    if after_date is None:
        after_date = datetime.now()
    # Catalog dates have no time of day and parse to midnight. Truncating the
    # cut-off to midnight keeps an eclipse happening *today* from being
    # skipped just because the current time is past 00:00.
    cutoff = datetime(after_date.year, after_date.month, after_date.day)

    results = []
    for ecl in eclipse_list:
        dt = parse_eclipse_date(ecl["date_raw"])
        if dt is None or dt < cutoff:
            continue
        raw = ecl.get("_raw", {})
        ecl_lat = parse_coord(raw.get("latitude", "0N"))
        ecl_lon = parse_coord(raw.get("longitude", "0E"))
        # Same criterion as is_visible_from(), computed once so the distance
        # can be reused in the result tuple.
        dist = haversine_km(lat, lon, ecl_lat, ecl_lon)
        if dist <= eclipse_visibility_km(ecl):
            results.append((ecl, dt, dist))
            if len(results) >= n:
                break
    return results


def find_eclipse_by_date(target_date_str):
    """Find catalog eclipses whose ``date_raw`` matches a free-form date string.

    Matching is case-insensitive and done in two passes:

    1. Substring match in either direction ('2024', '2024 apr', or a longer
       sentence that contains a full catalog date).
    2. If pass 1 finds nothing, the query is split into tokens (commas are
       treated as spaces) and every token must occur in ``date_raw``. Full or
       partial month names longer than three letters ('april', 'sept') are
       first shortened to the catalog's three-letter form, so 'April 8, 2024'
       matches '2024 Apr 08'.

    Args:
        target_date_str: User-supplied date text.

    Returns:
        A list of matching eclipse records (possibly empty). An empty or
        blank query matches nothing.
    """
    target = (target_date_str or "").strip().lower()
    if not target:
        # "" is a substring of every date; treat it as "no query" instead.
        return []

    full_months = (
        "january", "february", "march", "april", "may", "june", "july",
        "august", "september", "october", "november", "december",
    )

    def _abbreviate(token):
        # Reduce 'april' / 'sept' to 'apr' / 'sep'; leave everything else alone.
        if len(token) > 3 and any(name.startswith(token) for name in full_months):
            return token[:3]
        return token

    matches = []
    for ecl in eclipse_list:
        raw_date = ecl["date_raw"].lower()
        if target in raw_date or raw_date in target:
            matches.append(ecl)
    if matches:
        return matches

    # Fallback: token-wise containment (e.g. "apr 2024", "April 8, 2024").
    # NOTE: tokens are substring-matched, so a bare day such as "8" also
    # matches "18" or "2028"; the other tokens normally disambiguate.
    tokens = [_abbreviate(t) for t in target.replace(",", " ").split()]
    if not tokens:
        return []
    return [
        ecl for ecl in eclipse_list
        if all(t in ecl["date_raw"].lower() for t in tokens)
    ]


def eclipse_summary(ecl, obs_lat=None, obs_lon=None):
    """Render one eclipse record as a multi-line text block for the LLM prompt.

    Always lists date, type, magnitude, saros, duration, the point of greatest
    eclipse, path width and gamma. When both observer coordinates are given,
    two more lines report the observer's distance from that point and the
    rough visibility verdict. Coordinates are printed as signed values with a
    fixed N/E suffix.
    """
    details = ecl.get("_raw", {})
    center_lat = parse_coord(details.get("latitude", "0N"))
    center_lon = parse_coord(details.get("longitude", "0E"))

    summary = []
    summary.append(f"Date: {ecl['date_raw']}")
    summary.append(f"Type: {ecl['type']} (code: {ecl.get('type_code', '?')})")
    summary.append(f"Magnitude: {ecl.get('magnitude', '?')}")
    summary.append(f"Saros: {ecl.get('saros', '?')}")
    summary.append(f"Duration: {ecl.get('duration', 'N/A')}")
    summary.append(f"Greatest Eclipse at: {center_lat:.1f}Â°N, {center_lon:.1f}Â°E")
    summary.append(f"Path Width: {details.get('path_width_km', 'N/A')} km")
    summary.append(f"Gamma: {details.get('gamma', '?')}")

    have_observer = obs_lat is not None and obs_lon is not None
    if have_observer:
        separation = haversine_km(obs_lat, obs_lon, center_lat, center_lon)
        verdict = "Yes" if is_visible_from(ecl, obs_lat, obs_lon) else "No / unlikely"
        summary.append(f"Observer distance: {separation:,.0f} km from center")
        summary.append(f"Visible from observer: {verdict}")

    return "\n".join(summary)


# Quick test
# Smoke-test the search helpers against the loaded catalog: look up the next
# two eclipses for a point near Austin and print one line per hit.
print("âœ“ Search functions loaded.")
test = find_next_eclipses(30.0, -97.0, n=2)  # Austin, TX
# Each result is an (eclipse record, parsed datetime, distance in km) tuple.
for ecl, dt, dist in test:
    print(f"  Next from Austin: {ecl['date_raw']}  {ecl['type']}  ({dist:,.0f} km away)")

âœ“ Search functions loaded.
  Next from Austin: 2045 Aug 12  Total  (1,820 km away)
  Next from Austin: 2052 Mar 30  Total  (1,072 km away)


In [5]:
# ============================================================
# LLM INTEGRATION - prompt builder + chat function
# ============================================================

# System message sent as the first entry of every request. It tells the model
# to rely on the injected ECLIPSE DATA and to format recommended coordinates
# in a fixed pattern. The text itself is runtime data: edit with care.
SYSTEM_PROMPT = """You are an expert solar eclipse advisor. You help people find
and plan for solar eclipses based on a NASA catalog of 224 eclipses from 2001â€“2100.

When the user asks about eclipses, you will receive ECLIPSE DATA pulled from the
database as context. Use that data to give accurate, specific answers.

Your capabilities:
â€¢ Tell users the next visible eclipse(s) from their location
â€¢ Describe what a specific eclipse will look like from a given place
â€¢ Provide viewing advice (safety, best locations along the path, weather tips)
â€¢ Explain eclipse types (Total, Annular, Hybrid, Partial) and what they look like
â€¢ Suggest the best lat/lon coordinates for viewing a given eclipse

When recommending a viewing location, always include the latitude and longitude
so the user can plug them into the visualization tool. Format coordinates as:
  **Recommended viewing: XX.XÂ°N, XX.XÂ°E**

Keep answers concise but informative. Use the eclipse data provided â€” don't invent
eclipse dates or magnitudes."""


# Conversation history (persists across messages)
# Module-level list of {"role": ..., "content": ...} dicts; chat() appends to
# it and sends it after the system prompt on every request.
chat_history = []


# Approximate (lat, lon) in signed decimal degrees for cities recognised by name.
_CITY_COORDS = {
    "new york": (40.7, -74.0), "los angeles": (34.1, -118.2),
    "chicago": (41.9, -87.6), "houston": (29.8, -95.4),
    "austin": (30.3, -97.7), "dallas": (32.8, -96.8),
    "denver": (39.7, -105.0), "seattle": (47.6, -122.3),
    "miami": (25.8, -80.2), "atlanta": (33.7, -84.4),
    "london": (51.5, -0.1), "paris": (48.9, 2.3),
    "tokyo": (35.7, 139.7), "sydney": (-33.9, 151.2),
    "cairo": (30.0, 31.2), "mumbai": (19.1, 72.9),
    "beijing": (39.9, 116.4), "mexico city": (19.4, -99.1),
    "toronto": (43.7, -79.4), "berlin": (52.5, 13.4),
    "rome": (41.9, 12.5), "madrid": (40.4, -3.7),
    "san francisco": (37.8, -122.4), "phoenix": (33.4, -112.0),
    "boston": (42.4, -71.1), "washington": (38.9, -77.0),
    "nashville": (36.2, -86.8), "portland": (45.5, -122.7),
    "indianapolis": (39.8, -86.2), "cleveland": (41.5, -81.7),
}

_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june", "july",
    "august", "september", "october", "november", "december",
)

# Labelled form: "lat 30 lon -97", "Latitude: 30.5, Longitude = -97.2".
_LABELLED_COORD_RE = re.compile(
    r'\blat(?:itude)?\s*[:=]?\s*(-?\d+(?:\.\d+)?)\s*,?\s*'
    r'lon(?:gitude)?\s*[:=]?\s*(-?\d+(?:\.\d+)?)',
    re.IGNORECASE,
)

# Pair form: "30N 97W", "30.5, -97.2", "30 north, 97 west" (degree sign
# optional). A comma or whitespace between the two numbers is mandatory and
# the numbers must stand on their own, so a bare year like "2026" can no
# longer be split into a latitude and a longitude.
_COORD_PAIR_RE = re.compile(
    r'(?<![\w.\-])(-?\d{1,3}(?:\.\d+)?)\s*\u00b0?\s*(N(?:orth)?|S(?:outh)?)?'
    r'(?:\s*,\s*|\s+)'
    r'(-?\d{1,3}(?:\.\d+)?)\s*\u00b0?\s*(E(?:ast)?|W(?:est)?)?(?![\w.])',
    re.IGNORECASE,
)

# Tried in order; the first pattern found in the message decides the lookup.
_DATE_PATTERNS = (
    r'(\d{4}\s+\w{3}\s+\d{1,2})',      # "2024 Apr 08"
    r'(\w+\s+\d{1,2},?\s+\d{4})',      # "April 8, 2024"
    r'\b(\d{4})\b',                    # just a year
)

_ECLIPSE_TYPES = ("Total", "Annular", "Hybrid", "Partial")


def _extract_coordinates(text):
    """Return the first plausible (lat, lon) written in *text*, else (None, None)."""
    candidates = []
    labelled = _LABELLED_COORD_RE.search(text)
    if labelled:
        candidates.append((float(labelled.group(1)), float(labelled.group(2))))
    for m in _COORD_PAIR_RE.finditer(text):
        lat, lon = float(m.group(1)), float(m.group(3))
        # A hemisphere tag overrides any written sign.
        if m.group(2):
            lat = -abs(lat) if m.group(2)[0] in "sS" else abs(lat)
        if m.group(4):
            lon = -abs(lon) if m.group(4)[0] in "wW" else abs(lon)
        candidates.append((lat, lon))
    for lat, lon in candidates:
        # Reject number pairs that cannot be geographic coordinates.
        if -90 <= lat <= 90 and -180 <= lon <= 180:
            return lat, lon
    return None, None


def _detect_city(msg):
    """Return (city, (lat, lon)) for the first known city named in lower-cased *msg*."""
    for city, coords in _CITY_COORDS.items():
        # Whole-word match so "comparison" does not resolve to Paris.
        if re.search(rf"\b{re.escape(city)}\b", msg):
            return city, coords
    return None, None


def _normalize_date_query(date_str):
    """Rewrite 'April 8, 2024' as the catalog form '2024 Apr 08'; otherwise return it unchanged."""
    m = re.fullmatch(r'([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4})', date_str.strip())
    if not m:
        return date_str
    word = m.group(1).lower()
    if len(word) < 3 or not any(name.startswith(word) for name in _MONTH_NAMES):
        return date_str
    return f"{m.group(3)} {word[:3].title()} {int(m.group(2)):02d}"


def build_eclipse_context(user_message):
    """Collect catalog data relevant to *user_message* for injection into the LLM prompt.

    Steps, in order:
      1. Observer location: a known city name wins; otherwise explicit
         coordinates in the message are used if they are in range.
      2. "next / upcoming / when / soonest / future" plus a location: the
         next three visible eclipses.
      3. A date or year in the message: up to five matching eclipses.
      4. An eclipse type word: the catalog count for that type and up to
         three upcoming eclipses of that type.
      5. If nothing at all was collected: a general catalog summary with
         per-type counts computed from ``eclipse_list``.

    Args:
        user_message: The raw text typed by the user.

    Returns:
        ``(context_text, obs_lat, obs_lon)``. ``obs_lat`` / ``obs_lon`` are
        signed decimal degrees, or ``None`` when no location was detected.
    """
    msg = user_message.lower()
    context_parts = []

    # --- Observer location: city name first, then explicit coordinates ---
    city, city_pos = _detect_city(msg)
    if city is not None:
        obs_lat, obs_lon = city_pos
        context_parts.append(f"[Detected city: {city.title()} â†’ {obs_lat}Â°N, {obs_lon}Â°E]")
    else:
        obs_lat, obs_lon = _extract_coordinates(user_message)
        if obs_lat is not None:
            context_parts.append(f"[Detected coordinates: lat {obs_lat}, lon {obs_lon}]")

    # --- "Next eclipse" query ---
    if any(kw in msg for kw in ["next", "upcoming", "when", "soonest", "future"]):
        if obs_lat is not None:
            results = find_next_eclipses(obs_lat, obs_lon, n=3)
            if results:
                context_parts.append(f"NEXT ECLIPSES VISIBLE FROM ({obs_lat}Â°N, {obs_lon}Â°E):")
                for ecl, _dt, _dist in results:
                    context_parts.append(eclipse_summary(ecl, obs_lat, obs_lon))
                    context_parts.append("---")
            else:
                context_parts.append(f"No upcoming eclipses found visible from ({obs_lat}, {obs_lon}) in the database.")

    # --- Specific date query ---
    for pat in _DATE_PATTERNS:
        m = re.search(pat, user_message)
        if m:
            date_str = m.group(1)
            # Look up in catalog form so "April 8, 2024" can actually match.
            matches = find_eclipse_by_date(_normalize_date_query(date_str))
            if matches:
                context_parts.append(f"ECLIPSES MATCHING '{date_str}':")
                for ecl in matches[:5]:
                    context_parts.append(eclipse_summary(ecl, obs_lat, obs_lon))
                    context_parts.append("---")
            break

    # --- Eclipse type query ---
    for etype in ["total", "annular", "hybrid", "partial"]:
        if etype not in msg:
            continue
        type_eclipses = [e for e in eclipse_list if e["type"].lower() == etype]
        context_parts.append(f"DATABASE: {len(type_eclipses)} {etype} eclipses in catalog.")
        # Show a few upcoming ones (each date is parsed once).
        now = datetime.now()
        upcoming = []
        for e in type_eclipses:
            dt = parse_eclipse_date(e["date_raw"])
            if dt is not None and dt > now:
                upcoming.append(e)
                if len(upcoming) == 3:
                    break
        for e in upcoming:
            context_parts.append(eclipse_summary(e, obs_lat, obs_lon))
            context_parts.append("---")
        break

    # --- General stats if no specific context was found ---
    if not context_parts:
        # Count from the loaded catalog so the numbers cannot drift from it.
        type_counts = ", ".join(
            f"{label} ({sum(1 for e in eclipse_list if e['type'].lower() == label.lower())})"
            for label in _ECLIPSE_TYPES
        )
        context_parts.append(
            f"DATABASE: {len(eclipse_list)} solar eclipses from 2001â€“2100. "
            f"Types: {type_counts}. "
            f"Ask about a specific date, location, or eclipse type for detailed info."
        )

    return "\n".join(context_parts), obs_lat, obs_lon


def chat(user_message):
    """Send one user message to the LLM and return its reply.

    The message is augmented with catalog context from
    ``build_eclipse_context`` and sent together with ``SYSTEM_PROMPT`` and the
    accumulated ``chat_history``.

    Args:
        user_message: The raw text typed by the user.

    Returns:
        ``(reply, obs_lat, obs_lon)``. ``reply`` is the assistant text, or an
        error string if the request failed. The coordinates are whatever
        ``build_eclipse_context`` detected, or ``(None, None)``.

    Side effects:
        On success the augmented user turn and the assistant reply are
        appended to ``chat_history``. A failed request leaves the history
        untouched, so error text is never replayed to the model as if the
        assistant had said it.
    """
    # Build eclipse context
    context, obs_lat, obs_lon = build_eclipse_context(user_message)

    # The context travels inside the user turn, below the user's own text.
    augmented_msg = f"{user_message}\n\n[ECLIPSE DATABASE CONTEXT]\n{context}"
    user_turn = {"role": "user", "content": augmented_msg}

    messages = [{"role": "system", "content": SYSTEM_PROMPT}, *chat_history, user_turn]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.7,
            max_tokens=800,
        )
        # The content can be None; normalise to a string for callers.
        reply = response.choices[0].message.content or ""
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return f"âš  LLM error: {e}", obs_lat, obs_lon

    chat_history.append(user_turn)
    chat_history.append({"role": "assistant", "content": reply})

    return reply, obs_lat, obs_lon


# Visible confirmation that this cell ran to the end.
print("âœ“ Chat function ready.")

âœ“ Chat function ready.


In [6]:
# ============================================================
# CHAT INTERFACE
# ============================================================
# Type a question and press Enter (or click Send).
# The chatbot will search the eclipse database, inject relevant
# data as context, and use the LLM to answer.
#
# Try:
#   "When is the next eclipse visible from Austin?"
#   "Tell me about the 2026 total eclipse"
#   "Where should I go to see the 2027 Aug total eclipse?"
#   "What eclipses can I see from Tokyo in the next 20 years?"
# ============================================================

# --- Widgets ---
# Scrollable transcript area; append_chat() and on_send() render into it.
chat_output = widgets.Output(layout={"border": "1px solid #444", "width": "100%",
                                      "min_height": "300px", "max_height": "500px",
                                      "overflow_y": "auto"})
# Single-line input for the user's question.
text_input = widgets.Text(
    placeholder="Ask about an eclipseâ€¦ (press Enter)",
    layout={"width": "80%"},
)
# Button alternative to pressing Enter in the text box.
send_btn = widgets.Button(description="Send", button_style="primary",
                          layout={"width": "18%"})
# Status line below the input; on_send() rewrites it once a location is detected.
coord_display = widgets.HTML(value="<i>No coordinates detected yet.</i>")

# Store last detected coords so user can copy them to the visualization
last_coords = {"lat": None, "lon": None}


def append_chat(role, text):
    """Render one chat bubble into the ``chat_output`` widget.

    ``role == "user"`` produces the right-aligned "You:" bubble; any other
    role produces the bot bubble. ``text`` is inserted into the markup as-is,
    so it is interpreted as HTML.
    """
    if role == "user":
        bubble = (
            f'<div style="margin:6px 0;padding:8px 12px;background:#1a3a5c;'
            f'color:#ddd;border-radius:10px;text-align:right;">'
            f'<b>You:</b> {text}</div>'
        )
    else:
        bubble = (
            f'<div style="margin:6px 0;padding:8px 12px;background:#2a2a2a;'
            f'color:#eee;border-radius:10px;">'
            f'<b>ðŸŒ’ Eclipse Bot:</b><br>{text}</div>'
        )

    with chat_output:
        display(HTML(bubble))


def on_send(_=None):
    """Handle Enter / Send: show the question, query the chatbot, show the answer.

    The single positional argument is whatever the widget passes to its
    callback and is ignored. Blank input is a no-op. After the reply is
    rendered, ``last_coords`` and ``coord_display`` are updated when the
    chatbot detected a location.
    """
    # Local stdlib import so this function does not depend on other cells.
    import html

    msg = text_input.value.strip()
    if not msg:
        return
    text_input.value = ""

    # append_chat() inserts its text as raw HTML, so escape anything that
    # came from the user or the model before handing it over.
    append_chat("user", html.escape(msg))

    # Show a temporary "thinking" indicator; display_id=True returns a handle
    # that lets it be replaced later.
    with chat_output:
        thinking = display(HTML('<i style="color:#888;">Thinkingâ€¦</i>'), display_id=True)

    reply, lat, lon = chat(msg)

    # Replace "thinking" with actual reply
    with chat_output:
        if thinking:
            thinking.update(HTML(""))  # clear thinking indicator
        # Escape first, then turn newlines into <br> so only our own markup
        # survives. `or ""` guards against a missing reply.
        safe_reply = html.escape(reply or "").replace("\n", "<br>")
        append_chat("assistant", safe_reply)

    # Update coord display
    if lat is not None and lon is not None:
        last_coords["lat"] = lat
        last_coords["lon"] = lon
        coord_display.value = (
            f'<b>Last detected location:</b> {lat:.1f}Â°N, {lon:.1f}Â°E  '
            f'<span style="color:#888;">â€” use these in the Visualization notebook</span>'
        )


# Wire both ways of sending a message to the same handler.
# NOTE(review): the recorded cell output echoes the on_submit line, which
# looks like a deprecation warning; confirm against the installed ipywidgets
# version before relying on on_submit.
text_input.on_submit(on_send)
send_btn.on_click(on_send)

# --- Layout ---
# Transcript on top, input row (text box + button) below it, then the
# detected-coordinates status line.
display(widgets.VBox([
    chat_output,
    widgets.HBox([text_input, send_btn]),
    coord_display,
]))

  text_input.on_submit(on_send)


VBox(children=(Output(layout=Layout(border_bottom='1px solid #444', border_left='1px solid #444', border_rightâ€¦