In [1]:
"""
NUFORC UFO Sightings Highlights Scraper
---------------------------------------

This script uses Selenium + BeautifulSoup + pandas to scrape the NUFORC
"Highlights" table across all paginated pages and save the data into a CSV file.

Why Selenium?
-------------
The NUFORC website dynamically renders the highlights table with JavaScript.
Requests or pandas.read_html alone won't see the table because it's not in
the raw HTML source. Selenium acts as a real browser, executes the JS, and
then gives us the fully-rendered HTML, which we can parse with BeautifulSoup.

Output
------
- A single CSV file containing all UFO sighting highlights (~15k records).
- Columns are preserved as they appear on the NUFORC website.

Requirements
------------
- Google Chrome browser
- ChromeDriver (must match your Chrome version)
- Python libraries: selenium, beautifulsoup4, pandas

Install dependencies (if needed):
    pip install selenium beautifulsoup4 pandas
"""

import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

# -----------------------------------------------------------------------------
# 1. Setup Chrome WebDriver
# -----------------------------------------------------------------------------
# Make sure you have ChromeDriver installed and that it matches your Chrome version.
# Selenium will open a browser window to load and render each page.
driver = webdriver.Chrome()

# Storage for all dataframes scraped from each page
all_data = []

# -----------------------------------------------------------------------------
# 2. Loop through paginated results
# -----------------------------------------------------------------------------
# NUFORC highlights are paginated. Adjust the range if new pages are added later.
for page in range(0, 2000):  # currently ~18 pages = ~15k records
    url = f"https://nuforc.org/subndx/?id=highlights&pg={page}"
    driver.get(url)
    
    # Give the browser a couple of seconds to fully render the page
    time.sleep(2)
    
    # Parse rendered page with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, "html.parser")
    
    # Find the first table element on the page
    table = soup.find("table")
    
    # If no table is found, assume we've reached the end of pagination
    if not table:
        print(f"No table found on page {page}. Stopping scrape.")
        break
    
    # Convert the table HTML into a pandas DataFrame
    df = pd.read_html(str(table))[0]
    
    # Append this page's dataframe to our list
    all_data.append(df)
    print(f"Scraped page {page} with {len(df)} rows.")

# -----------------------------------------------------------------------------
# 3. Close the browser
# -----------------------------------------------------------------------------
driver.quit()

# -----------------------------------------------------------------------------
# 4. Combine all scraped pages into a single DataFrame
# -----------------------------------------------------------------------------
final = pd.concat(all_data, ignore_index=True)

# -----------------------------------------------------------------------------
# 5. Save the combined dataset to CSV
# -----------------------------------------------------------------------------
output_path = r"C:\git\DATA-501-Group-Project-UFO-Sightings-Data-Analysis\data\nuforc_highlights.csv"
final.to_csv(output_path, index=False)

print(f"Scraping complete! Saved {len(final)} records to {output_path}")

  df = pd.read_html(str(table))[0]


Scraped page 0 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 1 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 2 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 3 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 4 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 5 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 6 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 7 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 8 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 9 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 10 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 11 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 12 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 13 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 14 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 15 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 16 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 17 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 18 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 19 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 20 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 21 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 22 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 23 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 24 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 25 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 26 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 27 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 28 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 29 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 30 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 31 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 32 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 33 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 34 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 35 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 36 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 37 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 38 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 39 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 40 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 41 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 42 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 43 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 44 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 45 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 46 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 47 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 48 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 49 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 50 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 51 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 52 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 53 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 54 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 55 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 56 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 57 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 58 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 59 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 60 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 61 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 62 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 63 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 64 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 65 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 66 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 67 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 68 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 69 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 70 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 71 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 72 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 73 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 74 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 75 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 76 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 77 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 78 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 79 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 80 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 81 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 82 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 83 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 84 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 85 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 86 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 87 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 88 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 89 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 90 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 91 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 92 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 93 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 94 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 95 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 96 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 97 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 98 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 99 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 100 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 101 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 102 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 103 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 104 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 105 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 106 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 107 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 108 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 109 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 110 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 111 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 112 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 113 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 114 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 115 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 116 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 117 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 118 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 119 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 120 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 121 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 122 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 123 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 124 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 125 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 126 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 127 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 128 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 129 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 130 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 131 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 132 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 133 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 134 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 135 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 136 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 137 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 138 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 139 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 140 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 141 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 142 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 143 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 144 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 145 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 146 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 147 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 148 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 149 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 150 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 151 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 152 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 153 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 154 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 155 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 156 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 157 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 158 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 159 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 160 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 161 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 162 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 163 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 164 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 165 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 166 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 167 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 168 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 169 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 170 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 171 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 172 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 173 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 174 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 175 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 176 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 177 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 178 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 179 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 180 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 181 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 182 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 183 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 184 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 185 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 186 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 187 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 188 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 189 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 190 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 191 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 192 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 193 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 194 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 195 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 196 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 197 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 198 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 199 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 200 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 201 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 202 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 203 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 204 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 205 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 206 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 207 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 208 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 209 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 210 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 211 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 212 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 213 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 214 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 215 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 216 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 217 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 218 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 219 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 220 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 221 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 222 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 223 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 224 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 225 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 226 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 227 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 228 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 229 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 230 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 231 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 232 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 233 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 234 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 235 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 236 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 237 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 238 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 239 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 240 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 241 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 242 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 243 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 244 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 245 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 246 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 247 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 248 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 249 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 250 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 251 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 252 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 253 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 254 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 255 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 256 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 257 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 258 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 259 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 260 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 261 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 262 with 101 rows.


  df = pd.read_html(str(table))[0]


Scraped page 263 with 101 rows.


KeyboardInterrupt: 