### Purpose ###

A link-scraping tool for the one site every Navy Reservist needs to visit (unless they've memorized and/or bookmarked all 345,986 sites required to stay "administratively ready"), but wants to avoid falling into what is most likely the most ingenious place for an adversary 'watering hole' attack...

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import os

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import os

url = "https://www.kellybeamsley.com/navy/"

headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    )
}

response = requests.get(url, headers=headers)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
link_elements = soup.find_all('a', href=True)

links_data = []
for link in link_elements:
    href = link['href'].strip()
    text = link.get_text(strip=True)
    if not href or not text:
        continue

    full_url = href if href.startswith('http') else requests.compat.urljoin(url, href)

    # Skip internal links
    if full_url.startswith("https://www.kellybeamsley.com/"):
        continue

    links_data.append({'Link Text': text, 'URL': full_url})

df_links = pd.DataFrame(links_data)

# Ensure output directory exists
output_folder = "./output"
os.makedirs(output_folder, exist_ok=True)

# Append today's date to the filename
today_str = datetime.now().strftime("%Y%m%d")
filename = os.path.join(output_folder, f"kellybeamsley_external_links_{today_str}.csv")

# Save DataFrame to CSV
df_links.to_csv(filename, index=False)

# Use df_links.shape to output total external links (as of todays date ~ YYYYMMDD)
print(f"Total External Links as of {today_str[:4]}-{today_str[4:6]}-{today_str[6:8]}: {df_links.shape[0]}")
print(f"File saved to: {filename}")

Total External Links as of 2025-11-25: 2905
File saved to: ./output/kellybeamsley_external_links_20251125.csv


In [3]:
df_links.tail()

Unnamed: 0,Link Text,URL
2900,Active Duty Opportunities in the Navy Reserves,https://www.youtube.com/watch?v=Y_y52FyBVJ4&t=...
2901,U.S. Navy Reserve Capabilities,https://www.dvidshub.net/video/742645/us-navy-...
2902,ZULU Time,https://en.wikipedia.org/wiki/Coordinated_Univ...
2903,Zulu Time,https://www.youtube.com/results?search_query=Z...
2904,Kantipur Themes,https://kantipurthemes.com/


In [4]:
n = datetime.now().strftime("%Y-%m-%d")
n

'2025-11-25'

In [7]:
print(f"Total External Links as of {today_str[:4]}-{today_str[4:6]}-{today_str[6:8]}: {df_links.shape[0]}")

Total External Links as of 2025-10-16: 2905


In [5]:
print(f"Total External Links as of {n}: {df_links.shape[0]}")

Total External Links as of 2025-11-25: 2905


In [8]:
# To get ONLY '.mil' links, set up a 'mask':

mask_mil = df_links[df_links['URL'].str.contains('.mil', na=False)]
mask_mil.shape

(1552, 2)

In [10]:
mask_mil.tail()

Unnamed: 0,Link Text,URL
2890,TRIAD Resources,https://www.mynavyhr.navy.mil/Support-Services...
2891,EUCFR Guide,https://www.mynavyhr.navy.mil/Support-Services...
2892,Yellow Ribbon Reintegration Program,https://www.yellowribbon.mil/
2896,ZipServe,https://locker.private.navyreserve.navy.mil/zi...
2898,https://locker.private./opportunitiesnavyreser...,https://locker.private.navyreserve.navy.mil/zi...


In [11]:
# To output a '.mil' only links .csv:

# Append today's date to the filename
today_str = datetime.now().strftime("%Y%m%d") # We already have this built, but no harm in redundancy ;)

# Set up a new filename var to avoid overwriting earlier:
filename_mil = os.path.join(output_folder, f"kellybeamsley_MIL_links_{today_str}.csv")

# Save DataFrame to CSV
mask_mil.to_csv(filename, index=False)


print(f"Total MIL Links as of {today_str[:4]}-{today_str[4:6]}-{today_str[6:8]}: {mask_mil.shape[0]}")
print(f"File saved to: {filename_mil}")

Total MIL Links as of 2025-11-25: 1552
File saved to: ./output/kellybeamsley_MIL_links_20251125.csv
