In [2]:
%pip install beautifulsoup4 requests
%pip install "apache-airflow[celery]==3.0.0" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-3.0.0/constraints-3.10.txt"


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import os
import re
from urllib.parse import urljoin
from PIL import Image
import glob


In [None]:
# Flatten the HURDAT2 file into one table: every observation row is tagged
# with the ID and name taken from the most recent storm header line above it.
file_path = "data/hurdat2-1851-2024-040425.csv"

# Output columns: the two storm-level identifiers first, then the
# per-observation fields in the order they appear on each data line.
cols = ["SID", "Name", "Date", "Time", "RecordID", "Status", "Latitude", "Longitude",
        "MaxWind(kn)", "MinPressure", "34NE", "34SE", "34SW", "34NW", "50NE",
        "50SE", "50SW", "50NW", "64NE", "64SE", "64SW", "64NW", "RadiusOfMaxWind"]

HEADER_FIELDS = 4            # comma-separated fields on a storm header line
DATA_FIELDS = len(cols) - 2  # comma-separated fields on an observation line

records = []
storm_id, storm_name = None, None

with open(file_path, "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        parts = [p.strip() for p in line.split(",")]

        if len(parts) == HEADER_FIELDS:
            # Header line: only the first two fields (ID and name) are kept.
            storm_id, storm_name = parts[0], parts[1]
            continue

        # Lines with fewer fields than a header (e.g. blank lines) are skipped.
        if len(parts) > HEADER_FIELDS:
            if len(parts) != DATA_FIELDS:
                # Fail with the offending location instead of an opaque
                # tuple-unpacking error.
                raise ValueError(
                    f"{file_path}, line {line_no}: expected {DATA_FIELDS} "
                    f"fields in a data row, found {len(parts)}"
                )
            records.append([storm_id, storm_name, *parts])

df = pd.DataFrame(records, columns=cols)

# Empty fields and the "-999" placeholder are treated as missing values.
df = df.replace(["", " ", "-999"], pd.NA)

df

Unnamed: 0,SID,Name,Date,Time,RecordID,Status,Latitude,Longitude,MaxWind(kn),MinPressure,...,34NW,50NE,50SE,50SW,50NW,64NE,64SE,64SW,64NW,RadiusOfMaxWind
0,AL011851,UNNAMED,18510625,0000,,HU,28.0N,94.8W,80,,...,,,,,,,,,,
1,AL011851,UNNAMED,18510625,0600,,HU,28.0N,95.4W,80,,...,,,,,,,,,,
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80,,...,,,,,,,,,,
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80,,...,,,,,,,,,,
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55225,AL192024,SARA,20241117,0600,,TS,16.5N,87.5W,35,1001,...,90,0,0,0,0,0,0,0,0,40
55226,AL192024,SARA,20241117,1200,,TS,16.8N,87.9W,35,1001,...,60,0,0,0,0,0,0,0,0,40
55227,AL192024,SARA,20241117,1400,L,TS,17.0N,88.3W,35,1001,...,60,0,0,0,0,0,0,0,0,40
55228,AL192024,SARA,20241117,1800,,TD,17.4N,89.1W,30,1003,...,0,0,0,0,0,0,0,0,0,50


In [3]:
# Turn the raw "YYYYMMDD" strings into datetime.date objects (day resolution).
# NOTE: both columns are overwritten in place, so this cell expects the
# freshly parsed string columns and is meant to run once per load.
date_stamps = pd.to_datetime(df["Date"], format="%Y%m%d")
df["Date"] = date_stamps.dt.date

# Left-pad the time strings to four digits, then keep only the
# datetime.time part (hour + minute).
padded_times = df["Time"].str.zfill(4)
time_stamps = pd.to_datetime(padded_times, format="%H%M")
df["Time"] = time_stamps.dt.time

df

Unnamed: 0,SID,Name,Date,Time,RecordID,Status,Latitude,Longitude,MaxWind(kn),MinPressure,...,34NW,50NE,50SE,50SW,50NW,64NE,64SE,64SW,64NW,RadiusOfMaxWind
0,AL011851,UNNAMED,1851-06-25,00:00:00,,HU,28.0N,94.8W,80,,...,,,,,,,,,,
1,AL011851,UNNAMED,1851-06-25,06:00:00,,HU,28.0N,95.4W,80,,...,,,,,,,,,,
2,AL011851,UNNAMED,1851-06-25,12:00:00,,HU,28.0N,96.0W,80,,...,,,,,,,,,,
3,AL011851,UNNAMED,1851-06-25,18:00:00,,HU,28.1N,96.5W,80,,...,,,,,,,,,,
4,AL011851,UNNAMED,1851-06-25,21:00:00,L,HU,28.2N,96.8W,80,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55225,AL192024,SARA,2024-11-17,06:00:00,,TS,16.5N,87.5W,35,1001,...,90,0,0,0,0,0,0,0,0,40
55226,AL192024,SARA,2024-11-17,12:00:00,,TS,16.8N,87.9W,35,1001,...,60,0,0,0,0,0,0,0,0,40
55227,AL192024,SARA,2024-11-17,14:00:00,L,TS,17.0N,88.3W,35,1001,...,60,0,0,0,0,0,0,0,0,40
55228,AL192024,SARA,2024-11-17,18:00:00,,TD,17.4N,89.1W,30,1003,...,0,0,0,0,0,0,0,0,0,50


In [6]:
# Scrape the image listing page and save every matching frame to disk.
url = "https://www.star.nesdis.noaa.gov/GOES/sector_band.php?sat=G19&sector=taw&band=10&length=12&dim=0"

# Without a timeout, requests can block forever on an unresponsive server.
REQUEST_TIMEOUT = 30  # seconds

response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Stop on an HTTP error instead of parsing an error page as if it were the listing.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Resolve every <img src> against the page URL so relative links become absolute.
img_urls = [urljoin(url, img.get("src")) for img in soup.find_all("img") if img.get("src")]

# Filter for real satellite frame URLs (GOES imagery)
frame_urls = [u for u in img_urls if "GOES19" in u and u.endswith(".jpg")]

print(f"Found {len(frame_urls)} image frames")

os.makedirs("data/satellite_obs/frames", exist_ok=True)
for i, frame_url in enumerate(frame_urls):
    frame_response = requests.get(frame_url, timeout=REQUEST_TIMEOUT)
    # Never write an error body to disk under a .jpg name.
    frame_response.raise_for_status()
    img_data = frame_response.content
    # Zero-padded index keeps the files in download order when sorted by name.
    with open(f"data/satellite_obs/frames/frame_{i:02d}.jpg", "wb") as f:
        f.write(img_data)

Found 12 image frames


In [None]:
# Assemble the downloaded frames (in filename order) into a looping GIF.
frame_paths = sorted(glob.glob("data/satellite_obs/frames/frame_*.jpg"))
if not frame_paths:
    # Give a clear message instead of an IndexError on frames[0] below.
    raise FileNotFoundError(
        "No frames found matching data/satellite_obs/frames/frame_*.jpg"
    )

frames = []
for file in frame_paths:
    # copy() pulls the pixel data into memory, so the file handle can be
    # closed right away by the context manager.
    with Image.open(file) as frame:
        frames.append(frame.copy())

# Save as animated GIF
frames[0].save(
    "data/satellite_obs/satellite_animation.gif",
    save_all=True,
    append_images=frames[1:],
    duration=200,  # milliseconds per frame
    loop=0
)

print("GIF saved as satellite_animation.gif")

GIF saved as satellite_animation.gif
