## This notebook fetches satellite sea surface temperature (SST) at and around the chosen buoy locations for which discrete data have already been downloaded for the class project.

### Current buoy locations:
- Cha'Ba (La Push), 47.97, -124.95
- CCE2 (SoCal), 34.324, -120.814
- Coastal Louisiana, 28.86, -90.479
- Gray's Reef (Georgia), 31.40, -80.87
- SE Bering Sea, 58.87, -164.06
- Mooring TA0155 (South Pacific), 0, -155

### Bounding box around buoys for collecting SST:
±1 degree of latitude and longitude to start with

### Data resolution
- Determined by the ERDDAP dataset selected below (`jplMURSST42mday`)
- Expected range: 0.25° to 1° spatial resolution
- In the sample response shown further down, neighbouring longitude grid points are 0.25° apart



In [19]:
import sys
# Record the interpreter version this notebook was run with
print(sys.version)

3.11.14 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 18:30:03) [MSC v.1929 64 bit (AMD64)]


In [20]:
import subprocess
import sys

# Packages to install into the environment of the interpreter running this kernel
packages = ['requests', 'pandas', 'xarray', 'netcdf4']

# Invoke pip through sys.executable so the install targets this kernel's Python.
# check_call raises CalledProcessError on a non-zero exit status, so the final
# message is only printed when every install command succeeded.
pip_install = [sys.executable, '-m', 'pip', 'install']
for package in packages:
    subprocess.check_call(pip_install + [package, '-q'])

print("All packages installed successfully!")

All packages installed successfully!


### This section defines the following:
- Buoy coordinates
- Bounding boxes around the buoys, which determine which SST data to extract

In [21]:
# Buoy sites as (name, latitude, longitude); west longitudes are negative
buoy_sites = [
    ("Cha'Ba (La Push)", 47.97, -124.95),
    ("CCE2 (SoCal)", 34.324, -120.814),
    ("Coastal Louisiana", 28.86, -90.479),
    ("Gray's Reef (Georgia)", 31.40, -80.87),
    ("SE Bering Sea", 58.87, -164.06),
    ("Mooring TA0155 (South Pacific)", 0, -155),
]

# Lookup table used by the rest of the notebook: name -> {'lat': ..., 'lon': ...}
buoys = {site: {'lat': lat, 'lon': lon} for site, lat, lon in buoy_sites}

# Print them to verify
for name, coords in buoys.items():
    print("{}: {}, {}".format(name, coords['lat'], coords['lon']))

Cha'Ba (La Push): 47.97, -124.95
CCE2 (SoCal): 34.324, -120.814
Coastal Louisiana: 28.86, -90.479
Gray's Reef (Georgia): 31.4, -80.87
SE Bering Sea: 58.87, -164.06
Mooring TA0155 (South Pacific): 0, -155


In [22]:
def calculate_bbox(latitude, longitude, degrees=1):
    """
    Calculate a bounding box around a point.

    Parameters:
    - latitude: center latitude in degrees
    - longitude: center longitude in degrees
    - degrees: how many degrees to extend in each direction (default ±1°)

    Returns:
    - dictionary with lat_min, lat_max, lon_min, lon_max

    Notes:
    - Latitude bounds are clamped to [-90, 90], so a box around a point
      closer than `degrees` to a pole stops at the pole.
    - Longitude bounds are neither wrapped nor clamped; a box that crosses
      ±180° is returned as-is.
    """
    bbox = {
        # max/min keep the latitude limits inside the valid -90..90 range
        'lat_min': max(-90, latitude - degrees),
        'lat_max': min(90, latitude + degrees),
        'lon_min': longitude - degrees,
        'lon_max': longitude + degrees
    }
    return bbox

# Quick check using the Cha'Ba (La Push) coordinates
test_bbox = calculate_bbox(latitude=47.97, longitude=-124.95, degrees=1)
print("Bounding box for La Push buoy:", test_bbox, sep="\n")

Bounding box for La Push buoy:
{'lat_min': 46.97, 'lat_max': 48.97, 'lon_min': -125.95, 'lon_max': -123.95}


### This section sets up the ERDDAP API request used to fetch data

In [23]:
# ERDDAP dataset we're using
dataset_id = 'jplMURSST42mday'

# Parameters for our request.
# The box is derived from the buoy table and calculate_bbox() (both defined in
# earlier cells) instead of being retyped by hand, so it cannot drift from them.
demo_buoy = buoys["Cha'Ba (La Push)"]
demo_bbox = calculate_bbox(demo_buoy['lat'], demo_buoy['lon'], degrees=1)
lat_min = demo_bbox['lat_min']
lat_max = demo_bbox['lat_max']
lon_min = demo_bbox['lon_min']
lon_max = demo_bbox['lon_max']
start_date = '2015-01-01'
end_date = '2025-12-31'

# Build the ERDDAP API URL (the .csv suffix asks for a CSV response)
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
url = f"{base_url}{dataset_id}.csv"

# Add one [(start):stride:(stop)] constraint per axis: time, latitude, longitude
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({lat_min}):1:({lat_max})]"
url += f"[({lon_min}):1:({lon_max})]"

print("Full API URL:")
print(url)

Full API URL:
https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST42mday.csv?sst[(2015-01-01T00:00:00Z):1:(2025-12-31T00:00:00Z)][(46.97):1:(48.97)][(-125.95):1:(-123.95)]


In [24]:
import requests

# Use the URL we built earlier.
# The timeout makes a stalled connection raise an exception instead of
# blocking the kernel indefinitely; 300 s matches the later buoy cells.
response = requests.get(url, timeout=300)

# Check if the request was successful
if response.status_code == 200:
    print("Success! Data retrieved.")
    print(f"Data size: {len(response.content)} bytes")
    print("\nFirst 500 characters of data:")
    print(response.text[:500])
else:
    # On failure, show the status code followed by the full response body
    print(f"Error: Status code {response.status_code}")
    print(response.text)

Success! Data retrieved.
Data size: 535839 bytes

First 500 characters of data:
time,latitude,longitude,sst
UTC,degrees_north,degrees_east,degree_C
2015-01-16T00:00:00Z,46.875,-125.875,11.395806451612827
2015-01-16T00:00:00Z,46.875,-125.625,11.627838709677462
2015-01-16T00:00:00Z,46.875,-125.375,11.729967741935468
2015-01-16T00:00:00Z,46.875,-125.125,11.74445161290322
2015-01-16T00:00:00Z,46.875,-124.875,11.663032258064447
2015-01-16T00:00:00Z,46.875,-124.625,11.381032258064465
2015-01-16T00:00:00Z,46.875,-124.375,10.791032258064547
2015-01-16T00:00:00Z,46.875,-124.125,10.2


In [None]:
# ===== BUOY 1: Cha'Ba (La Push) - 47.97, -124.95 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Save to the central project data folder (two levels up from this notebook)
save_dir = '../../data/raw/satellite_sources'
if not os.path.isdir(save_dir):
    # exist_ok avoids an error if the folder appears between the check and the call
    os.makedirs(save_dir, exist_ok=True)
    print(f"Created {save_dir} folder\n")

# Set parameters for this buoy
buoy_name = "Cha'Ba (La Push)"
latitude = 47.97
longitude = -124.95
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    # 300 s is the same timeout the other buoy cells use
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/ChaBa_LaPush_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

Created data/raw/satellite_sources folder

FETCHING: Cha'Ba (La Push)
Bounding box: Lat 46.97 to 48.97, Lon -125.95 to -123.95

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/ChaBa_LaPush_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: 7.02°C to 19.33°C


In [None]:
import pandas as pd

# Read the Cha'Ba CSV back from disk to confirm what was actually written
chaba_csv_path = '../../data/raw/satellite_sources/ChaBa_LaPush_SST.csv'
df = pd.read_csv(chaba_csv_path)

# Show the column names, a preview of the first rows, and the table dimensions
print("Column names:", list(df.columns), sep="\n")
print("\nFirst few rows:", df.head(), sep="\n")
print(f"\nShape: {df.shape}")

Column names:
['UTC', 'degrees_north', 'degrees_east', 'degree_C']

First few rows:
                    UTC  degrees_north  degrees_east   degree_C
0  2015-01-16T00:00:00Z         46.875      -125.875  11.395806
1  2015-01-16T00:00:00Z         46.875      -125.625  11.627839
2  2015-01-16T00:00:00Z         46.875      -125.375  11.729968
3  2015-01-16T00:00:00Z         46.875      -125.125  11.744452
4  2015-01-16T00:00:00Z         46.875      -124.875  11.663032

Shape: (10287, 4)


## Summary

This notebook downloads satellite-based Sea Surface Temperature (SST) data from NOAA's ERDDAP service for 6 buoy locations.

**Output files saved to project `data/raw/satellite_sources/`:**
- `ChaBa_LaPush_SST.csv` - Cha'Ba buoy at La Push (47.97°N, 124.95°W)
- `CCE2_SoCal_SST.csv` - CCE2 mooring in Southern California (34.324°N, 120.814°W)
- `CoastalLouisiana_SST.csv` - Coastal Louisiana buoy (28.86°N, 90.479°W)
- `GraysReef_Georgia_SST.csv` - Gray's Reef in Georgia (31.40°N, 80.87°W)
- `SEBeringSeA_SST.csv` - SE Bering Sea mooring (58.87°N, 164.06°W)
- `Mooring_TA0155_SouthPacific_SST.csv` - South Pacific mooring (0°N, 155°W)

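- **Data source:** JPL Multi-scale Ultra-High Resolution SST (`jplMURSST42mday`), fetched from the NOAA CoastWatch ERDDAP server
- **Date range:** 2015-01-01 to 2025-12-31 requested; the returned time stamps run from 2015-01-16 to 2025-12-16
- **Spatial extent:** ±1° of latitude and longitude around each buoy location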

These files are then used by the data preparation notebook to combine with buoy discrete measurements for ML analysis.

In [None]:
# ===== BUOY 2: CCE2 (SoCal) - 34.324, -120.814 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Output folder (two levels up from this notebook). Set here as well so this
# cell does not rely on the Buoy 1 cell having been run in the same kernel.
save_dir = '../../data/raw/satellite_sources'
os.makedirs(save_dir, exist_ok=True)

# Set parameters for this buoy
buoy_name = "CCE2 (SoCal)"
latitude = 34.324
longitude = -120.814
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/CCE2_SoCal_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

FETCHING: CCE2 (SoCal)
Bounding box: Lat 33.324 to 35.324, Lon -121.814 to -119.814

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/CCE2_SoCal_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: 10.93°C to 22.23°C


In [None]:
# ===== BUOY 3: Coastal Louisiana - 28.86, -90.479 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Output folder (two levels up from this notebook). Set here as well so this
# cell does not rely on the Buoy 1 cell having been run in the same kernel.
save_dir = '../../data/raw/satellite_sources'
os.makedirs(save_dir, exist_ok=True)

# Set parameters for this buoy
buoy_name = "Coastal Louisiana"
latitude = 28.86
longitude = -90.479
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/CoastalLouisiana_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

FETCHING: Coastal Louisiana
Bounding box: Lat 27.86 to 29.86, Lon -91.479 to -89.479

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/CoastalLouisiana_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: 11.46°C to 32.14°C


In [None]:
# ===== BUOY 4: Gray's Reef (Georgia) - 31.40, -80.87 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Output folder (two levels up from this notebook). Set here as well so this
# cell does not rely on the Buoy 1 cell having been run in the same kernel.
save_dir = '../../data/raw/satellite_sources'
os.makedirs(save_dir, exist_ok=True)

# Set parameters for this buoy
buoy_name = "Gray's Reef (Georgia)"
latitude = 31.40
longitude = -80.87
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/GraysReef_Georgia_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

FETCHING: Gray's Reef (Georgia)
Bounding box: Lat 30.4 to 32.4, Lon -81.87 to -79.87

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/GraysReef_Georgia_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: 8.36°C to 30.74°C


In [None]:
# ===== BUOY 5: SE Bering Sea - 58.87, -164.06 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Output folder (two levels up from this notebook). Set here as well so this
# cell does not rely on the Buoy 1 cell having been run in the same kernel.
save_dir = '../../data/raw/satellite_sources'
os.makedirs(save_dir, exist_ok=True)

# Set parameters for this buoy
buoy_name = "SE Bering Sea"
latitude = 58.87
longitude = -164.06
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/SEBeringSeA_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

FETCHING: SE Bering Sea
Bounding box: Lat 57.87 to 59.87, Lon -165.06 to -163.06

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/SEBeringSeA_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: -1.80°C to 15.92°C


In [None]:
# ===== BUOY 6: Mooring TA0155 (South Pacific) - 0, -155 =====
# Fetches SST for the ±1° box around this buoy from ERDDAP griddap and saves it
# as a CSV in save_dir. Needs calculate_bbox() from the bounding-box cell above.
from io import StringIO
import pandas as pd
import requests
import os

# Output folder (two levels up from this notebook). Set here as well so this
# cell does not rely on the Buoy 1 cell having been run in the same kernel.
save_dir = '../../data/raw/satellite_sources'
os.makedirs(save_dir, exist_ok=True)

# Set parameters for this buoy
buoy_name = "Mooring TA0155 (South Pacific)"
latitude = 0
longitude = -155
start_date = '2015-01-01'
end_date = '2025-12-31'

print("=" * 50)
print(f"FETCHING: {buoy_name}")
print("=" * 50)

# Calculate bounding box
bbox = calculate_bbox(latitude, longitude, degrees=1)
print(f"Bounding box: Lat {bbox['lat_min']} to {bbox['lat_max']}, Lon {bbox['lon_min']} to {bbox['lon_max']}")

# Build API request URL: one [(start):stride:(stop)] constraint per axis,
# in the order time, latitude, longitude
base_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/'
dataset_id = 'jplMURSST42mday'
url = f"{base_url}{dataset_id}.csv"
url += f"?sst[({start_date}T00:00:00Z):1:({end_date}T00:00:00Z)]"
url += f"[({bbox['lat_min']}):1:({bbox['lat_max']})]"
url += f"[({bbox['lon_min']}):1:({bbox['lon_max']})]"

print("\nFetching data...")
try:
    response = requests.get(url, timeout=300)

    if response.status_code == 200:
        # skiprows=1 drops the first header line so the second line becomes the
        # header; the summary prints below rely on the 'UTC' and 'degree_C' columns.
        df = pd.read_csv(StringIO(response.text), skiprows=1)

        # Save to CSV
        filename = f"{save_dir}/Mooring_TA0155_SouthPacific_SST.csv"
        df.to_csv(filename, index=False)

        print(f"✓ Success! Saved {len(df)} rows")
        print(f"  File: {filename}")
        print(f"  Date range: {df['UTC'].min()} to {df['UTC'].max()}")
        print(f"  SST range: {df['degree_C'].min():.2f}°C to {df['degree_C'].max():.2f}°C")
    else:
        print(f"✗ Error: HTTP {response.status_code}")
        # Also show the start of the response body so the reason for the failure is visible
        print(response.text[:500])

except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook continue
    print(f"✗ Exception: {e}")

FETCHING: Mooring TA0155 (South Pacific)
Bounding box: Lat -1 to 1, Lon -156 to -154

Fetching data...
✓ Success! Saved 10287 rows
  File: ./data/raw/satellite_sources/Mooring_TA0155_SouthPacific_SST.csv
  Date range: 2015-01-16T00:00:00Z to 2025-12-16T00:00:00Z
  SST range: 24.90°C to 30.65°C
