In [None]:
!pip install netCDF4 h5netcdf h5py xarray --upgrade

In [None]:
import xarray as xr

# Optional, Google Colab only: mount Drive before reading the file.
# from google.colab import drive
# drive.mount('/content/drive')

# Merged NetCDF file to analyse (path is relative to the working directory).
# Alternative input kept for reference: "oisst_california_1981_2025.nc"
file_path = "./content/merged.nc"

# Open the file through the netCDF4 backend; `ds` is used by the later cells.
ds = xr.open_dataset(filename_or_obj=file_path, engine="netcdf4")

# Print the dataset's text summary.
print(ds)


In [None]:
import matplotlib.pyplot as plt

# Label-based lookup of the SST field on the time coordinate for one date.
single_day = ds.sst.sel(time="2020-06-01")

# Draw onto an explicit Axes instead of relying on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
single_day.plot(ax=ax, cmap="coolwarm")

# Replace the title xarray generates with a descriptive one.
ax.set_title("Sea Surface Temperature (SST) - June 1, 2020")
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import imageio.v2 as imageio
import os

# Work on the SST variable of the already-opened dataset `ds`.
sst = ds.sst
time_steps = sst.time.values  # every value of the time coordinate

# One colour range shared by all frames, taken from the whole array, so that
# colours are comparable from frame to frame.
vmin = sst.min().item()
vmax = sst.max().item()

# Frames are written as PNG files into this folder (created if missing).
os.makedirs("sst_frames", exist_ok=True)

# Render one PNG per time step and remember the file paths in order.
image_files = []
for frame_idx, timestamp in enumerate(time_steps):
    fig, ax = plt.subplots(figsize=(10, 6))
    sst.sel(time=timestamp).plot(ax=ax, cmap="coolwarm", vmin=vmin, vmax=vmax)
    # First 10 characters of the timestamp's string form are used as the date label.
    ax.set_title(f"Sea Surface Temperature (SST) - {str(timestamp)[:10]}")

    frame_filename = f"sst_frames/frame_{frame_idx:03d}.png"
    fig.savefig(frame_filename)
    plt.close(fig)  # release the figure so memory does not grow with the frame count
    image_files.append(frame_filename)

# Stitch the saved frames, in order, into a single animated GIF.
gif_filename = "sst_animation.gif"
with imageio.get_writer(gif_filename, mode="I", duration=0.5) as writer:
    for frame_path in image_files:
        writer.append_data(imageio.imread(frame_path))

print(f"GIF saved as {gif_filename}")


In [None]:
import matplotlib.pyplot as plt
import imageio.v2 as imageio
import os
import numpy as np

# Running per-pixel maximum of SST, animated over time.
#
# The accumulator starts at -inf so that the first valid observation at a
# pixel always becomes its maximum.
sst_max_updated = np.full_like(ds.sst[0].values, -np.inf)

# Create directory to save frames
os.makedirs("sst_max_frames", exist_ok=True)

# Generate images for each time step
image_files = []
for i, time in enumerate(ds.sst.time.values):
    # SST field for this time step as a NumPy array.
    # NOTE(review): assumed to be 2-D (lat, lon), which plt.imshow requires;
    # nothing here enforces that — confirm against the dataset's dimensions.
    current_sst = ds.sst.sel(time=time).values

    # np.fmax returns the non-NaN operand when exactly one side is NaN.
    # np.maximum would instead propagate NaN, so a single missing value at any
    # time step would permanently wipe out that pixel's running maximum.
    sst_max_updated = np.fmax(sst_max_updated, current_sst)

    # Pixels that have had no valid observation so far are still -inf; show
    # them as NaN so they are left blank and do not distort the colour scale.
    sst_max_display = np.where(np.isneginf(sst_max_updated), np.nan, sst_max_updated)

    # Create a plot for the current state of maximum SST
    plt.figure(figsize=(10, 6))
    plt.imshow(sst_max_display, cmap="inferno", origin="lower")
    plt.colorbar(label="Max SST (°C)")
    plt.title(f"Maximum SST Ever Recorded - {str(time)[:10]}")

    # Save the frame
    frame_filename = f"sst_max_frames/frame_{i:03d}.png"
    plt.savefig(frame_filename)
    plt.close()
    image_files.append(frame_filename)

# Leave the final array with NaN (not -inf) at pixels that never had a valid
# value, so any later use of `sst_max_updated` sees missing data as missing.
sst_max_updated = np.where(np.isneginf(sst_max_updated), np.nan, sst_max_updated)

# Create a GIF from the images
gif_filename = "sst_max_evolution.gif"
with imageio.get_writer(gif_filename, mode="I", duration=0.5) as writer:
    for file in image_files:
        image = imageio.imread(file)
        writer.append_data(image)

print(f"GIF saved as {gif_filename}")


In [None]:
# Quick inspection of the dataset, printed one after another:
#   1. the `time` coordinate,
#   2. ds.head() — a small preview of the dataset,
#   3. the dtype of each data variable.
print(ds['time'], ds.head(), ds.dtypes, sep="\n")


In [None]:
import pandas as pd
import xarray as xr

# Put the records in chronological order, then fill missing values by linear
# interpolation along the time axis.
# NOTE: this rebinds `ds`, so cells run after this one see the sorted,
# gap-filled dataset rather than the one read from disk.
ds = ds.sortby('time').interpolate_na(dim='time', method='linear')

# Aggregate to one value per month ('1MS' = month-start frequency) by
# averaging all time steps that fall within each month.
ds_resampled = ds.resample(time='1MS').mean()


# Resample the dataset to monthly frequency and compute the mean for each month
# monthly_ds = ds.resample(time='1MS').mean()

# # Now, 'monthly_ds' contains the monthly averaged data
# print(monthly_ds)

In [None]:
# Collapse the monthly SST field to a single regional series by averaging
# over both spatial dimensions.
sst_mean = ds_resampled.sst.mean(dim=["lat", "lon"])

# Line plot of the regional mean on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
sst_mean.plot(ax=ax)

# Override the labels xarray generates with descriptive ones.
ax.set_title("Average Sea Surface Temperature (SST) Over Time (1981-2025)")
ax.set_ylabel("SST (°C)")
ax.set_xlabel("Time")
ax.grid()
plt.show()


In [None]:
# Export the regional-mean SST series to CSV.
# to_dataframe() places the array's remaining dimension(s) — `time`, after the
# lat/lon mean — in the DataFrame index. Writing with index=False would drop
# that index and leave a column of SST values with no dates attached, so the
# index is written out as well.
sst_mean.to_dataframe().to_csv("avg_sst_california.csv", index=True)