In [None]:
!pip install copernicusmarine

In [None]:
# Parameters
# Date range of the data to download, written as "YYYY-MM-DD" strings.
# Both values are forwarded unchanged to copernicusmarine.subset() as
# start_datetime / end_datetime further down in the notebook.
# NOTE(review): this looks like a parameters cell that may be overridden by
# an external runner (e.g. papermill) - keep these as plain literal
# assignments; confirm against how the notebook is executed.
start_date = "2025-01-01"
end_date = "2025-01-02"

In [None]:
import copernicusmarine
import os
from datetime import datetime, timedelta
import xarray as xr
import pandas as pd
import json

# --- STEP 1: DEFINE YOUR CREDENTIALS AND PARAMETERS ---
# Credentials are read from the environment so that no secret is ever stored
# in the notebook. Never write the username or password as a literal here:
# anything committed to the file is visible to everyone who can read it.
#
# Before running, either export COPERNICUSMARINE_SERVICE_USERNAME and
# COPERNICUSMARINE_SERVICE_PASSWORD in the environment that launches the
# notebook, or create a credentials file once with `copernicusmarine login`.
#
# When a variable is not set the value is None; per the toolbox documentation
# subset() then falls back to its own credential lookup (credentials file,
# then an interactive prompt).
username = os.environ.get('COPERNICUSMARINE_SERVICE_USERNAME')
password = os.environ.get('COPERNICUSMARINE_SERVICE_PASSWORD')

# Copernicus Marine product, and the dataset within it, to query
product_id = "GLOBAL_ANALYSISFORECAST_PHY_001_024"
dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m"

# Names of the dataset variables requested from the service
variables_to_download = ["uo", "vo"]

# Bounding box of the area of interest (Caribbean coast of Costa Rica),
# given as (minimum, maximum) pairs per axis
min_longitude, max_longitude = -83.70, -82.55
min_latitude, max_latitude = 9.54, 10.94

# --- DYNAMIC TIME RANGE ---
# The requested period is taken as-is from start_date / end_date, which are
# set in the parameters cell.
start_datetime, end_datetime = start_date, end_date

# Where the downloaded NetCDF subset is written
output_file = "/tmp/downloaded_data.nc"

# --- STEP 2: DOWNLOAD AND PROCESS THE DATA ---
# Remove any file left over from a previous run. By default the toolbox does
# not overwrite an existing output file; it writes to a new, index-suffixed
# file name instead. Without this clean-up a re-run would leave the stale
# file at `output_file`, and that stale file is what gets loaded afterwards.
try:
    os.remove(output_file)
except FileNotFoundError:
    # Nothing to clean up on a first run.
    pass

print("Starting data download...")
# Download only the requested variables, area and period. Any failure raises
# here, so the success message below is printed only after a completed call.
copernicusmarine.subset(
    dataset_id=dataset_id,
    variables=variables_to_download,
    username=username,
    password=password,
    minimum_longitude=min_longitude,
    maximum_longitude=max_longitude,
    minimum_latitude=min_latitude,
    maximum_latitude=max_latitude,
    start_datetime=start_datetime,
    end_datetime=end_datetime,
    output_filename=output_file,
)
print("Data downloaded successfully!")

# Load the downloaded NetCDF file and flatten it into a pandas DataFrame.
# The dataset is opened in a context manager so the underlying file handle is
# closed as soon as the data has been copied into the DataFrame, instead of
# staying open for the rest of the session.
with xr.open_dataset(output_file) as ds:
    print("Dataset loaded successfully!")

    # One row per coordinate combination; reset_index() turns the coordinate
    # index levels into ordinary columns.
    df = ds.to_dataframe().reset_index()
print("DataFrame created successfully!")

# Keep only rows in which both velocity components are present
velocity_columns = ['uo', 'vo']
df = df.dropna(subset=velocity_columns)
print("Null values removed successfully!")

# Current speed is the magnitude of the (uo, vo) vector
squared_magnitude = df['uo']**2 + df['vo']**2
df['current_speed'] = squared_magnitude**0.5
print("Current speed calculated successfully!")

# --- NEW SECTION: LINKING DATA TO ALL BEACHES ---
# Beaches to report on. Each entry carries the beach name, its coordinates
# and two descriptive attributes; only the name and coordinates are used by
# the matching loop that follows.
beaches = [
    dict(
        location="Playa Blanca, Cahuita",
        latitude=9.7613,
        longitude=-82.8369,
        pollution_level="Medium",
        is_conservation_area=True,
    ),
    dict(
        location="Tortuguero",
        latitude=10.5411,
        longitude=-83.4850,
        pollution_level="Low",
        is_conservation_area=True,
    ),
    dict(
        location="Playa Manzanillo",
        latitude=9.6267,
        longitude=-82.6455,
        pollution_level="Low",
        is_conservation_area=True,
    ),
]

# Find the closest data point for each beach
# Fail early with a clear message: with no rows there is no nearest point,
# and idxmin() below would otherwise raise a far less helpful ValueError.
if df.empty:
    raise ValueError("No rows with valid 'uo'/'vo' values; cannot match beaches to grid points.")

final_df_list = []
for beach in beaches:
    # Straight-line distance, in coordinate units, from every row to the
    # beach. It is held in a local Series so that the shared `df` is not
    # modified with a scratch column on each iteration.
    distance = ((df['latitude'] - beach['latitude'])**2 + (df['longitude'] - beach['longitude'])**2)**0.5

    # Index label of the nearest row, looked up once and reused
    nearest_idx = distance.idxmin()
    closest_lat = df.loc[nearest_idx, 'latitude']
    closest_lon = df.loc[nearest_idx, 'longitude']

    # All rows that share the nearest grid point's coordinates; copied so the
    # column added below does not touch `df`
    all_data_for_closest_point = df[(df['latitude'] == closest_lat) & (df['longitude'] == closest_lon)].copy()

    # Tag each row with the beach it was matched to
    all_data_for_closest_point['location'] = beach['location']

    final_df_list.append(all_data_for_closest_point)

# Stack the per-beach frames into a single DataFrame
final_df = pd.concat(final_df_list)

# Render 'time' as plain 'YYYY-MM-DD' strings so the JSON output is readable.
# NOTE(review): `unit='ms'` only applies to numeric input; if 'time' already
# holds datetime values (as a frame built from xarray normally would) it
# appears to have no effect - confirm and consider dropping it.
parsed_time = pd.to_datetime(final_df['time'], unit='ms')
final_df['time'] = parsed_time.dt.strftime('%Y-%m-%d')

# Keep only the columns that belong in the output, in this order.
# NOTE(review): every other column is dropped here. If the dataset carries an
# extra coordinate such as depth, rows that differ only in that coordinate
# become indistinguishable in the output - confirm this is intended.
output_columns = ['location', 'latitude', 'longitude', 'time', 'current_speed', 'uo', 'vo']
final_df = final_df[output_columns]

# Serialise the DataFrame as a JSON array with one object per row
json_data = final_df.to_json(orient='records')
print("JSON data created successfully!")

# Persist the JSON text under /tmp; end='' ensures nothing is appended to it
with open('/tmp/final_data.json', mode='w') as json_file:
    print(json_data, end='', file=json_file)
print("Data processed and saved to JSON successfully!")