In [1]:
import os
from getpass import getpass

import copernicusmarine as cm
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Credentials must never be stored in the notebook. They are taken from the
# environment variables used by the Copernicus Marine Toolbox when set,
# otherwise the user is prompted (the password prompt does not echo).
# NOTE(review): the password that was previously hardcoded in this cell has
# been exposed and should be rotated.
cm.login(
    username=os.environ.get("COPERNICUSMARINE_SERVICE_USERNAME")
    or input("Copernicus Marine username: "),
    password=os.environ.get("COPERNICUSMARINE_SERVICE_PASSWORD")
    or getpass("Copernicus Marine password: "),
)

File /home/kshitiz/.copernicusmarine/.copernicusmarine-credentials already exists, overwrite it ? [y/N]:

INFO - 2024-07-31T21:27:35Z - Credentials file stored in /home/kshitiz/.copernicusmarine/.copernicusmarine-credentials.


True

Dataset 1: Global Ocean Color (https://data.marine.copernicus.eu/product/OCEANCOLOUR_GLO_BGC_L4_MY_009_104/description)
- dataset_id: "cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M"
- variables: ["CHL"]
- Parameter definitions:
    - CHL [mg/m3]: Mass concentration of chlorophyll a in sea water
- Spatial resolution: 4km x 4km

Dataset 2: Global Ocean OSTIA Sea Surface Temperature (https://data.marine.copernicus.eu/product/SST_GLO_SST_L4_REP_OBSERVATIONS_010_011/description)
- dataset_id: "METOFFICE-GLO-SST-L4-REP-OBS-SST"
- variables: ["analysed_sst"]
- Parameter definitions:
    - analysed_sst [K]: Sea surface temperature
- Spatial resolution: 0.05deg x 0.05deg

Dataset 3: Multi Observation Global Ocean Sea Surface Salinity and Sea Surface Density (https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_S_SURFACE_MYNRT_015_013/description)
- dataset_id: "cmems_obs-mob_glo_phy-sss_my_multi_P1M"
- variables: ["sos","dos"]
- Parameter definitions: 
    - sos: Sea surface salinity
    - dos [kg/m3]: sea surface density
- Spatial resolution: 0.125deg x 0.125deg

Dataset 4: Global Ocean Surface Carbon (https://data.marine.copernicus.eu/product/MULTIOBS_GLO_BIO_CARBON_SURFACE_REP_015_008/description)
- dataset_id: "dataset-carbon-rep-monthly"
- variables: ["fgco2","omega_ar","omega_ca","ph","spco2","talk","tco2"]
- Parameter definitions:
    - fgco2 [molC/m2/yr]: Surface downward mass flux of carbon dioxide expressed as carbon
    - omega_ar: Aragonite saturation state in sea water
    - omega_ca: Calcite saturation state in sea water
    - ph: Sea water ph reported on total scale
    - spco2 [micro atm]: Surface partial pressure of carbon dioxide in sea water
    - talk [micromol/kg]: Total alkalinity in sea water
    - tco2 [micromol/kg]: Dissolved inorganic carbon in sea water
- Spatial resolution: 0.25deg x 0.25deg
- NOTE: Although the resolution is the same as that of Pisces, the coordinates at which the values are provided differ from those of Pisces. In this case coarse_grain() only shifts the coordinates so that they align with the Pisces grid.

Dataset 5: Bathymetry (Sea floor depth below geoid) data from Global Ocean Physics Reanalysis (https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_PHY_001_030/description)
- dataset_id: "cmems_mod_glo_phy_my_0.083deg_static"
- variables: ["deptho"]
- Parameter definition:
    - deptho [m]: Sea floor depth below geoid
- Spatial resolution: 0.083deg x 0.083deg
The minimum and maximum depth parameters must also be specified to get the bathymetry data:
- depth: [0.49402499198913574, 5727.9169921875]

In [17]:
# Base name (no extension) of the CSV file the processed data is written to
filename = "carbon"

In [18]:
# Request parameters: which dataset/variables to fetch and the spatial,
# temporal and depth window to subset. Each range is [minimum, maximum].
data_request = dict(
    dataset_id="dataset-carbon-rep-monthly",
    variables=["fgco2", "omega_ar", "omega_ca", "ph", "spco2", "talk", "tco2"],
    longitude=[-180, 180],
    latitude=[-90, 90],
    time=["2021-03-01", "2021-03-31"],
    # for bathymetry set depth to [0.49402499198913574, 5727.9169921875]
    depth=[None, None],
)

In [19]:
# Fetch the requested subset as a pandas DataFrame.
# Each entry of data_request holding a range is [minimum, maximum], so index 0
# is the lower bound and index 1 the upper bound.
df = cm.read_dataframe(
    dataset_id=data_request["dataset_id"],
    variables=data_request["variables"],
    minimum_longitude=data_request["longitude"][0],
    maximum_longitude=data_request["longitude"][1],
    minimum_latitude=data_request["latitude"][0],
    maximum_latitude=data_request["latitude"][1],
    minimum_depth=data_request["depth"][0],
    maximum_depth=data_request["depth"][1],
    start_datetime=data_request["time"][0],
    # end of the time window (previously the start date was passed here too,
    # which collapsed the request to a single instant)
    end_datetime=data_request["time"][1]
)

INFO - 2024-07-31T21:33:17Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-31T21:33:17Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-31T21:33:19Z - Service was not specified, the default one was selected: "arco-geo-series"
  return Timestamp(date).to_pydatetime()


In [20]:
# Turn the index levels into ordinary columns, keep only the coordinates and
# the requested feature columns, then discard every row with a missing value.
df = (
    df.reset_index()
    .loc[:, ["latitude", "longitude", *data_request["variables"]]]
    .dropna()
)

df.head()

Unnamed: 0,latitude,longitude,fgco2,omega_ar,omega_ca,ph,spco2,talk,tco2
73440,-75.375,-179.875,1.587086,1.466226,2.338735,8.087303,351.4552,2321.090332,2194.776367
73441,-75.375,-179.625,1.428289,1.458979,2.327192,8.085032,353.459381,2320.877197,2195.430908
73442,-75.375,-179.375,1.288172,1.454097,2.319419,8.083524,354.782806,2320.651367,2195.791016
73443,-75.375,-179.125,1.154647,1.447437,2.308806,8.081388,356.694336,2320.527588,2196.470459
73444,-75.375,-178.875,1.018365,1.43867,2.294847,8.078671,359.114868,2320.257324,2197.253906


In [14]:
# function to coarse grain the data and make resolution same as Pisces data
def coarse_grain(df, features):
    """
    Average feature values onto a 0.25 deg x 0.25 deg latitude/longitude grid.

    Every coordinate is snapped down to the nearest multiple of 0.25 deg
    (e.g. -75.375 -> -75.5, 10.2 -> 10.0) and all rows that land in the same
    (latitude, longitude) cell are averaged.

    The input dataframe is left untouched: no helper column is added to it and
    its coordinates are not overwritten, so the calling cell can be re-run.

    Parameters
    ----------
    df: pandas dataframe containing the data accessed from Copernicus Marine;
        must have "latitude" and "longitude" columns
    features: names of the feature columns in the dataframe to average

    Output
    ------
    a pandas dataframe indexed by (latitude, longitude) holding the mean of
    each feature for every 0.25 deg x 0.25 deg cell
    """
    cell_size = 0.25  # target grid spacing in degrees

    def snap(column):
        # float64 keeps the same coordinate dtype the previous implementation
        # produced; 0.25 is a power of two so the division is exact.
        values = df[column].to_numpy(dtype="float64")
        return np.floor(values / cell_size) * cell_size

    # assign() returns a new frame, so the caller's dataframe is not modified
    binned = df.assign(latitude=snap("latitude"), longitude=snap("longitude"))

    return binned.groupby(["latitude", "longitude"])[features].mean()

In [21]:
# The values need to be coarse grained because these datasets have a finer
# resolution than (or a grid offset from) the Pisces data.
# Pisces data has resolution of 0.25 deg x 0.25 deg
# The feature list is taken from the request rather than from column
# positions, so any extra column present in df cannot slip into the average.
df_cg = coarse_grain(df, data_request["variables"])
print(df_cg.shape)
df_cg.head()

(579411, 7)


Unnamed: 0_level_0,Unnamed: 1_level_0,fgco2,omega_ar,omega_ca,ph,spco2,talk,tco2
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-75.5,-180.0,1.587086,1.466226,2.338735,8.087303,351.4552,2321.090332,2194.776367
-75.5,-179.75,1.428289,1.458979,2.327192,8.085032,353.459381,2320.877197,2195.430908
-75.5,-179.5,1.288172,1.454097,2.319419,8.083524,354.782806,2320.651367,2195.791016
-75.5,-179.25,1.154647,1.447437,2.308806,8.081388,356.694336,2320.527588,2196.470459
-75.5,-179.0,1.018365,1.43867,2.294847,8.078671,359.114868,2320.257324,2197.253906


In [16]:
# Write the coarse-grained table to "<filename>_cg.csv" in the working directory
df_cg.to_csv(f"{filename}_cg.csv")