In [45]:
# Dependencies
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import netCDF4 

import warnings
# NOTE(review): with no category or module argument this filter silences every
# warning raised after this cell runs, including deprecation and numerical
# warnings from the libraries used below. Consider narrowing it to the specific
# warning that motivated it.
warnings.filterwarnings('ignore')

In [46]:
# Paths to load the files

# NetCDF input file, relative to the notebook's working directory.
# NOTE(review): the file name contains "obs", which hints at an observation
# product rather than model output, despite the `mod` naming — confirm.
path_mod = 'data/cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M_1663434082196.nc'

# Open the file as an xarray Dataset; later cells read CHL and time from `mod`.
mod = xr.open_dataset(path_mod)

In [47]:
# Display the dataset's repr as the cell output.
mod

In [48]:
# Target variable: chlorophyll (CHL)
CHL = mod.CHL
CHL

# CHL subset at one specific point (coordinate) → DataFrame

In [49]:
# Target point and the dataset's time axis
lon = -75.5
lat = -45
months = mod["time"]

In [50]:
# Chlorophyll time series at the grid cell closest to (lon, lat).
# Only the spatial coordinates need nearest-neighbour matching. The time axis
# is left unindexed so every time step is kept exactly as stored, instead of
# being fuzzy-matched against its own values.
chl = mod['CHL'].sel(lon=lon, lat=lat, method='nearest')
chl

In [51]:
# Plain NumPy copy of the selected values
chlorophyll = np.array(chl.values)
chlorophyll

array([0.71422833, 0.74513483, 0.7001885 , 0.46028078, 1.0328232 ,
       0.41510385, 0.34843612, 1.1815248 , 0.991909  , 0.98454666,
       0.7108044 , 0.88255537, 0.625687  , 0.51184213, 0.68925047,
       1.4418905 , 0.44744745, 0.32287535, 0.36315316, 0.4263361 ,
       0.95482147, 0.96632874, 0.5396061 , 0.8179376 , 0.64476997,
       0.5798963 , 0.6625683 , 0.49198186, 0.6425198 , 0.9168838 ,
       0.53811014, 0.456529  , 0.36727187, 0.5641924 , 1.1326298 ,
       1.0588819 , 1.3407774 , 1.4925351 , 1.4965256 , 0.9113728 ,
       0.39466447, 0.33819002, 0.35813504, 0.3616891 , 0.63896585,
       0.47198442, 1.1813204 , 0.91612303, 1.3254797 , 0.83892375,
       0.66233075, 0.4686347 , 0.61411047, 0.2712752 , 0.50024956,
       0.8267308 , 0.3092125 , 1.5070316 , 1.0952752 , 0.56299776,
       0.7503416 , 0.77567685, 0.36929   , 0.60988206, 0.39192143,
       0.5100615 , 0.4436796 , 0.7881116 , 1.1615305 , 1.103413  ,
       1.3835009 , 0.97644144, 0.74620503, 0.7744416 , 0.40761

In [52]:
# Single-column DataFrame built from the flattened array
df_chl = pd.Series(chlorophyll.flatten(), name='chlorophyll').to_frame()
df_chl

Unnamed: 0,chlorophyll
0,0.714228
1,0.745135
2,0.700189
3,0.460281
4,1.032823
...,...
260,0.824019
261,0.771279
262,1.474253
263,1.021748


# Time DataFrame

In [53]:
# Checking variables
# Opens the same file a second time, read-only, through netCDF4 and lists the
# names of the variables it contains.
# NOTE(review): the handle is never closed in the visible cells, and a later
# cell still reads `nc.variables['time']`, so it cannot simply be wrapped in a
# `with` block here. The same names are presumably available from `mod`.
nc = netCDF4.Dataset(path_mod, mode='r')
nc.variables.keys()

dict_keys(['CHL', 'DINO', 'lon', 'time', 'lat'])

In [54]:
# Time axis converted to a pandas DatetimeIndex
time = pd.to_datetime(mod["time"].values)
time

DatetimeIndex(['2000-01-01', '2000-02-01', '2000-03-01', '2000-04-01',
               '2000-05-01', '2000-06-01', '2000-07-01', '2000-08-01',
               '2000-09-01', '2000-10-01',
               ...
               '2021-04-01', '2021-05-01', '2021-06-01', '2021-07-01',
               '2021-08-01', '2021-09-01', '2021-10-01', '2021-11-01',
               '2021-12-01', '2022-01-01'],
              dtype='datetime64[ns]', length=265, freq=None)

In [55]:
# netCDF4 variable object for the file's 'time' variable.
# NOTE(review): `Time` is not referenced again in the visible cells; the
# timestamps used downstream come from `time` (lower-case) instead.
Time = nc.variables['time']

In [57]:
# One-column frame of timestamps (column label 0, default integer index)
df_time = pd.DataFrame({0: time})
df_time

Unnamed: 0,0
0,2000-01-01
1,2000-02-01
2,2000-03-01
3,2000-04-01
4,2000-05-01
...,...
260,2021-09-01
261,2021-10-01
262,2021-11-01
263,2021-12-01


# Concatenate df_time & df_chl

In [58]:
# Place the timestamp and chlorophyll frames side by side, aligned on their index
df = pd.concat(objs=[df_time, df_chl], axis="columns")

In [59]:
# Combined frame before renaming: the columns are still labelled 0 and 'chlorophyll'.
df

Unnamed: 0,0,chlorophyll
0,2000-01-01,0.714228
1,2000-02-01,0.745135
2,2000-03-01,0.700189
3,2000-04-01,0.460281
4,2000-05-01,1.032823
...,...,...
260,2021-09-01,0.824019
261,2021-10-01,0.771279
262,2021-11-01,1.474253
263,2021-12-01,1.021748


In [60]:
# Changing names of columns for Prophet time-series analysis.
# The order matters because the names are applied positionally: "ds" labels the
# timestamp column and "y" the chlorophyll column.
table_cols = ["ds", "y"]

In [61]:
# Apply the new column labels positionally
df = df.set_axis(table_cols, axis="columns")

In [62]:
# Display the frame with the ds / y column names applied.
df

Unnamed: 0,ds,y
0,2000-01-01,0.714228
1,2000-02-01,0.745135
2,2000-03-01,0.700189
3,2000-04-01,0.460281
4,2000-05-01,1.032823
...,...,...
260,2021-09-01,0.824019
261,2021-10-01,0.771279
262,2021-11-01,1.474253
263,2021-12-01,1.021748


In [64]:
# Write the table to CSV without the row index
df.to_csv(path_or_buf="data/prophet_chl_monthly_2000-2022_whales.csv", index=False)