In [1]:
# Dependencies
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import netCDF4 

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings from the libraries imported above.
warnings.filterwarnings('ignore')

In [2]:
# Paths to load the files

# NetCDF file holding the plankton (CHL) product.
# NOTE(review): the variable is named `mod` and this was labelled a "model" dataset,
# but the file name contains "obs" — confirm whether this is an observation product.
path_mod = 'data/cmems_obs-oc_glo_bgc-plankton_my_l4-gapfree-multi-4km_P1D_1663435141098.nc'

# Open the file as an xarray Dataset
mod = xr.open_dataset(path_mod)

In [3]:
# Display the opened dataset
mod

In [4]:
# Target variable: chlorophyll (CHL)
CHL = mod['CHL']
CHL

# CHL subset at one specific point -> DataFrame

In [5]:
# Point of interest (longitude, latitude)
lon = -75.5
lat = -45
# NOTE: despite its name, `months` holds the dataset's entire time coordinate
months = mod["time"]

In [6]:
# Extract the CHL series at the grid cell nearest to (lon, lat).
# No `time` indexer is passed: `months` is the dataset's own time coordinate,
# so nearest-matching it against itself only re-selected every time step.
chl = mod['CHL'].sel(lon=lon, lat=lat, method='nearest')
chl

In [7]:
# Copy the selected CHL series into a plain NumPy array
chlorophyll = np.array(chl)
chlorophyll

array([0.8616517 , 0.8057574 , 0.85655683, ..., 0.96767205, 0.96767205,
       1.0767134 ], dtype=float32)

In [8]:
# One-column frame holding the chlorophyll values (default 0..N-1 row index)
df_chl = pd.Series(chlorophyll.flatten(), name='chlorophyll').to_frame()
df_chl

Unnamed: 0,chlorophyll
0,0.861652
1,0.805757
2,0.856557
3,0.856562
4,0.739469
...,...
4013,0.603901
4014,0.639898
4015,0.967672
4016,0.967672


# Time df

In [9]:
# List the variables stored in the file through the low-level netCDF4 API.
# NOTE(review): `nc` is opened read-only and is not closed in the cells shown here;
# a later cell still reads from it, so it must stay open at least until then.
nc = netCDF4.Dataset(path_mod, mode='r')
nc.variables.keys()

dict_keys(['CHL', 'lon', 'time', 'lat'])

In [10]:
# Convert the dataset's time coordinate to pandas datetimes
time = pd.to_datetime(mod["time"])
time

DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04',
               '2010-01-05', '2010-01-06', '2010-01-07', '2010-01-08',
               '2010-01-09', '2010-01-10',
               ...
               '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-25',
               '2020-12-26', '2020-12-27', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', length=4018, freq=None)

In [11]:
# Handle to the raw netCDF `time` variable.
# NOTE(review): `Time` (capital T) is not used by any later cell shown here — confirm it is still needed.
Time = nc.variables['time']

In [12]:
# Wrap the datetimes in a DataFrame; no column name is given, so the column is labelled 0
df_time = pd.DataFrame(time)
df_time

Unnamed: 0,0
0,2010-01-01
1,2010-01-02
2,2010-01-03
3,2010-01-04
4,2010-01-05
...,...
4013,2020-12-27
4014,2020-12-28
4015,2020-12-29
4016,2020-12-30


# Concatenate df_time & df_chl

In [13]:
# Put the time column and the chlorophyll column side by side (rows are paired by index label)
df = pd.concat((df_time, df_chl), axis="columns")

In [14]:
# Inspect the combined frame
df

Unnamed: 0,0,chlorophyll
0,2010-01-01,0.861652
1,2010-01-02,0.805757
2,2010-01-03,0.856557
3,2010-01-04,0.856562
4,2010-01-05,0.739469
...,...,...
4013,2020-12-27,0.603901
4014,2020-12-28,0.639898
4015,2020-12-29,0.967672
4016,2020-12-30,0.967672


In [15]:
# Column names used for the Prophet time-series analysis: "ds" for the dates, "y" for the values
table_cols = ["ds", "y"]

In [16]:
# Rename the columns in place, by position: first column -> "ds", second column -> "y"
df.columns = table_cols

In [17]:
# Confirm the renamed columns
df

Unnamed: 0,ds,y
0,2010-01-01,0.861652
1,2010-01-02,0.805757
2,2010-01-03,0.856557
3,2010-01-04,0.856562
4,2010-01-05,0.739469
...,...,...
4013,2020-12-27,0.603901
4014,2020-12-28,0.639898
4015,2020-12-29,0.967672
4016,2020-12-30,0.967672


In [18]:
# Write the ds/y table to CSV; index=False leaves the row index out of the file
df.to_csv("data/prophet_chl_daily_2010-2020_whales.csv", index=False)