In [4]:
import pydap.client
import numpy as np
import matplotlib.pyplot as plt

import glob
from subprocess import *
import sys, os
from pylab import *
import pandas as pd 
import datetime
from netCDF4 import Dataset
import xarray as xr

from ctypes import c_float, c_int, cdll, c_short
from numpy.ctypeslib import ndpointer
import time
import multiprocessing 

## Setup C Interface

In [5]:
# Load the compiled shared library that provides the per-pixel C routine.
# The path is absolute and machine-specific; the cell fails if the file is absent.
so_file = '/media/gsilsbe/nasa_npp/scripts/cafe/py_giop.so'
lib = cdll.LoadLibrary(so_file)
# The library's `main` symbol is the function invoked once per pixel by call_cafe().
c_cafe = lib.main
# Interpret the pointer returned by the C function as an array of 3 C floats.
# NOTE(review): no `argtypes` are declared, so every caller must wrap its
# arguments in c_float explicitly (call_cafe does).
# NOTE(review): the lifetime/ownership of the returned buffer depends on the C
# side, which is not visible here -- TODO confirm it stays valid after the call.
c_cafe.restype = ndpointer(dtype=c_float, shape=(3,))

# Shared input buffer: 5 rows (one per input variable) by 4320*2160 columns
# (one per grid pixel). cafe() fills it in place and call_cafe() reads it.
# Initialised to NaN in a single step; `np.nan` is used because the `np.NaN`
# alias was removed in NumPy 2.0 and raises AttributeError there.
cafe_in = np.full((5, 4320*2160), np.nan)

def call_cafe(i):
    """Run the C routine on one pixel of the shared ``cafe_in`` buffer.

    Parameters
    ----------
    i : int
        Column index into ``cafe_in`` selecting the pixel to process.

    Returns
    -------
    Whatever ``c_cafe`` returns for the five values stored in column ``i``
    (rows 0 to 4, passed positionally in that order, each as a C float).
    """
    c_args = [c_float(cafe_in[row, i]) for row in range(5)]
    return c_cafe(*c_args)


## Functions called from Wrapper

In [6]:
def nasa_url(d0, d1, ext):
    """Build the path of a local monthly input file.

    Despite the name, the result is a filesystem path under the hard-coded
    monthly data directory, not a network URL.

    Parameters
    ----------
    d0 : datetime-like
        First day of the period; must provide ``strftime``.
    d1 : datetime-like
        Last day of the period; must provide ``strftime``.
    ext : str
        File-name suffix appended after the date range, e.g.
        ``'.L3m.MO.CHL.chlor_a.9km.nc'``.

    Returns
    -------
    str
        ``<monthly dir>/A<YYYYMMDD of d0>_<YYYYMMDD of d1><ext>``.
    """
    # The end-date part is taken entirely from d1 (including the year) so that
    # a range crossing a year boundary still yields the correct name.
    return ('/media/gsilsbe/nasa_npp/modisa_r2022/monthly/A' + d0.strftime('%Y%m%d') + '_' +
            d1.strftime('%Y%m%d') + ext)

## Main wrapper: process one month and write the NetCDF output

In [7]:

def cafe(date0, outdir, processes=15):
    """Process one month of gridded inputs and write the result as NetCDF.

    The five monthly input files for the month starting at ``date0`` are read
    into the module-level ``cafe_in`` buffer, the C routine is run in parallel
    (via ``call_cafe``) on every pixel where all five inputs are finite, and
    the three outputs are written as ``adg``, ``aph`` and ``bbp`` on the input
    lat/lon grid. Nothing is done if the output file already exists.

    Parameters
    ----------
    date0 : pandas.Timestamp
        First day of the month to process.
    outdir : str
        Directory that receives the output file.
    processes : int, optional
        Number of worker processes used for the per-pixel computation.

    Returns
    -------
    None
        The function works by side effect: it overwrites ``cafe_in`` and
        creates ``A<YYYYDDD><YYYYDDD>.L3m_MO_IOP_9km.nc`` in ``outdir``.
    """
    # Last day of the month that starts at date0.
    date1 = date0 + pd.DateOffset(months=1) - pd.DateOffset(days=1)

    outfile = os.path.join(
        outdir,
        'A' + date0.strftime('%Y%j') + date1.strftime('%Y%j') + '.L3m_MO_IOP_9km.nc')

    # Months that were already processed are skipped.
    if os.path.exists(outfile):
        return

    # (file suffix, variable name) for each row of cafe_in, in the positional
    # order expected by call_cafe.
    inputs = (
        ('.L3m.MO.IOP.adg_443.9km.nc', 'adg_443'),
        ('.L3m.MO.IOP.aph_443.9km.nc', 'aph_443'),
        ('.L3m.MO.IOP.bbp_443.9km.nc', 'bbp_443'),
        ('.L3m.MO.IOP.bbp_s.9km.nc', 'bbp_s'),
        ('.L3m.MO.CHL.chlor_a.9km.nc', 'chlor_a'),
    )

    # Flatten each local NetCDF grid (lat-major) into one row of cafe_in.
    # Every file is opened in a `with` block so its handle is released, and
    # the chlor_a file is opened once for both its data and its coordinates.
    lat = lon = None
    for row, (ext, varname) in enumerate(inputs):
        with xr.open_dataset(nasa_url(date0, date1, ext), engine='netcdf4') as src:
            cafe_in[row, :] = src[varname].stack(z=('lat', 'lon')).values
            if varname == 'chlor_a':
                # Load the coordinates so they stay usable after the file closes.
                lat = src.lat.load()
                lon = src.lon.load()

    # Index of pixels where all five inputs are present (a NaN in any row
    # makes the column sum non-finite).
    rowsums = cafe_in.sum(axis=0)
    pixloc = np.flatnonzero(np.isfinite(rowsums))

    # Print some information about the run
    print('Valid pixels:', len(pixloc), '% Coverage', round(100 * len(pixloc) / (len(lat) * len(lon)), 1))
    start = time.time()
    print (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    # Run the C routine in parallel. Only pixel indices are sent to the
    # workers; they read the values from the module-level cafe_in filled
    # above, which requires the workers to be forked from this process, so
    # the start method is requested explicitly. The `with` block releases the
    # pool even if a worker raises.
    with multiprocessing.get_context('fork').Pool(processes=processes) as a_pool:
        result = a_pool.map(call_cafe, pixloc)

    # See how many minutes it takes
    print('CAFE C Code Duration (mins):', round((time.time() - start)/60))

    # One row of 3 outputs per valid pixel; the reshape keeps the array 2-D
    # even when there are no valid pixels at all.
    cafe_out = np.array(result, dtype=float).reshape(-1, 3)
    # Upper bound per output column; larger values are discarded below.
    cafe_max = np.array([5,5,0.15])
    grid_shape = (len(lat), len(lon))

    def back_to_numpy(ind):
        """Scatter output column ``ind`` back onto the 2-D grid, masking
        negative values and values above ``cafe_max[ind]`` with NaN."""
        var = np.full(grid_shape[0] * grid_shape[1], np.nan)
        var[pixloc] = cafe_out[:, ind]
        var = var.reshape(grid_shape)
        var = np.where(var<0, np.nan, var)
        var = np.where(var>cafe_max[ind], np.nan, var)
        return var

    adg = back_to_numpy(0)
    aph = back_to_numpy(1)
    bbp = back_to_numpy(2)

    ds = xr.Dataset(
        data_vars=dict(
            adg=(["lat", "lon"], adg),
            aph=(["lat", "lon"], aph),
            bbp=(["lat", "lon"], bbp),
        ),
        coords={'time':date0, 'lat':lat, 'lon':lon},
    )

    # Write to a temporary name and rename at the end, so an interrupted run
    # never leaves a partial file that the exists-check above would skip.
    tmpfile = outfile + '.tmp'
    ds.to_netcdf(tmpfile, encoding={
                 'adg': {"dtype": 'short', "scale_factor": 0.0001,"add_offset": 2.5,"_FillValue": -32767},
                 'aph': {"dtype": 'short', "scale_factor": 0.0001,"add_offset": 2.5,"_FillValue": -32767},
                 'bbp': {"dtype": 'short', "scale_factor": 5e-6,"add_offset": 0.14,"_FillValue": -32767}})
    os.replace(tmpfile, outfile)


In [10]:
# Month-start timestamps for every month in the processing window.
ts0 = pd.date_range(start="2002-08-01", end="2023-07-31", freq="MS")
# Destination directory for the monthly output files.
outdir = '/media/gsilsbe/nasa_npp/cafe/monthly_9km/'

# Process the months in chronological order; cafe() does nothing for a month
# whose output file already exists, so the loop can be re-run safely.
for month_start in ts0:
    cafe(month_start, outdir)