# create repackaged WOA DATA for each Dataset

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob

In [2]:
# Mixed-layer-depth climatology (de Boyer Montegut, per the directory name).
# Used as the default MLD source by MLD_at_latlon and MeanAboveMLDatLoc.
mld1 = xr.open_dataset('MLDClimatology_DeBoyerMontagut/mld_DT02_c1m_reg2.0.nc')

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


In [32]:
# Collect the WOA 2018 temperature NetCDF files.
# NOTE: recursive=True has no effect here because the pattern contains no '**'.
TempNCDFfiles = glob.glob('WOA2018/Temperature/*.nc', recursive=True)
# Merge all files into one dataset, ordered by their coordinate values.
# decode_times=False leaves the time coordinate as stored instead of decoding it to datetimes.
WOAtemp = xr.open_mfdataset(TempNCDFfiles,combine='by_coords',decode_times=False)

In [16]:
def MLD_at_latlon(lats=40,lons=-20, mlddata=mld1, justMLD=True):
    """Return the mixed-layer-depth table of the grid point nearest to a location.

    Parameters
    ----------
    lats, lons : float
        Latitude and longitude of the requested location; the nearest grid
        point of ``mlddata`` is selected.
    mlddata : xarray.Dataset
        MLD dataset with ``lat``/``lon`` coordinates and the variables
        ``mld`` and ``mask``.
    justMLD : bool
        If true, return only the ``mld`` column; otherwise return every
        column of the selected grid point.

    Returns
    -------
    pandas.DataFrame
        Twelve rows indexed 1..12 (index name ``'time'``). An empty
        DataFrame is returned when no entry of the ``mask`` column is
        truthy (presumably a land point -- confirm against the dataset's
        mask definition).

    Notes
    -----
    The full frame (``justMLD=False``) carries the original time index in a
    ``'time'`` column in addition to the index named ``'time'``.
    """
    # TODO: include a check whether the spot is on land or water
    # TODO: make sure the lat/lon usage across everything is coherent!
    mld2 = mlddata.sel(lat=lats,lon=lons,method='nearest').to_dataframe()
    # Keep the original time index as a column, then relabel the rows 1..12.
    mld2['time'] = mld2.index
    mld2.index = range(1,13)
    mld2.index.names = ['time']

    if not mld2['mask'].any():
        return pd.DataFrame()
    if justMLD:
        return mld2[['mld']]
    return mld2

In [17]:
def WOADatInLocation(lats=40,lons=-20, WOAtempdata=WOAtemp,var='t_an', justTempDepth=True):
    """Return the WOA profile table of the grid point nearest to a location.

    Parameters
    ----------
    lats, lons : float
        Latitude and longitude of the requested location; the nearest grid
        point of ``WOAtempdata`` is selected.
    WOAtempdata : xarray.Dataset
        WOA dataset with ``lat``, ``lon``, ``depth``, ``time`` and
        ``nbounds`` dimensions (assumes the time dimension is named
        ``'time'`` -- confirm for other inputs).
    var : str
        Name of the data variable to return when ``justTempDepth`` is true.
    justTempDepth : bool
        If true, return only the ``var`` column; otherwise every column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``('depth', 'time')`` where ``time`` is the month number
        1..12.

    Raises
    ------
    ValueError
        If the selection does not contain exactly 12 distinct time steps.
    """
    WOAtemp1 = WOAtempdata.sel(nbounds=0)
    WOAtemp_df = WOAtemp1.sel(lat=lats,lon=lons, method='nearest').to_dataframe()

    depth_level = WOAtemp_df.index.get_level_values('depth')
    time_level = WOAtemp_df.index.get_level_values('time')
    # Number the time steps 1..12 in ascending order of their stored value.
    # Deriving the month from each row's own time value avoids hard-coding
    # the number of depth levels or the row order of to_dataframe().
    month_codes, time_steps = pd.factorize(time_level, sort=True)
    if len(time_steps) != 12:
        raise ValueError(
            'expected 12 monthly time steps, found %d' % len(time_steps))
    WOAtemp_df.index = pd.MultiIndex.from_arrays(
        [depth_level, month_codes + 1], names=['depth','time'])

    if justTempDepth:
        return WOAtemp_df[[var]]
    return WOAtemp_df

In [18]:
def interpolateWOAprofiles(WOAdat, max_depth=1500):
    """Interpolate WOA profiles onto a 1-metre depth grid.

    Parameters
    ----------
    WOAdat : pandas.DataFrame
        Profiles indexed by two levels, depth first and month second.
    max_depth : int
        Deepest grid point in metres (inclusive). Defaults to 1500, the
        previously hard-coded limit.

    Returns
    -------
    pandas.DataFrame
        One row per integer depth ``0..max_depth`` and one column per
        (variable, month) pair. Only input rows whose depth equals a grid
        value are kept; the remaining grid rows are filled by linear
        interpolation down each column.
    """
    # Months become columns so each column is one vertical profile.
    per_month = WOAdat.unstack(level=1)
    on_grid = per_month.reindex(range(0, max_depth + 1))
    return on_grid.interpolate(method='linear')

In [19]:
def MeanAboveMLD(WOAint, MLD):
    """Average each month's profile over the depths shallower than that month's MLD.

    Parameters
    ----------
    WOAint : pandas.DataFrame
        Profiles indexed by ``'depth'`` with two-level columns whose inner
        level is the month number 1..12. The frame is not modified.
    MLD : pandas.DataFrame
        Must contain the columns ``'time'`` (month number) and ``'mld'``.

    Returns
    -------
    pandas.DataFrame
        Twelve rows with the columns ``'Month'`` and ``'Temp'``; ``Temp`` is
        the mean of the values at depths strictly less than the month's MLD
        (NaN when no depth qualifies).
    """
    # Work on a copy: assigning to .columns on the caller's frame would strip
    # its outer column level in place and make a second call on it fail.
    profiles = WOAint.copy()
    profiles.columns = profiles.columns.droplevel()
    profiles = profiles.reset_index()

    out = []
    for month in range(1, 13):
        mld_depth = MLD.loc[MLD['time'] == month, 'mld'].values[0]
        above_mld = profiles.loc[profiles['depth'] < mld_depth, month]
        out.append({'Month': month, 'Temp': above_mld.mean()})
    return pd.DataFrame(out)

In [20]:
def MeanAboveMLDatLoc(lats=40, lons=-20, month = 1, mlddat=mld1, WOAdat=WOAtemp, var='t_an'):
    """Return the monthly mean of ``var`` above the mixed layer depth at one location.

    Looks up the MLD table for the location, and if it is non-empty, fetches
    the WOA profiles there, interpolates them to 1 m and averages each month
    above its MLD. When the MLD lookup yields an empty frame, that empty
    frame is returned as-is. The ``month`` argument is accepted but not used.
    """
    mld_table = MLD_at_latlon(lats, lons, mlddat).reset_index()
    if mld_table.empty:
        return mld_table

    profiles_1m = interpolateWOAprofiles(WOADatInLocation(lats, lons, WOAdat, var))
    return MeanAboveMLD(profiles_1m, mld_table)

In [10]:
# Time a single-location lookup. The third positional argument is `month`,
# which MeanAboveMLDatLoc accepts but does not use.
%timeit MeanAboveMLDatLoc(20,10,7)

7.42 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# START CREATING NCDF FILE

In [11]:
from netCDF4 import Dataset

In [28]:
import time

# Output grid: 180 x 360 points at 1-degree spacing plus the 12 months.
lats = np.arange(-89.5,90.5,1)
lons = np.arange(-179.5,180.5,1)
times = np.arange(1,13,1)

# The context manager closes the file even when the long fill loop below is
# interrupted or raises, so no half-open file handle is left behind.
with Dataset('17_netcdf4.nc', 'w', format='NETCDF4') as rootgrp:
    rootgrp.description = 'WOA 2018 Temperature averaged above MLD'
    rootgrp.history = "Created " + time.ctime(time.time())
    rootgrp.source = "WOA2018 + DeBoyer Montague et al. 2004 MLD"

    # NOTE(review): the unlimited "var" dimension is not used by any variable
    # below; it is kept so the file layout stays the same.
    var = rootgrp.createDimension("var", None)
    # Named time_dim (not `time`) so the `time` module is not shadowed.
    time_dim = rootgrp.createDimension("time", len(times))
    lat = rootgrp.createDimension("lat", len(lats))
    lon = rootgrp.createDimension("lon", len(lons))
    print(rootgrp.dimensions)

    varss = rootgrp.createVariable("var","f8",("lat","lon","time"))
    month = rootgrp.createVariable("time","i4",("time",))
    latitudes = rootgrp.createVariable("lat","f4",("lat",))
    longitudes = rootgrp.createVariable("lon","f4",("lon",))

    latitudes.units = "degrees north"
    longitudes.units = "degrees east"

    month.units = "climatological months"
    month.calendar = "gregorian"

    latitudes[:] = lats
    longitudes[:] = lons
    month[:] = times

    # NOTE(review): lons run from -179.5 to 179.5; confirm that both input
    # datasets use the same longitude convention, since the lookups pick the
    # nearest grid point without complaining.
    for i, lat_value in enumerate(lats):
        for j, lon_value in enumerate(lons):
            print(lat_value, lon_value)
            # Pass the coordinate values, not the array indices: the lookup
            # functions select by latitude/longitude.
            MAMLD = MeanAboveMLDatLoc(lat_value, lon_value)
            # Write all 12 months of this grid cell in one assignment.
            if MAMLD.empty:
                varss[i, j, :] = np.nan
            else:
                varss[i, j, :] = MAMLD['Temp'].values

OrderedDict([('var', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'var', size = 0
), ('time', <class 'netCDF4._netCDF4.Dimension'>: name = 'time', size = 12
), ('lat', <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 180
), ('lon', <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 360
)])
-89.5
-88.5


KeyboardInterrupt: 

In [22]:
# Re-open a written file and list its variables as a sanity check.
# NOTE(review): this reads '16_netcdf4.nc', while the cell above writes
# '17_netcdf4.nc' -- confirm which file is meant.
testdata = xr.open_dataset('16_netcdf4.nc')
testdata.variables

Frozen(OrderedDict([('var', <xarray.Variable (lat: 180, lon: 360, time: 12)>
[777600 values with dtype=float64]), ('time', <xarray.IndexVariable 'time' (time: 12)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32)
Attributes:
    units:     climatological months
    calendar:  gregorian), ('lat', <xarray.IndexVariable 'lat' (lat: 180)>
array([-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5, -81.5, -80.5,
       -79.5, -78.5, -77.5, -76.5, -75.5, -74.5, -73.5, -72.5, -71.5, -70.5,
       -69.5, -68.5, -67.5, -66.5, -65.5, -64.5, -63.5, -62.5, -61.5, -60.5,
       -59.5, -58.5, -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5,
       -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5, -41.5, -40.5,
       -39.5, -38.5, -37.5, -36.5, -35.5, -34.5, -33.5, -32.5, -31.5, -30.5,
       -29.5, -28.5, -27.5, -26.5, -25.5, -24.5, -23.5, -22.5, -21.5, -20.5,
       -19.5, -18.5, -17.5, -16.5, -15.5, -14.5, -13.5, -12.5, -11.5, -10.5,
        -9.5,  -8.5,  -7.5,