# create repackaged WOA DATA for each Dataset

In [27]:
import numpy as np
import pandas as pd
import xarray as xr
import glob

## MLD data-set to use:
new one "MLD_minDTR02"

variable is: "mld_mindtr02_rmoutliers_smth_okrg"

In [28]:
# Open the mixed-layer-depth (MLD) climatology file; the functions below read
# the 'mld_mindtr02_rmoutliers_smth_okrg' variable from it.
mldX = xr.open_dataset('MLDClimatology_DeBoyerMontagut/2019_11_07_data_L3_mldmindtr02_c1m_sameasSent2MHRio20141103__Sent2BenjaminPostUcsdBremen/mld_mindtr02_l3.nc')

In [29]:
# Open the land/sea mask; MLD_at_latlon reads its 'LSMASK' variable and returns
# an empty frame wherever that value is non-zero.
landsea = xr.open_dataset('LandSeaMask/landsea.nc')

In [64]:
# Gather the WOA2018 temperature NetCDF files and open them as one dataset,
# merged along their coordinates.
# NOTE(review): recursive=True has no effect because the pattern has no '**'.
# decode_times=False keeps the time coordinate as raw numbers instead of dates.
TempNCDFfiles = glob.glob('WOA2018/Temperature/*.nc', recursive=True)
WOAtemp = xr.open_mfdataset(TempNCDFfiles,combine='by_coords',decode_times=False)

In [61]:
# Gather the WOA2018 nitrate NetCDF files and open them as one dataset.
# NOTE(review): recursive=True has no effect because the pattern has no '**'.
NitrateNCDFfiles = glob.glob('WOA2018/Nitrate/*.nc', recursive=True)
WOAnitrate = xr.open_mfdataset(NitrateNCDFfiles,combine='by_coords',decode_times=False)

In [30]:
# Gather the WOA2018 phosphate NetCDF files and open them as one dataset.
# NOTE(review): recursive=True has no effect because the pattern has no '**'.
PhosphateNCDFfiles = glob.glob('WOA2018/Phosphate/*.nc', recursive=True)
WOAphosphate = xr.open_mfdataset(PhosphateNCDFfiles,combine='by_coords',decode_times=False)

In [46]:
# Gather the WOA2018 silicate NetCDF files and open them as one dataset.
# NOTE(review): recursive=True has no effect because the pattern has no '**'.
SilicateNCDFfiles = glob.glob('WOA2018/Silicate/*.nc', recursive=True)
WOAsilicate = xr.open_mfdataset(SilicateNCDFfiles,combine='by_coords',decode_times=False)

In [47]:
WOAsilicate.variables

Frozen(OrderedDict([('depth', <xarray.IndexVariable 'depth' (depth: 43)>
array([  0.,   5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.,  50.,  55.,
        60.,  65.,  70.,  75.,  80.,  85.,  90.,  95., 100., 125., 150., 175.,
       200., 225., 250., 275., 300., 325., 350., 375., 400., 425., 450., 475.,
       500., 550., 600., 650., 700., 750., 800.], dtype=float32)
Attributes:
    standard_name:  depth
    bounds:         depth_bnds
    positive:       down
    units:          meters
    axis:           Z), ('lat', <xarray.IndexVariable 'lat' (lat: 180)>
array([-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5, -81.5, -80.5,
       -79.5, -78.5, -77.5, -76.5, -75.5, -74.5, -73.5, -72.5, -71.5, -70.5,
       -69.5, -68.5, -67.5, -66.5, -65.5, -64.5, -63.5, -62.5, -61.5, -60.5,
       -59.5, -58.5, -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5,
       -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5, -41.5, -40.5,
       -39.5, -38.5, -37.5, -36.5, -35.5, -34.5

## now let's try all the other WOA2018 datasets
- and finally extract forcing for one or two specific locations, and use these for a test implementation towards the model


In [48]:
def WOADatInLocation(WOAfile, lats=40,lons=-20, WOAvar='Temp', justValPerDepth=True):
    """Return the WOA profiles at the grid point nearest to a location.

    The dataset is reduced to ``nbounds=0``, the grid cell nearest to
    (``lats``, ``lons``) is selected and converted to a pandas DataFrame.
    The frame's index is then rebuilt as a ('depth', 'time') MultiIndex in
    which 'time' is the month number 1..12, repeated for every depth level.

    Parameters
    ----------
    WOAfile : dataset-like
        Object offering ``.sel(...)`` and ``.to_dataframe()`` (the notebook
        passes the opened WOA2018 datasets).
    lats, lons : number
        Target latitude and longitude; the nearest grid point is used.
    WOAvar : {'Temp', 'N', 'P', 'Si'}
        Which tracer the dataset holds. Selects the value column
        ('t_an', 'n_an', 'p_an', 'i_an').
    justValPerDepth : bool
        If True return only the tracer's value column, otherwise the whole
        frame.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('depth', 'time').

    Raises
    ------
    ValueError
        If ``WOAvar`` is unknown while a single value column is requested,
        or if the number of rows is not a multiple of 12.
    """
    # Value column for each supported tracer.
    column_by_tracer = {'Temp': 't_an', 'N': 'n_an', 'P': 'p_an', 'Si': 'i_an'}

    WOAdat = WOAfile.sel(nbounds=0)
    WOA_df = WOAdat.sel(lat=lats, lon=lons, method='nearest').to_dataframe()

    var = column_by_tracer.get(WOAvar)
    if var is None:
        if justValPerDepth == True:
            raise ValueError(
                "Unknown WOAvar %r; expected one of %s"
                % (WOAvar, sorted(column_by_tracer))
            )
        # No tracer-specific handling requested: hand back the raw frame.
        return WOA_df

    # Derive the number of depth levels from the data instead of hard-coding
    # it, so datasets with a different vertical grid also work.
    n_rows = len(WOA_df)
    if n_rows % 12 != 0:
        raise ValueError(
            "Expected 12 monthly rows per depth level, got %d rows" % n_rows
        )
    # Assumes rows are ordered depth-major, month-minor -- TODO confirm
    # against the dimension order of the opened dataset.
    months = list(range(1, 13)) * (n_rows // 12)
    WOA_df.index = pd.MultiIndex.from_arrays(
        [WOA_df.index.get_level_values(level='depth'), months],
        names=['depth', 'time'],
    )

    if justValPerDepth == True:
        return WOA_df[[var]]
    return WOA_df

In [49]:
#WOADatInLocation(WOAphosphate,WOAvar='P')

In [50]:
def MLD_at_latlon(lats=40,lons=-20, mlddatas=mldX, landseadat=landsea, justMLD=True):
    """Return the monthly mixed-layer depth at the grid point nearest to a location.

    The result is a DataFrame indexed by month number 1..12 (index name
    'time'); the original index of the selection is kept in a 'time' column.
    An empty DataFrame is returned when the land/sea mask value at the
    location is non-zero.

    With ``justMLD`` True only the 'mld_mindtr02_rmoutliers_smth_okrg'
    column is returned, otherwise every column of the selection.
    """
    # TODO: include a check whether the spot is on land or water.
    # TODO: make sure the lat/lon usage across everything is coherent!
    gridded = mlddatas.drop_dims(['nlines1','nlines2','nlines3','nprf'])
    monthly = gridded.sel(lat=lats, lon=lons, method='nearest').to_dataframe()

    # NOTE(review): the mask is looked up at 180+lons, i.e. it presumably uses
    # a longitude axis shifted by 180 degrees -- confirm.
    mask_point = landseadat.sel(lat=lats, lon=180+lons, method='nearest')
    ls = mask_point.variables['LSMASK'].values

    # Keep the original index as a column, then re-index by month number.
    monthly['time'] = monthly.index
    monthly.index = range(1,13)
    monthly.index.names = ['time']

    if ls!=0:
        return pd.DataFrame()
    if justMLD==True:
        return monthly[['mld_mindtr02_rmoutliers_smth_okrg']]
    return monthly

In [67]:
# Extract the monthly MLD at 40N, 20W and write it to CSV as test forcing.
a = MLD_at_latlon(40,-20,mldX).reset_index()

a.to_csv('../MLD_40minus20.csv')

In [52]:
#WOA = WOADatInLocation(WOAnitrate,lats=40,lons=-20,WOAvar='N')

In [53]:
def interpolateWOAprofiles(WOAdat,var):
    """Interpolate WOA depth profiles onto a 1-metre depth grid.

    ``WOAdat`` is a DataFrame indexed by (depth, time). Index level 1 is
    moved into the columns, the depth index is re-indexed to every integer
    depth from 0 to the maximum for the tracer, and the gaps are filled by
    linear interpolation.

    Parameters
    ----------
    WOAdat : pandas.DataFrame
        Profiles with a two-level (depth, time) index.
    var : {'Temp', 'N', 'P', 'Si'}
        Tracer name. Temperature is gridded down to 1500, the nutrients
        down to 800.

    Returns
    -------
    pandas.DataFrame
        One row per integer depth, one column per (variable, time) pair.

    Raises
    ------
    ValueError
        If ``var`` is not a supported tracer (previously this silently
        returned None).
    """
    # Deepest level to grid to, per tracer.
    max_depth_by_var = {'Temp': 1500, 'N': 800, 'P': 800, 'Si': 800}
    if var not in max_depth_by_var:
        raise ValueError(
            "Unknown var %r; expected one of %s" % (var, sorted(max_depth_by_var))
        )

    WOAgrid = WOAdat.unstack(level=1)
    WOAgrid1 = WOAgrid.reindex(range(0, max_depth_by_var[var] + 1))
    return WOAgrid1.interpolate(method='linear')

In [54]:
#WOA_int = interpolateWOAprofiles(WOA,'N')

In [55]:
#WOA_int

In [56]:
def ValueBelowMLD(WOAint, MLD):
    """Return, per month, the mean of the first five profile values below the MLD.

    For each month 1..12 the rows of ``WOAint`` whose depth is strictly
    greater than that month's mixed-layer depth are selected, and the mean
    of the first five of them is taken (five metres when ``WOAint`` is on a
    1-metre grid).

    Parameters
    ----------
    WOAint : pandas.DataFrame
        Interpolated profiles indexed by 'depth', with two-level columns
        whose second level is the month number. It is not modified.
    MLD : pandas.DataFrame
        Must have a 'time' column (month number) and a
        'mld_mindtr02_rmoutliers_smth_okrg' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Month' and 'Conc', one row per month.
    """
    mld_col = 'mld_mindtr02_rmoutliers_smth_okrg'

    # Work on a copy: assigning .columns on the argument itself would strip a
    # column level from the caller's frame and make a second call fail.
    profiles = WOAint.copy()
    profiles.columns = profiles.columns.droplevel()
    WOAx = profiles.reset_index()

    out = []
    for month in range(1, 13):
        mld_depth = MLD.loc[MLD['time'] == month, mld_col].values[0]
        below = WOAx.loc[WOAx['depth'] > mld_depth, month]
        out.append({'Month': month, 'Conc': below.iloc[0:5].mean()})
    return pd.DataFrame(out)

In [57]:
def MeanAboveMLD(WOAint, MLD):
    """Return, per month, the mean of all profile values above the MLD.

    For each month 1..12 the rows of ``WOAint`` whose depth is strictly
    less than that month's mixed-layer depth are selected and averaged.

    Parameters
    ----------
    WOAint : pandas.DataFrame
        Interpolated profiles indexed by 'depth', with two-level columns
        whose second level is the month number. It is not modified.
    MLD : pandas.DataFrame
        Must have a 'time' column (month number) and a
        'mld_mindtr02_rmoutliers_smth_okrg' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Month' and 'Temp', one row per month.
    """
    mld_col = 'mld_mindtr02_rmoutliers_smth_okrg'

    # Work on a copy: assigning .columns on the argument itself would strip a
    # column level from the caller's frame and make a second call fail.
    profiles = WOAint.copy()
    profiles.columns = profiles.columns.droplevel()
    WOAx = profiles.reset_index()

    out = []
    for month in range(1, 13):
        mld_depth = MLD.loc[MLD['time'] == month, mld_col].values[0]
        above = WOAx.loc[WOAx['depth'] < mld_depth, month]
        out.append({'Month': month, 'Temp': above.mean()})
    return pd.DataFrame(out)

In [58]:
def ReturnAnalyzedWOAdata(WOAdat, lats=40, lons=-20, mlddat=mldX, WOAvar='Temp', var='t_an'):
    """Combine WOA profiles with the MLD climatology at one location.

    Looks up the monthly MLD at (``lats``, ``lons``). If that lookup comes
    back empty, the empty frame is returned as-is. Otherwise the WOA
    profiles for ``WOAvar`` are extracted and interpolated, then reduced per
    month: 'Temp' via MeanAboveMLD, and 'N', 'P' or 'Si' via ValueBelowMLD.
    Any other ``WOAvar`` yields None. The ``var`` parameter is accepted but
    not used.
    """
    MLD = MLD_at_latlon(lats, lons, mlddat).reset_index()
    if MLD.empty:
        # Nothing to analyse at this grid point.
        return MLD

    profiles = WOADatInLocation(WOAdat, lats=lats, lons=lons, WOAvar=WOAvar)
    gridded = interpolateWOAprofiles(profiles, WOAvar)

    if WOAvar == 'Temp':
        return MeanAboveMLD(gridded, MLD)
    if WOAvar in ('N', 'P', 'Si'):
        return ValueBelowMLD(gridded, MLD)
    return None

## QUESTIONS that remain:

- to what depth should I take the mean when calculating N0 ?   
- (this matters for all nutrients, incl. P & Si)
- any use for oxygen & AOU? (this is in WOA2018 as well)
- and Salinity?

In [65]:
# Monthly mean temperature above the MLD at 40N, 20W, written to CSV as test forcing.
a = ReturnAnalyzedWOAdata(WOAtemp,40,-20,WOAvar='Temp')
a.to_csv('../Tmld_40minus20.csv')

# START CREATING NCDF FILE

In [97]:
from netCDF4 import Dataset

In [None]:
import time

# Target grid: 1-degree cell centres and the twelve climatological months.
lats = np.arange(-89.5,90.5,1)
lons = np.arange(-179.5,180.5,1)
times = np.arange(1,13,1)

# The context manager closes the file even if the loop below raises, so a
# failed run does not leave an open handle behind.
with Dataset('01_netcdf4.nc', 'w', format='NETCDF4') as rootgrp:
    rootgrp.description = 'WOA 2018 Data analyzed with MLD climatology'
    rootgrp.history = "Created " + time.ctime(time.time())
    rootgrp.source = "WOA2018 + DeBoyer Montague et al. 2004"

    # The returned dimension objects are not needed afterwards; leaving them
    # unbound also avoids rebinding the name `time` away from the module.
    rootgrp.createDimension("var", None)
    rootgrp.createDimension("time", len(times))
    rootgrp.createDimension("lat", len(lats))
    rootgrp.createDimension("lon", len(lons))
    print(rootgrp.dimensions)

    varss = rootgrp.createVariable("var","f8",("lat","lon","time"))
    month = rootgrp.createVariable("time","i4",("time",))
    latitudes = rootgrp.createVariable("lat","f4",("lat",))
    longitudes = rootgrp.createVariable("lon","f4",("lon",))

    latitudes.units = "degrees north"
    longitudes.units = "degrees east"

    month.units = "climatological months"
    month.calendar = "gregorian"

    latitudes[:] = lats
    longitudes[:] = lons
    month[:] = times

    for i, lat_value in enumerate(lats):
        for j, lon_value in enumerate(lons):
            print(lat_value, lon_value)
            # Pass the temperature dataset and the actual coordinates; the
            # earlier call handed the loop indices in as dataset and latitude.
            MAMLD = ReturnAnalyzedWOAdata(WOAtemp, lat_value, lon_value, WOAvar='Temp')
            if MAMLD.empty:
                # No MLD available at this grid point: store missing values.
                varss[i, j, :] = np.nan
            else:
                varss[i, j, :] = MAMLD['Temp'].values

OrderedDict([('var', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'var', size = 0
), ('time', <class 'netCDF4._netCDF4.Dimension'>: name = 'time', size = 12
), ('lat', <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 180
), ('lon', <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 360
)])
-89.5 -179.5
-89.5 -178.5
-89.5 -177.5
-89.5 -176.5
-89.5 -175.5
-89.5 -174.5
-89.5 -173.5
-89.5 -172.5
-89.5 -171.5
-89.5 -170.5
-89.5 -169.5
-89.5 -168.5
-89.5 -167.5
-89.5 -166.5
-89.5 -165.5
-89.5 -164.5
-89.5 -163.5
-89.5 -162.5
-89.5 -161.5
-89.5 -160.5
-89.5 -159.5
-89.5 -158.5
-89.5 -157.5
-89.5 -156.5
-89.5 -155.5
-89.5 -154.5
-89.5 -153.5
-89.5 -152.5
-89.5 -151.5
-89.5 -150.5
-89.5 -149.5
-89.5 -148.5
-89.5 -147.5
-89.5 -146.5
-89.5 -145.5
-89.5 -144.5
-89.5 -143.5
-89.5 -142.5
-89.5 -141.5
-89.5 -140.5
-89.5 -139.5
-89.5 -138.5
-89.5 -137.5
-89.5 -136.5
-89.5 -135.5
-89.5 -134.5
-89.5 -133.5
-89.5 -132.5
-89.5 -131.5
-89.5 -130.5
-89.5 -129.5
-89.5 -128.5
-89

In [22]:
# Re-open a written NetCDF file to inspect its variables.
# NOTE(review): this reads '16_netcdf4.nc', whereas the writer cell above
# creates '01_netcdf4.nc' -- confirm which file is meant.
testdata = xr.open_dataset('16_netcdf4.nc')
testdata.variables

Frozen(OrderedDict([('var', <xarray.Variable (lat: 180, lon: 360, time: 12)>
[777600 values with dtype=float64]), ('time', <xarray.IndexVariable 'time' (time: 12)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32)
Attributes:
    units:     climatological months
    calendar:  gregorian), ('lat', <xarray.IndexVariable 'lat' (lat: 180)>
array([-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5, -81.5, -80.5,
       -79.5, -78.5, -77.5, -76.5, -75.5, -74.5, -73.5, -72.5, -71.5, -70.5,
       -69.5, -68.5, -67.5, -66.5, -65.5, -64.5, -63.5, -62.5, -61.5, -60.5,
       -59.5, -58.5, -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5,
       -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5, -41.5, -40.5,
       -39.5, -38.5, -37.5, -36.5, -35.5, -34.5, -33.5, -32.5, -31.5, -30.5,
       -29.5, -28.5, -27.5, -26.5, -25.5, -24.5, -23.5, -22.5, -21.5, -20.5,
       -19.5, -18.5, -17.5, -16.5, -15.5, -14.5, -13.5, -12.5, -11.5, -10.5,
        -9.5,  -8.5,  -7.5,