# create repackaged WOA DATA for each Dataset

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob

## MLD data-set to use:
new one "MLD_minDTR02"

variable is: "mld_mindtr02_rmoutliers_smth_okrg"

In [2]:
mldX = xr.open_dataset('../MLDClimatology_DeBoyerMontagut/2019_11_07_data_L3_mldmindtr02_c1m_sameasSent2MHRio20141103__Sent2BenjaminPostUcsdBremen/mld_mindtr02_l3.nc')

In [3]:
#landsea = xr.open_dataset('LandSeaMask/landsea.nc')

In [4]:
#TempNCDFfiles = glob.glob('../WOA2018/Temperature/*.nc', recursive=True)
#WOAtemp = xr.open_mfdataset(TempNCDFfiles,combine='by_coords',decode_times=False)

In [5]:
NitrateNCDFfiles = glob.glob('../WOA2018/Nitrate/*.nc', recursive=True)
WOAnitrate = xr.open_mfdataset(NitrateNCDFfiles,combine='by_coords',decode_times=False)

In [6]:
#PhosphateNCDFfiles = glob.glob('WOA2018/Phosphate/*.nc', recursive=True)
#WOAphosphate = xr.open_mfdataset(PhosphateNCDFfiles,combine='by_coords',decode_times=False)

In [7]:
#SilicateNCDFfiles = glob.glob('WOA2018/Silicate/*.nc', recursive=True)
#WOAsilicate = xr.open_mfdataset(SilicateNCDFfiles,combine='by_coords',decode_times=False)

In [8]:
#WOAsilicate.variables

## now let's try all the other WOA2018 datasets
- and finally extract forcing for one or two specific locations, and use these for test implemetation towards the model


In [9]:
def WOADatInLocation(WOAfile, lats=40,lons=-20, WOAvar='Temp', justValPerDepth=True):
    """This function returns the xarray of Temperature in a specific location"""
    
    WOAdat = WOAfile.sel(nbounds=0)
    WOA_df = WOAdat.sel(lat=lats,lon=lons, method='nearest').to_dataframe()
    
    if WOAvar=='Temp':
        var='t_an'
        WOA_df.index = pd.MultiIndex.from_arrays([WOA_df.index.get_level_values(level='depth'), [i for j in range(0,57) for i in range(1,13)]])
        WOA_df.index.names = ['depth','time']
    elif WOAvar=='N' or WOAvar=='N_above':
        var='n_an'
        WOA_df.index = pd.MultiIndex.from_arrays([WOA_df.index.get_level_values(level='depth'), [i for j in range(0,43) for i in range(1,13)]])
        WOA_df.index.names = ['depth','time']
    elif WOAvar=='P' or WOAvar=='P_above':
        var='p_an'
        WOA_df.index = pd.MultiIndex.from_arrays([WOA_df.index.get_level_values(level='depth'), [i for j in range(0,43) for i in range(1,13)]])
        WOA_df.index.names = ['depth','time']
    elif WOAvar=='Si' or WOAvar=='Si_above':
        var='i_an'
        WOA_df.index = pd.MultiIndex.from_arrays([WOA_df.index.get_level_values(level='depth'), [i for j in range(0,43) for i in range(1,13)]])
        WOA_df.index.names = ['depth','time']
    
    if justValPerDepth==True:
        return WOA_df[[var]]#.reset_index()
    else:
        return WOA_df#.reset_index()

In [10]:
#WOADatInLocation(WOAphosphate,WOAvar='P')

In [11]:
def MLD_at_latlon(lats=40,lons=-20, mlddatas=mldX, justMLD=True):
    """This function returns MLD depth across the year at a specific location"""
    #ToDO include check whether the spot is on land or water
    #Make sure the lat lon usage across everything is coherent!
    mld1 = mlddatas.drop_dims(['nlines1','nlines2','nlines3','nprf'])
    mld2 = mld1.sel(lat=lats,lon=lons,method='nearest').to_dataframe()
    #ls = landseadat.sel(lat=lats,lon=180+lons,method='nearest').variables['LSMASK'].values
    mld2['time'] = mld2.index
    mld2.index = range(1,13)
    mld2.index.names = ['time']
    #if ls!=0:
    #    return pd.DataFrame()
    if justMLD==True:
        return mld2[['mld_mindtr02_rmoutliers_smth_okrg']]#.reset_index()
    else:
        return mld2#.reset_index()

In [12]:
#WOA = WOADatInLocation(WOAnitrate,lats=40,lons=-20,WOAvar='N')

In [13]:
def interpolateWOAprofiles(WOAdat,var):
    """this function interpolates the WOA profiles to a resolution of 1 meter"""
    WOAgrid = WOAdat.unstack(level=1)
    if var=='Temp':
        WOAgrid1 = WOAgrid.reindex(range(0,1501))
        return WOAgrid1.interpolate(method='linear')
    elif var=='N' or var=='P' or var=='Si':
        WOAgrid1 = WOAgrid.reindex(range(0,801))
        return WOAgrid1.interpolate(method='linear')
    elif var=='N_above' or var=='P_above' or var=='Si_above':
        WOAgrid1 = WOAgrid.reindex(range(0,801))
        return WOAgrid1.interpolate(method='linear')

In [14]:
#WOA_int = interpolateWOAprofiles(WOA,'N')

In [15]:
#WOA_int

In [44]:
def ValueBelowMLD(WOAint, MLD):
    """this function filters all temperature measurements above a certain depth and takes the mean"""
    WOAint_d1 = WOAint
    WOAint_d2 = WOAint_d1
    WOAint_d2.columns = WOAint_d1.columns.droplevel()
    WOAx = WOAint_d2.reset_index()
    out = []
    print(WOAx[WOAx['depth'] > MLD[MLD['time']==1]['mld_mindtr02_rmoutliers_smth_okrg'].values[0]][1].iloc[0:10])
    for i in range(1,13):
        out.append({'Month':i,'Conc':WOAx[WOAx['depth'] > MLD[MLD['time']==i]['mld_mindtr02_rmoutliers_smth_okrg'].values[0]][i].iloc[0:10].mean()})
    return pd.DataFrame(out)

In [45]:
def MeanAboveMLD(WOAint, MLD):
    """this function filters all temperature measurements above a certain depth and takes the mean"""
    WOAint_d1 = WOAint
    WOAint_d1.columns = WOAint.columns.droplevel()
    WOAx = WOAint_d1.reset_index()
    out = []
    print(WOAx[WOAx['depth'] < MLD[MLD['time']==1]['mld_mindtr02_rmoutliers_smth_okrg'].values[0]][1])
    for i in range(1,13):
        out.append({'Month':i,'Conc':WOAx[WOAx['depth'] < MLD[MLD['time']==i]['mld_mindtr02_rmoutliers_smth_okrg'].values[0]][i].mean()})
    return pd.DataFrame(out)

In [46]:
def ReturnAnalyzedWOAdata(WOAdat, lats=40, lons=-20, mlddat=mldX, WOAvar='Temp', var='t_an'):
    
    MLD = MLD_at_latlon(lats,lons,mlddat).reset_index()
    if MLD.empty == True:
        return MLD
    else:
        WOA = WOADatInLocation(WOAdat,lats=lats,lons=lons,WOAvar=WOAvar)
        WOA_int = interpolateWOAprofiles(WOA,WOAvar)
        if WOAvar=='Temp':
            return MeanAboveMLD(WOA_int,MLD)
        elif WOAvar=='N' or WOAvar=='P' or WOAvar=='Si':
            return ValueBelowMLD(WOA_int,MLD)
        elif WOAvar=='N_above' or WOAvar=='P_above' or WOAvar=='Si_above':
            return MeanAboveMLD(WOA_int,MLD)

In [47]:
lat = -10
lon = -110

In [48]:
ReturnAnalyzedWOAdata(WOAnitrate,lat,lon,WOAvar='N') - ReturnAnalyzedWOAdata(WOAnitrate,lat,lon,WOAvar='N_above')

40    9.510378
41    9.383310
42    9.256243
43    9.129176
44    9.002109
45    8.875041
46    8.762367
47    8.649693
48    8.537020
49    8.424346
Name: 1, dtype: float32
0     10.789656
1     10.778853
2     10.768050
3     10.757248
4     10.746445
5     10.735642
6     10.696705
7     10.657767
8     10.618830
9     10.579892
10    10.540955
11    10.573217
12    10.605480
13    10.637744
14    10.670007
15    10.702270
16    10.686105
17    10.669940
18    10.653774
19    10.637609
20    10.621445
21    10.547772
22    10.474101
23    10.400429
24    10.326757
25    10.253085
26    10.224511
27    10.195937
28    10.167363
29    10.138789
30    10.110215
31    10.027373
32     9.944531
33     9.861691
34     9.778849
35     9.696007
36     9.658881
37     9.621756
38     9.584629
39     9.547503
Name: 1, dtype: float32


Unnamed: 0,Month,Conc
0,0,-1.414227
1,0,-0.07442
2,0,-0.146029
3,0,-0.099424
4,0,1.124047
5,0,1.273954
6,0,-0.125897
7,0,-0.079595
8,0,-0.111808
9,0,-1.418342


## QUESTIONS that remain:

- to what depth should I take the mean when calculating N0 ?   
- (for all nuts, inlc P & Si this matters)
- any use for oxygen & AOU? (this is in WOA2018 as well)
- and Salinity?

# START CREATING NCDF FILE

In [44]:
from netCDF4 import Dataset

In [45]:
WOAnitrate = WOAnitrate.persist()
mldX = mldX.persist()

In [46]:
rootgrp = Dataset('NitrateAboveMLD_WOA_tmld_test01.nc', 'w', format='NETCDF4')

import time
rootgrp.description = 'WOA 2018 Data analyzed with MLD climatology'
rootgrp.history = "Created " + time.ctime(time.time())
rootgrp.source = "WOA2018 + DeBoyer Montague et al. 2004"

lat_r = np.arange(-89.5,90.5,1)
lon_r = np.arange(-179.5,180.5,1)
time_r = np.arange(1,13,1)

var = rootgrp.createDimension("n0", None)
time = rootgrp.createDimension("time", len(time_r))
lat = rootgrp.createDimension("lat", len(lat_r))
lon = rootgrp.createDimension("lon", len(lon_r))


varss = rootgrp.createVariable("n0","f8",("lat","lon","time"))
month = rootgrp.createVariable("time","i4",("time",))
latitudes = rootgrp.createVariable("lat","f4",("lat",))
longitudes = rootgrp.createVariable("lon","f4",("lon",))


latitudes.units = "degrees north"
longitudes.units = "degrees east"
varss.units = "degrees centigrade"

month.units = "climatological months"
month.calendar = "gregorian"

latitudes[:] = lat_r
longitudes[:] = lon_r
month[:] = time_r

                
for i in range(len(lat_r)):
    print(i/len(lat_r)*100, end="\r")
    for j in range(len(lon_r)):
        MAMLD = ReturnAnalyzedWOAdata(WOAnitrate,lat_r[i],lon_r[j],WOAvar='N_above') #HERE NEED TO FIX!
        for k in range(len(time_r)):
            if MAMLD.empty == True:
                varss[i, j, k] = np.nan
            else:
                varss[i, j, k] = MAMLD['Conc'].values[k]


rootgrp.close()

99.444444444444446

In [21]:
testdata = xr.open_dataset('Temp_WOA_tmld_test02.nc')
testdata.variables

Frozen({'t_mld': <xarray.Variable (lat: 180, lon: 360, time: 12)>
[777600 values with dtype=float64]
Attributes:
    units:    degrees centigrade, 'time': <xarray.IndexVariable 'time' (time: 12)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32)
Attributes:
    units:     climatological months
    calendar:  gregorian, 'lat': <xarray.IndexVariable 'lat' (lat: 180)>
array([-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5, -81.5, -80.5,
       -79.5, -78.5, -77.5, -76.5, -75.5, -74.5, -73.5, -72.5, -71.5, -70.5,
       -69.5, -68.5, -67.5, -66.5, -65.5, -64.5, -63.5, -62.5, -61.5, -60.5,
       -59.5, -58.5, -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5,
       -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5, -41.5, -40.5,
       -39.5, -38.5, -37.5, -36.5, -35.5, -34.5, -33.5, -32.5, -31.5, -30.5,
       -29.5, -28.5, -27.5, -26.5, -25.5, -24.5, -23.5, -22.5, -21.5, -20.5,
       -19.5, -18.5, -17.5, -16.5, -15.5, -14.5, -13.5, -12.5, -11.5, -10.

In [23]:
testdata.sel(lat=40,lon=-20, method='nearest')['t_mld']

<xarray.DataArray 't_mld' (time: 12)>
array([14.735965, 14.203766, 14.04381 , 14.488364, 15.369067, 17.386347,
       19.672104, 20.914284, 20.832928, 19.515116, 17.51606 , 15.989733])
Coordinates:
  * time     (time) int32 1 2 3 4 5 6 7 8 9 10 11 12
    lat      float32 40.5
    lon      float32 -19.5
Attributes:
    units:    degrees centigrade