# FK17 NetCDF Production

The following script is used to produce a NetCDF4 file containing the data on the _FK17_ sheet of the _Datasets_IC12_FK17_TIR18_SarahWauthy.xlsx_ file.

In [None]:
import numpy as np
from netCDF4 import Dataset
import pandas as pd
import os
import bisect


Files are loaded from and saved to fixed local directories, so we store each directory path as a raw string and use the `os` module to switch the working directory between loading and saving.

In [None]:
# Workbook and worksheet that hold the FK17 records.
filename = 'Datasets_IC12_FK17_TIR18_SarahWauthy.xlsx'
sheet = 'FK17'

# Local folders: the raw workbook is read from the first one, and the
# working directory is switched to the second one before the output is saved.
rawfiledirectory = r"C:\Users\Alfie\Desktop\IMAS\ncfilesraw"
outfiledirectory = r"C:\Users\Alfie\Desktop\IMAS\ncfiles"

# Work from the raw-data folder so the workbook can be opened by its bare
# filename.
os.chdir(rawfiledirectory)

Data is read in.

In [None]:
# Load spreadsheet columns D:J, taking the column labels from row index 3
# (header=3), then drop down to a plain NumPy array for the row-wise
# processing that follows.
chemistry_frame = pd.read_excel(filename, sheet_name=sheet, header=3,
                                usecols="D:J")
chemistry = chemistry_frame.to_numpy()

Read the age-model depths and years into two separate one-dimensional series (easier to use with bisect).

In [None]:
# The two age-model columns are read with identical options; only the
# spreadsheet column letter differs (A for depths, B for years).
agemodel_options = {"sheet_name": sheet, "header": 3, "nrows": 49}

# .squeeze() collapses each single-column frame to a one-dimensional Series.
agedepths = pd.read_excel(filename, usecols="A", **agemodel_options).squeeze()
ageyears = pd.read_excel(filename, usecols="B", **agemodel_options).squeeze()

The sample depths extend beyond the last value in the age model, so the trailing NaN entry of the age model (index 48) must be filled with a value for bisect to work.

In [None]:
# Overwrite the final age-model entry (label 48) in both series so that the
# depth series contains no NaN when it is searched with bisect.
agedepths[48] = 100
ageyears[48] = 1969

Append a column of zeros to the chemistry data to hold each record's year. The column is appended last and then rolled into index 6, which moves the sample id to index 7.

In [None]:
# One integer zero per chemistry row; this becomes the year column.
z = np.zeros((chemistry.shape[0], 1), dtype=int)
chemistry = np.concatenate((chemistry, z), axis=1)

# Shift every column one place to the left, wrapping around: the first
# column (sample id) moves to index 7 and the zeros column ends up at index 6.
chemistry = np.roll(chemistry, shift=-1, axis=1)

Records which cross a depth-year boundary in the age model are split into two
records with the boundary depth taking the top/bottom depth in either split,
with all chemistry data being copied into both.
Bisect returns the index for this depth in agedepths, giving the following
index if equal to a depth-year boundary.

In [None]:
# Assign a year to every record and split records that straddle an age-model
# boundary.  Columns used here: 0 = top depth, 1 = bottom depth, 6 = year.
#
# New records are collected in a list and stacked onto `chemistry` once after
# the loop, instead of rebuilding the whole array for every split.
pieces = []
for i in range(len(chemistry)):
    top, bottom = chemistry[i, 0], chemistry[i, 1]
    if pd.isna(top):
        # No top depth, so the record cannot be dated; its year column is
        # left untouched.
        continue

    # bisect (bisect_right) places a top depth lying exactly on a boundary
    # into the following interval.  bisect_left keeps a bottom depth lying
    # exactly on a boundary in the preceding interval, so a record that only
    # touches a boundary is not split into a zero-thickness piece.
    t = bisect.bisect(agedepths, top)
    b = bisect.bisect_left(agedepths, bottom)
    chemistry[i, 6] = ageyears[t]

    if b > t:
        # The original record keeps its top depth and year and now ends at
        # the first boundary it crosses.
        chemistry[i, 1] = agedepths[t]

        # One extra record per further interval the sample reaches into
        # (normally just one).  Each starts at the boundary above it and
        # carries that interval's year; the last one ends at the original
        # bottom depth.
        for k in range(t + 1, b + 1):
            # .copy() is essential: chemistry[i] on its own is a view, and
            # writing to it would overwrite the original record's top depth
            # and year.
            piece = chemistry[i].copy()
            piece[0] = agedepths[k - 1]
            piece[1] = bottom if k == b else agedepths[k]
            piece[6] = ageyears[k]
            pieces.append(piece)

if pieces:
    chemistry = np.vstack([chemistry, *pieces])

Then sort in ascending order of topdepth.

In [None]:
# Row permutation that puts the top depths (column 0) in ascending order,
# then reorder all records with it.
depth_order = np.argsort(chemistry[:, 0])
chemistry = chemistry[depth_order]

Next we change to the save directory and create a netCDF file.

In [None]:
# Switch to the output folder so the file is created there.
os.chdir(outfiledirectory)

# The format must be given by keyword: Dataset's third positional parameter
# is `clobber`, not `format`, so a positional 'NETCDF4' only worked because
# the string is truthy and NETCDF4 happens to be the default format.
ncout = Dataset('FK17.nc', 'w', format='NETCDF4')
ncout.description = "Preliminary FK17 data, agemodel unconfirmed. FK17 position: -70.536501S, 24.075568E"

Create the topdepth dimension and all of the chemistry variables.

In [None]:
# A single unlimited dimension; every variable below is defined along it.
ncout.createDimension('topdepth', size=None)
record_dim = ('topdepth',)

# Year of each record (column 6 of the chemistry array).
yearvar = ncout.createVariable('year', 'int', record_dim)
yearvar[:] = chemistry[:, 6]

# Variable sharing the dimension's name, holding the top depths (column 0).
tdvar = ncout.createVariable('topdepth', 'float32', record_dim)
tdvar[:] = chemistry[:, 0]

# Thickness is only declared here; its values are written in a later cell.
thickvar = ncout.createVariable('thickness', 'float32', record_dim,
                                fill_value=np.nan)
thickvar.description = "Thickness of this sample"


def _ppb_variable(name, column):
    """Create a float32 variable in ppb and fill it from a chemistry column."""
    var = ncout.createVariable(name, 'float32', record_dim, fill_value=np.nan)
    var.units = "ppb"
    var[:] = chemistry[:, column]
    return var


navar = _ppb_variable('Na', 3)
msavar = _ppb_variable('MSA', 4)
so4var = _ppb_variable('SO4', 5)

# Sample identifiers (column 7) are stored as strings.
sampleidvar = ncout.createVariable('sample_id', 'str', record_dim)
sampleidvar[:] = chemistry[:, 7]

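Fill in the thickness of each record (bottom depth minus top depth); the other variables were filled as they were created above.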

In [None]:
# Thickness is bottom depth (column 1) minus top depth (column 0).  The whole
# column is written with one slice assignment rather than one netCDF write
# per record; the explicit cast matches the variable's float32 type.
thickvar[:] = (chemistry[:, 1] - chemistry[:, 0]).astype('float32')

In [None]:
# Close the NetCDF dataset now that every variable has been written.
ncout.close()