In [8]:
import numpy as np
import pandas as pd
import os
import pickle

In [9]:
# Exports a CSV file of all data contained in the pickle files of the chosen
# location folder; the file is written to the current working directory.

# user inputs
# folder name of the location (e.g. axial_base, oregon_offshore, oregon_slope)
locationName = "xxxx"

# Path separator placed between the working directory and locationName.
# os.sep is "\\" on Windows and "/" on macOS / Linux, so the default is
# already correct for the operating system running the notebook.
# Override it by hand only if a different separator is really needed,
# e.g. osType = "\\"
osType = os.sep

In [10]:
# resolve the data folder: <current working directory><separator><location folder>
directory = os.getcwd()
path = osType.join((directory, locationName))

In [11]:
# function to extract data from pickle files
def getData(path):
    """Load every pickled profile found in ``path``.

    Parameters
    ----------
    path : str
        Folder containing one ``<date>.pkl`` file per profile. Each unpickled
        object must expose ``parameter_mean`` (sound speed) and ``depth_mean``
        attributes.

    Returns
    -------
    dates : numpy.ndarray
        File names with the ``.pkl`` extension removed, in sorted order.
    depth_mean : numpy.ndarray
        ``depth_mean`` of each profile, in the same order as ``dates``.
    sound_speed : numpy.ndarray
        ``parameter_mean`` of each profile, in the same order as ``dates``.

    Notes
    -----
    The working directory is left untouched; files are opened through their
    full path. An empty folder yields three empty arrays.
    """

    extension = ".pkl"

    sound_speed = []
    depth_mean = []
    dates = []

    # os.listdir returns names in arbitrary order, so sort them to make the
    # output order reproducible; anything that is not a pickle file is skipped.
    pickleNames = sorted(
        name for name in os.listdir(path) if name.endswith(extension)
    )

    for name in pickleNames:
        # NOTE(security): pickle.load can execute arbitrary code, only point
        # this function at folders whose contents are trusted.
        with open(os.path.join(path, name), 'rb') as f:
            loadFile = pickle.load(f)

        # the file name without its extension is used as the profile date
        dates.append(name[:-len(extension)])

        # speed of sound of this profile
        sound_speed.append(loadFile.parameter_mean)

        # mean depths of this profile
        depth_mean.append(loadFile.depth_mean)

    dates = np.array(dates)
    sound_speed = np.array(sound_speed)
    depth_mean = np.array(depth_mean)
    return dates, depth_mean, sound_speed

In [12]:
# interpolation function
# linearly interpolates sound speed between neighbouring depth samples
# input: two 1D arrays: one of depths, one of sound speeds
# output: 1D array of interpolated sound speeds, one value per 1 m depth bin
# the output has one entry per depth bin (3000 in this case)
# bins are equally sized (1 m)
# where a bin can be interpolated the interpolated value is stored, otherwise NaN is stored

def soundSpeedInterpolation(depth, soundSpeed, maxDepth=3000, maxGap=2):
    """Linearly interpolate one sound speed profile onto integer depths.

    Parameters
    ----------
    depth : 1D array
        Measured depths of the profile. The search stops at the first NaN
        depth. The ``jstart`` shortcut below only works if the valid depths
        are in increasing order (assumption, confirm against the data).
    soundSpeed : 1D array
        Sound speed measured at each entry of ``depth``.
    maxDepth : int, optional
        Number of target depths; targets are ``0, 1, ..., maxDepth - 1``.
        Defaults to 3000.
    maxGap : number, optional
        Two neighbouring samples are only interpolated when they are less
        than ``maxGap`` apart. Defaults to 2.

    Returns
    -------
    numpy.ndarray
        Array of length ``maxDepth``. Entry ``x`` is the interpolated sound
        speed at depth ``x``, or NaN when no pair of neighbouring samples with
        ``depth[j] <= x < depth[j+1]`` and a gap below ``maxGap`` exists.
        Profiles with fewer than two samples produce only NaN.
    """

    interpValues = []

    # index of the last sample that still has a neighbour at j+1
    lastIndex = len(depth) - 1

    # first interval worth checking; intervals before it lie above the
    # previously matched target depth
    jstart = 0

    # x is the target depth of the current 1 m bin
    for x in range(0, maxDepth):

        # stays NaN unless an enclosing interval is found below
        interpSoundSpeed = np.nan

        # j walks over the intervals [depth[j], depth[j+1]) of the profile;
        # the range is empty when no interval is left, so no index can run
        # past the end of the arrays
        for j in range(jstart, lastIndex):

            lowerDepth = depth[j]

            # a NaN depth marks the end of the valid samples
            if np.isnan(lowerDepth):
                break

            upperDepth = depth[j+1]

            # a NaN upperDepth makes both comparisons False, so the interval
            # is skipped and the next iteration hits the NaN break above
            if lowerDepth <= x < upperDepth and upperDepth - lowerDepth < maxGap:
                lowerSpeed = soundSpeed[j]
                upperSpeed = soundSpeed[j+1]
                slope = (upperSpeed - lowerSpeed) / (upperDepth - lowerDepth)

                # linear interpolation inside the interval
                interpSoundSpeed = slope * (x - lowerDepth) + lowerSpeed

                # resume from this same interval: an interval wider than 1 m
                # can contain the next target depth as well
                jstart = j
                break

        interpValues.append(interpSoundSpeed)

    interpValues = np.array(interpValues)
    return interpValues

In [13]:
# Organize data into 2D array (Columns = Dates, Rows = Depth, each entry = sound speed)
# Interpolate sound speed at desired depths
# plot 2D array using contourf
# contourf args: X = 1D array of dates, Y = 1D array of depths, Z = 2D array of sound speeds @ Depth & Date
# N = 2193, M = 3000

def organizeData(depth_mean, sound_speed):
    """Interpolate every profile and stack the results into one array.

    depth_mean and sound_speed are indexed in parallel: entry k of each
    holds the depths and the sound speeds of profile k. The returned
    numpy array has one row per profile, each row being the output of
    soundSpeedInterpolation for that profile.
    """
    profileCount = len(depth_mean)

    # one interpolated row per profile (i.e. per date)
    interpolatedProfiles = [
        soundSpeedInterpolation(depth_mean[k], sound_speed[k])
        for k in range(profileCount)
    ]

    return np.array(interpolatedProfiles)

In [14]:
# name of the CSV file to write (named after the location folder)
excelFile = locationName + ".csv"

# get data: dates, depths and sound speeds of every pickled profile
dataTuple = getData(path)
dates, depth_mean, sound_speed = dataTuple

# interpolate every profile; rawData has one row per date
rawData = organizeData(depth_mean, sound_speed)

# transpose so that rows are depth bins and columns are dates, and label the
# columns with the dates while building the frame. This replaces
# set_axis(..., inplace=False), whose `inplace` keyword no longer exists in
# pandas 2.0 and later.
data = pd.DataFrame(rawData.T, columns=dates)

# export to CSV in the directory the notebook was started from
# (the chdir restores the working directory in case it was changed while loading)
os.chdir(directory)
data.to_csv(os.path.join(directory, excelFile), index=False, na_rep='NaN')