In [None]:
import numpy as np
import pandas as pd
import pickle
import os
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
import re

In [None]:
#User inputs

#Base names (without the ".csv" extension, which is appended at load time) of the 4 data files to load.
#Each name is parsed later as <location>_<startDate>_<endDate>.
#NOTE: Excel Generation produces a CSV since the change on 8/16/22; this notebook has been adjusted to read CSV files
#NOTE: These files should be in the working directory of the .ipynb file
file1 = "axial_base_2015-01-01_2022-01-01"
file2 = "oregon_offshore_2015-01-01_2022-01-01"
file3 = "oregon_shelf_2015-01-01_2022-01-01"
file4 = "oregon_slope_2015-01-01_2022-01-01"

#Location to focus on, written with spaces instead of underscores.
#The comparison against the parsed location names is not case sensitive.
focusLocation = "oregon offshore"

In [None]:
def fileNameAttributes(fileName):
    """Split a data file name into its location name and date range.

    The name is expected to look like ``<location>_<startDate>_<endDate>``
    (no extension), e.g. ``"oregon_offshore_2015-01-01_2022-01-01"``.
    The two dates are split off from the right-hand end, so the location part
    may contain any number of underscore-separated words.

    Parameters
    ----------
    fileName : str
        File name without extension.

    Returns
    -------
    tuple of (str, str, str)
        ``(locationName, startDate, endDate)`` where ``locationName`` has its
        underscores replaced by spaces and is title-cased.

    Raises
    ------
    ValueError
        If the name does not contain at least two underscores.
    """
    # Split from the right: the last two fields are always the dates,
    # whatever is left is the location (which may itself contain "_").
    parts = fileName.rsplit("_", 2)
    if len(parts) != 3:
        raise ValueError(
            "fileName must look like '<location>_<startDate>_<endDate>', got: "
            + repr(fileName)
        )
    rawLocation, startDate, endDate = parts

    #format locationName into title format
    locationName = rawLocation.replace("_", " ").title()

    return locationName, startDate, endDate

In [None]:
def localMaxima(dayData, maxDepth=200):
    """Find the largest sound speed in the top of one day's profile.

    Scans ``dayData[0]`` .. ``dayData[maxDepth]`` (inclusive), skipping NaN
    entries, and keeps the first position holding the largest value.  The
    position is used as the depth (the notebook treats row index as depth).

    Parameters
    ----------
    dayData : sequence of float
        One profile, indexable by integer position/label and supporting
        ``len()`` (list, numpy array, or a pandas Series with a default
        integer index).
    maxDepth : int, optional
        Last index to inspect.  Defaults to 200, matching the original fixed
        0-200 window.  Profiles shorter than ``maxDepth + 1`` samples are
        scanned to their end instead of raising.

    Returns
    -------
    tuple
        ``(localMaximaDepth, maxSoundSpeed)``.  When no positive, non-NaN value
        is found the result is ``(nan, 0)``.  A maximum located at index 0 is
        also reported with depth ``nan`` (index 0 doubles as the "not found"
        marker), while its speed is still returned.
    """
    localMaximaDepth = 0
    maxSoundSpeed = 0

    # Clamp the window so short profiles do not raise an index/key error.
    lastIndex = min(maxDepth, len(dayData) - 1)

    for depth in range(0, lastIndex + 1):
        speed = dayData[depth]
        # Strict ">" keeps the shallowest occurrence when values tie.
        if not np.isnan(speed) and speed > maxSoundSpeed:
            localMaximaDepth = depth
            maxSoundSpeed = speed

    # Depth 0 means "nothing found" (or a maximum at the very first sample).
    if localMaximaDepth == 0:
        localMaximaDepth = np.nan

    return localMaximaDepth, maxSoundSpeed

In [None]:
def convertDate(date):
    """Return ``date`` as a 'YYYY-MM-DD' string (month and day zero-padded).

    ``date`` must expose a ``.date()`` method (e.g. ``datetime.datetime`` or
    ``pandas.Timestamp``).  The resulting string is used elsewhere in this
    notebook as a DataFrame column key.
    """
    calendarDay = date.date()
    pieces = [
        str(calendarDay.year),
        str(calendarDay.month).zfill(2),  # pad single-digit months
        str(calendarDay.day).zfill(2),    # pad single-digit days
    ]
    return '-'.join(pieces)

In [None]:
def ThirtyMAndLocalMaxima(data, startDate, endDate):
    """Tabulate, per day, the sound speed at row 30 and the local maximum.

    For every column of ``data`` the matching date (taken positionally from
    ``pd.date_range(startDate, endDate)``) is turned into a 'YYYY-MM-DD' key,
    and that column is read to obtain:

    * the value at row index 30, and
    * the ``(depth, speed)`` pair returned by ``localMaxima``.

    The loop runs once per column, so ``data`` is expected to hold exactly one
    column per date in the range.

    Returns
    -------
    pandas.DataFrame
        Columns 'Dates', 'Depth Local Maxima', 'Speed Local Maxima' and
        '30m Sound Speed', one row per date.
    """
    dates = pd.date_range(startDate, endDate)

    speedAtThirty = []
    maximaDepths = []
    maximaSpeeds = []

    for columnIndex in range(len(data.columns)):
        dayKey = convertDate(dates[columnIndex])
        dayProfile = data[dayKey]

        # sound speed at row index 30
        speedAtThirty.append(dayProfile.values[30])

        # depth and speed of the local maximum for this day
        peakDepth, peakSpeed = localMaxima(dayProfile)
        maximaDepths.append(peakDepth)
        maximaSpeeds.append(peakSpeed)

    # assemble the per-day lists into a single table
    return pd.DataFrame(data={
        'Dates': dates,
        'Depth Local Maxima': maximaDepths,
        'Speed Local Maxima': maximaSpeeds,
        '30m Sound Speed': speedAtThirty,
    })

In [None]:
def excludeFlat(data):
    """Drop rows whose local-maximum depth barely changes on the next row.

    A row is removed when the absolute difference between its
    'Depth Local Maxima' value and the following row's value is <= 3.
    The last row is always kept, and so is any row where the difference is NaN
    (the comparison is then false).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Depth Local Maxima' column.

    Returns
    -------
    pandas.DataFrame
        The retained rows of ``data`` (all columns).
    """
    depths = data["Depth Local Maxima"].to_numpy()
    lastRow = len(depths) - 1

    # True = keep the row; the last row short-circuits before looking ahead.
    keepRow = [
        row == lastRow or not abs(depths[row] - depths[row + 1]) <= 3
        for row in range(len(depths))
    ]

    return data.loc[keepRow, :]

In [None]:
def averageSoundSpeed(data, startDate, endDate):
    """Average the sound speed at each depth row over all daily columns.

    The columns to average are looked up by 'YYYY-MM-DD' keys built from
    ``pd.date_range(startDate, endDate)``, one per column of ``data`` (taken
    positionally).  NaN samples are ignored; a depth with no valid sample at
    all gets NaN.  Only the first 201 rows (indices 0-200) are used; shorter
    tables are averaged over the rows they have.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per day, named 'YYYY-MM-DD'; rows are depth indices.
    startDate, endDate : str or datetime-like
        Bounds passed to ``pd.date_range``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Average Speed of Sound' and 'Depth', one row per depth index.

    Raises
    ------
    IndexError
        If ``data`` has more columns than there are dates in the range.
    KeyError
        If an expected date column is missing from ``data``.
    """
    dates = pd.date_range(startDate, endDate)
    numColumns = len(data.columns)

    # Build every column key once instead of once per (depth, day) pair.
    columnNames = [dates[j].strftime('%Y-%m-%d') for j in range(numColumns)]

    # rows = depth index, columns = days; restricted to depths 0-200
    profiles = data[columnNames].to_numpy(dtype=float)[:201]

    valid = ~np.isnan(profiles)
    counts = valid.sum(axis=1)
    sums = np.where(valid, profiles, 0.0).sum(axis=1)

    # "No data" is decided by the number of valid samples, not by the sum
    # happening to be zero.
    speedAvgs = np.full(profiles.shape[0], np.nan)
    hasData = counts > 0
    speedAvgs[hasData] = sums[hasData] / counts[hasData]

    #sets up dictionary for the columns, then converts it to a pandas dataframe and returns it
    depth = list(range(0, profiles.shape[0]))
    d = {'Average Speed of Sound': speedAvgs, 'Depth': depth}
    storeData = pd.DataFrame(data=d)
    return storeData

In [None]:
def extrapolateSoundSpeed(data, date):
    """Estimate the sound speed at row index 30 for one day's profile.

    Strategy, in order:

    1. If the sample at index 30 is present, return it.
    2. Otherwise look for the nearest valid sample in 20-29 and in 31-39.
       If both exist, interpolate linearly between them at index 30.
    3. If only one side has data, extrapolate linearly to index 30 from the
       closest pair of adjacent valid samples on that side.
    4. If none of the above is possible, return ``(nan, nan)``.

    A linear model is used because, per the notebook's notes, the profile is
    mostly linear in the 0-50 range; samples further than ~10 rows from
    index 30 are ignored as unreliable.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one column per day, keyed by 'YYYY-MM-DD'.
    date : str
        Column key of the day to evaluate.

    Returns
    -------
    tuple
        ``(depth, soundSpeed)``.  ``depth`` is 30 whenever a speed could be
        produced and NaN otherwise; ``soundSpeed`` is the measured,
        interpolated or extrapolated value, or NaN.
    """
    targetDepth = 30
    profile = data[date].values

    #check if data exist at 30m
    soundSpeed = profile[targetDepth]
    if not np.isnan(soundSpeed):
        return targetDepth, soundSpeed

    #10m tolerance as data likely won't be accurate outside of that range

    #closest valid sample above 30m (scan 29 -> 20)
    depthAbove30 = np.nan
    for i in range(targetDepth - 1, 19, -1):
        if not np.isnan(profile[i]):
            depthAbove30 = i
            break

    #closest valid sample below 30m (scan 31 -> 39)
    depthBelow30 = np.nan
    for j in range(targetDepth + 1, 40):
        if not np.isnan(profile[j]):
            depthBelow30 = j
            break

    dataAbove30 = not np.isnan(depthAbove30)
    dataBelow30 = not np.isnan(depthBelow30)

    if not dataAbove30 and not dataBelow30:
        return np.nan, np.nan

    #interpolation if data above and below 30m
    if dataAbove30 and dataBelow30:
        soundSpeedAbove30 = profile[depthAbove30]
        soundSpeedBelow30 = profile[depthBelow30]
        slope = (soundSpeedAbove30 - soundSpeedBelow30) / (depthAbove30 - depthBelow30)
        # Evaluate the line at the target depth itself.
        interpSoundSpeed = slope * (targetDepth - depthBelow30) + soundSpeedBelow30
        return targetDepth, interpSoundSpeed

    #extrapolation from data above 30m only
    if dataAbove30:
        # Walk away from 30m and use the first adjacent pair (i, i+1) in which
        # BOTH samples are valid; index 30 itself is known to be NaN here.
        for i in range(targetDepth - 2, 19, -1):
            current = profile[i]
            infront = profile[i + 1]
            if not np.isnan(current) and not np.isnan(infront):
                slope = infront - current  # samples are one row apart
                exterpSoundSpeed = slope * (targetDepth - i) + current
                return targetDepth, exterpSoundSpeed

    #extrapolation from data below 30m only
    if dataBelow30:
        for i in range(targetDepth + 1, 40):
            current = profile[i]
            infront = profile[i + 1]
            if not np.isnan(current) and not np.isnan(infront):
                slope = infront - current  # samples are one row apart
                exterpSoundSpeed = slope * (targetDepth - i) + current
                #return here because we need the slope closest to 30m
                return targetDepth, exterpSoundSpeed

    #only an isolated sample was available: nothing to extrapolate from
    return np.nan, np.nan

In [None]:
def soundChannelDim(data, startDate, endDate):
    """Compute the per-day sound channel length and width.

    For each row of the table produced by ``ThirtyMAndLocalMaxima``, the
    shallow reference point is obtained from ``extrapolateSoundSpeed`` and:

    * length = local-maximum depth  - shallow depth
    * width  = local-maximum speed  - shallow sound speed

    Either value is NaN when the corresponding shallow quantity is NaN.

    Returns
    -------
    pandas.DataFrame
        Columns 'Dates', 'Sound Channel Length' and 'Sound Channel Width'.
    """
    dates = pd.date_range(start=startDate, end=endDate)

    maximaTable = ThirtyMAndLocalMaxima(data, startDate, endDate)

    lengths = []
    widths = []

    for row in range(maximaTable.shape[0]):
        dayKey = convertDate(dates[row])
        shallowDepth, shallowSoundSpeed = extrapolateSoundSpeed(data, dayKey)

        # length: depth gap between the local maximum and the shallow point
        if np.isnan(shallowDepth):
            lengths.append(np.nan)
        else:
            lengths.append(maximaTable['Depth Local Maxima'].values[row] - shallowDepth)

        # width: speed gap between the local maximum and the shallow point
        if np.isnan(shallowSoundSpeed):
            widths.append(np.nan)
        else:
            widths.append(maximaTable['Speed Local Maxima'].values[row] - shallowSoundSpeed)

    return pd.DataFrame(data={
        'Dates': dates,
        'Sound Channel Length': lengths,
        'Sound Channel Width': widths,
    })

In [None]:
#Load Data for all 4 datasets
#Each base name from the user-input cell gets the ".csv" extension appended.
location1 = pd.read_csv(f"{file1}.csv")
location2 = pd.read_csv(f"{file2}.csv")
location3 = pd.read_csv(f"{file3}.csv")
location4 = pd.read_csv(f"{file4}.csv")

In [None]:
#get file name attributes
#each result is a (locationName, startDate, endDate) tuple
location1Attributes = fileNameAttributes(file1)
location2Attributes = fileNameAttributes(file2)
location3Attributes = fileNameAttributes(file3)
location4Attributes = fileNameAttributes(file4)

#unpack each tuple into its name / start / end variables
location1Name, location1Start, location1End = location1Attributes
location2Name, location2Start, location2End = location2Attributes
location3Name, location3Start, location3End = location3Attributes
location4Name, location4Start, location4End = location4Attributes

In [None]:
#focus data set
#Select the dataset whose parsed location name matches focusLocation
#(case-insensitive). All four locations are tested explicitly, and an
#unrecognised name raises instead of silently selecting some other site.
if focusLocation.casefold() == location1Name.casefold():
    focusData = location1
    focusStart = location1Start
    focusEnd = location1End
elif focusLocation.casefold() == location2Name.casefold():
    focusData = location2
    focusStart = location2Start
    focusEnd = location2End
elif focusLocation.casefold() == location3Name.casefold():
    focusData = location3
    focusStart = location3Start
    focusEnd = location3End
elif focusLocation.casefold() == location4Name.casefold():
    focusData = location4
    focusStart = location4Start
    focusEnd = location4End
else:
    raise ValueError(
        "focusLocation '" + focusLocation + "' does not match any loaded location: "
        + ", ".join([location1Name, location2Name, location3Name, location4Name])
    )

In [None]:
#average speed of sound
#Compute the per-depth average for the focus dataset and show it as a scatter plot.
avgSoundSpeed = averageSoundSpeed(focusData, focusStart, focusEnd)
soundSpeedScatter = hv.Scatter(avgSoundSpeed)
#y-axis is inverted; the title reports the date range and the focus location
soundSpeedScatter.opts(invert_yaxis=True, 
             width=800, height=500,
             tools = ['hover'],
             title = "Average Sound Speed from " + focusStart + " to " + focusEnd + " for "+ focusLocation.title())
#last expression of the cell: renders the plot
soundSpeedScatter

In [None]:
#calculate the 30m sound speed and the local maximum (depth and speed) per day, for each location
location1_30mLMM_Data = ThirtyMAndLocalMaxima(location1, location1Start, location1End)
location2_30mLMM_Data = ThirtyMAndLocalMaxima(location2, location2Start, location2End)
location3_30mLMM_Data = ThirtyMAndLocalMaxima(location3, location3Start, location3End)
location4_30mLMM_Data = ThirtyMAndLocalMaxima(location4, location4Start, location4End)

In [None]:
#remove flat data portions
#(rows whose local-maximum depth is within 3 of the next row's are dropped by excludeFlat)
location1_truncated = excludeFlat(location1_30mLMM_Data)
location2_truncated = excludeFlat(location2_30mLMM_Data)
location3_truncated = excludeFlat(location3_30mLMM_Data)
location4_truncated = excludeFlat(location4_30mLMM_Data)

In [None]:
#create scatter plots, one per location, labelled with the location name
#Multiple plots reference: https://justinbois.github.io/bootcamp/2020/lessons/l27_holoviews.html
#Another reference: http://holoviews.org/user_guide/Composing_Elements.html
#NOTE(review): the whole truncated table is passed in, so the plotted columns are chosen by
#HoloViews' defaults - presumably 'Dates' vs 'Depth Local Maxima'; confirm.

location1_Scatter = hv.Scatter(location1_truncated, label = location1Name)
location2_Scatter = hv.Scatter(location2_truncated, label = location2Name)
location3_Scatter = hv.Scatter(location3_truncated, label = location3Name)
location4_Scatter = hv.Scatter(location4_truncated, label = location4Name)

In [None]:
#combine the four per-location scatters with the * operator and style the combined plot
ThirtyM_And_LM_Plots = location1_Scatter * location2_Scatter * location3_Scatter * location4_Scatter
#.opts(...) is the last expression of the cell, so its result is what gets displayed
ThirtyM_And_LM_Plots.opts(
    invert_yaxis=True, 
    width=800, height=500,
    tools = ['hover'],
    title = "Sound Speed Local Maxima at Depth"
)

In [None]:
#sound channel dimensions
#For each location: compute the per-day table once, then take two 2-column
#selections from it (dates + length, dates + width) for plotting.
#location1
location1_soundChannelDim = soundChannelDim(location1, location1Start, location1End)
location1_soundChannelLength = location1_soundChannelDim[["Dates", "Sound Channel Length"]]
location1_soundChannelWidth = location1_soundChannelDim[["Dates", "Sound Channel Width"]]

#location2
location2_soundChannelDim = soundChannelDim(location2, location2Start, location2End)
location2_soundChannelLength = location2_soundChannelDim[["Dates", "Sound Channel Length"]]
location2_soundChannelWidth = location2_soundChannelDim[["Dates", "Sound Channel Width"]]

#location3
location3_soundChannelDim = soundChannelDim(location3, location3Start, location3End)
location3_soundChannelLength = location3_soundChannelDim[["Dates", "Sound Channel Length"]]
location3_soundChannelWidth = location3_soundChannelDim[["Dates", "Sound Channel Width"]]

#location4
location4_soundChannelDim = soundChannelDim(location4, location4Start, location4End)
location4_soundChannelLength = location4_soundChannelDim[["Dates", "Sound Channel Length"]]
location4_soundChannelWidth = location4_soundChannelDim[["Dates", "Sound Channel Width"]]

In [None]:
#sound channel length: one labelled scatter per location
location1_Scatter_L = hv.Scatter(location1_soundChannelLength, label = location1Name)
location2_Scatter_L = hv.Scatter(location2_soundChannelLength, label = location2Name)
location3_Scatter_L = hv.Scatter(location3_soundChannelLength, label = location3Name)
location4_Scatter_L = hv.Scatter(location4_soundChannelLength, label = location4Name)

#combine the four scatters with the * operator, then style and display the combined plot
soundChannelLengthPlots = location1_Scatter_L * location2_Scatter_L * location3_Scatter_L * location4_Scatter_L
soundChannelLengthPlots.opts(
                        invert_yaxis=True,
                        width=800, height=500,
                        tools = ['hover'],
                        title = "Sound Channel Length")

In [None]:
#sound channel width: one labelled scatter per location
location1_Scatter_W = hv.Scatter(location1_soundChannelWidth, label = location1Name)
location2_Scatter_W = hv.Scatter(location2_soundChannelWidth, label = location2Name)
location3_Scatter_W = hv.Scatter(location3_soundChannelWidth, label = location3Name)
location4_Scatter_W = hv.Scatter(location4_soundChannelWidth, label = location4Name)

#combine the four scatters with the * operator, then style and display the combined plot
soundChannelWidthPlots = location1_Scatter_W * location2_Scatter_W * location3_Scatter_W * location4_Scatter_W
soundChannelWidthPlots.opts(
                        invert_yaxis=True,
                        width=800, height=500,
                        tools = ['hover'],
                        title = "Sound Channel Width")