# Spectral Density

## Load Libraries

In [1]:
import os
import pickle
import numpy as np

from pandas import DataFrame, concat, Series, date_range, read_csv, read_pickle

In [2]:
# from functions.get_hist_loglog import __get_hist_loglog
from functions.replace_noise_psd_with_nan import __replace_noisy_psds_with_nan
from functions.cut_frequencies_array import __cut_frequencies_array
from functions.get_median_psd import __get_median_psd
from functions.get_percentiles import __get_percentiles

In [3]:
# Select the machine-specific base directories from the current hostname.
hostname = os.uname().nodename

# 'kilauea' and 'lin-ffb-01' use identical locations
server_paths = (
    '/home/brotzer/',
    '/import/kilauea-data/',
    '/import/freenas-ffb-01-data/',
    '/bay200/',
)

# hostname -> (root_path, data_path, archive_path, bay_path)
paths_by_host = {
    'lighthouse': (
        '/home/andbro/',
        '/home/andbro/kilauea-data/',
        '/home/andbro/freenas/',
        '/home/andbro/bay200/',
    ),
    'kilauea': server_paths,
    'lin-ffb-01': server_paths,
}

if hostname in paths_by_host:
    root_path, data_path, archive_path, bay_path = paths_by_host[hostname]
else:
    # An unknown host used to fall through silently, which only surfaced later
    # as a NameError in the configuration cell. Report it right away instead.
    print(f" -> unknown host '{hostname}': root_path, data_path, archive_path and bay_path are not set!")

## Configurations

In [17]:
# Central settings of this notebook. archive_path and data_path are the
# host-specific base directories defined in the cell above.
config = {

    # stations to process (alternative station set: ['FFB1', 'FFB2', 'FFB3'])
    'stations': ['RY01', 'RY02', 'RY03', 'RY04', 'RY05', 'RY06', 'RY07', 'RY08', 'RY09'],

    # first and last day handed to the file readers
    'd1': "2023-03-10",
    'd2': "2023-03-17",

    # directory the hourly PSD pickle files are read from
    'path_to_data': archive_path + "ModalAnalysis/data/PSDS/",

    # directory the per-station statistics are written to
    'path_to_outdata': archive_path + "ModalAnalysis/data/PSDS_median/",

    # directory for figures
    'outpath_figures': data_path + "modal_analysis/figures/",

    # lower and upper frequency limit
    'frequency_limits': (1e-3, 1e1),

    # lower and upper percentile passed to __get_percentiles
    'plower': 2.5,
    'pupper': 97.5,
}


## Methods

In [12]:
def __filter_psds(psds, thresholds):
    """Keep only the PSDs whose mean over the first 63 samples is below a threshold.

    A PSD is kept when the mean of its samples 0..62 is smaller than
    thresholds[0]. According to the original comment these samples are meant
    to cover periods larger than 20 seconds; that depends on the frequency
    axis and is not checked here. thresholds[1] is only reported in the log
    message, it is not applied.

    Parameters
    ----------
    psds : sequence of 1-D sequences
        The individual PSDs.
    thresholds : sequence
        At least two entries; the first one is the upper limit for the mean.

    Returns
    -------
    numpy.ndarray
        The PSDs that passed the filter (empty array if none passed).
    """

    from numpy import mean, array

    psds_filtered = []

    # the loop header was missing, which left `psd` undefined (NameError)
    for psd in psds:

        ## filter for periods larger than 20 seconds
        if mean(psd[0:63]) < thresholds[0]:
            psds_filtered.append(psd)

    print(f" -> removed {len(psds)- len(psds_filtered)} of {len(psds)} psds due to thresholds: {thresholds[0]} & {thresholds[1]}")
    return array(psds_filtered)

In [13]:
def __read_files2(seed, tbeg, tend):
    """Collect the PSDs of one channel from the daily '..._3600_..._hourly.pkl' files.

    For every day between tbeg and tend the file
    '<sta>/<cha>/<year>_<sta>_<last letter of cha>_3600_<yyyymmdd>_hourly.pkl'
    below config['path_to_data'] is read. Missing or unreadable files are
    reported and skipped.

    Parameters
    ----------
    seed : str
        Identifier of the form 'net.sta.loc.cha'.
    tbeg, tend :
        First and last day, passed on to pandas.date_range.

    Returns
    -------
    dat : numpy.ndarray
        All PSDs found in the 'psd' entries of the files, stacked in reading order.
    ff1 : object
        The 'frequencies' entry of the last file that was read successfully.

    Raises
    ------
    FileNotFoundError
        If not a single file could be read. Previously this case ended in an
        accidental UnboundLocalError at the return statement.
    """

    from numpy import array
    from pandas import read_pickle

    _net, sta, _loc, cha = seed.split('.')

    dat, ff1 = [], None
    for day in date_range(tbeg, tend):

        day = day.strftime("%Y%m%d")

        filename = f"{sta}/{cha}/{day[:4]}_{sta}_{cha[-1]}_3600_{day}_hourly.pkl"
        filepath = config['path_to_data']+filename

        # skip if file does not exist
        if not os.path.isfile(filepath):
            print(f" -> skipping {filename} ...")
            continue

        print(filename)
        try:
            # NOTE: unpickling can execute arbitrary code - only read trusted files
            out = read_pickle(filepath)

            ff1, dat1 = out['frequencies'], out['psd']

        except Exception as e:
            print(e)
            print(f" -> {day}: no data found")
            continue

        # one entry per PSD contained in the file
        dat.extend(dat1)

    if ff1 is None:
        raise FileNotFoundError(f"no readable PSD file for {seed} between {tbeg} and {tend}")

    return array(dat), ff1

In [14]:
def __read_files(seed, tbeg, tend):
    """Collect the PSDs of one channel from the daily '..._hourly.pkl' files.

    For every day between tbeg and tend the file
    '<sta>/<cha>/<year>_<sta>_<cha>_<yyyymmdd>_hourly.pkl' below
    config['path_to_data'] is read. Missing or unreadable files are reported
    and skipped.

    Parameters
    ----------
    seed : str
        Identifier of the form 'net.sta.loc.cha'.
    tbeg, tend :
        First and last day, passed on to pandas.date_range.

    Returns
    -------
    dat : numpy.ndarray
        All PSDs found in the 'psd' entries of the files, stacked in reading order.
    ff1 : object
        The 'frequencies' entry of the last file that was read successfully.

    Raises
    ------
    FileNotFoundError
        If not a single file could be read. Previously this case ended in an
        accidental UnboundLocalError at the return statement.
    """

    from numpy import array

    _net, sta, _loc, cha = seed.split('.')

    dat, ff1 = [], None
    for day in date_range(tbeg, tend):

        day = day.strftime("%Y%m%d")

        filename = f"{sta}/{cha}/{day[:4]}_{sta}_{cha}_{day}_hourly.pkl"
        filepath = config['path_to_data']+filename

        # skip if file does not exist
        if not os.path.isfile(filepath):
            print(f" -> skipping {filename} ...")
            continue

        try:
            # NOTE: unpickling can execute arbitrary code - only read trusted files
            out = read_pickle(filepath)

            ff1, dat1 = out['frequencies'], out['psd']

        except Exception as e:
            print(e)
            print(f" -> {day}: no data found")
            continue

        # one entry per PSD contained in the file
        dat.extend(dat1)

    if ff1 is None:
        raise FileNotFoundError(f"no readable PSD file for {seed} between {tbeg} and {tend}")

    return array(dat), ff1

## Run for all stations and channels

In [15]:
# Fallback frequency axis; the processing loop below uses it whenever the loaded
# axis does not have as many entries as a PSD has samples.
# NOTE(review): the commented-out cell at the end of this notebook writes the
# template as a DataFrame with a single column 'ff' - confirm that the code
# using tmp_ff expects that type.
tmp_ff = read_pickle("./ff_template.pkl")

In [18]:
# For every station: load the PSDs of the Z, N and E channel, compute the
# median and the lower / upper percentile and store all of them in one
# DataFrame per station.
plower, pupper = config['plower'], config['pupper']

for sta in config['stations']:

    print(f"-> {sta} ... ")

    out_df = DataFrame()

    for c in ["Z", "N", "E"]:
        print(f" -> {c} ...")

        try:
            psds, ff = __read_files(f"XX.{sta}..HH{c}", config['d1'], config['d2'])
        except Exception as e:
            print(f" -> failed to load {c} data!")
            print(e)
            # skip this channel; continuing here would silently reuse the
            # psds / ff of the previously loaded channel or station
            continue

        print(len(ff), np.shape(psds))

        # an empty or ragged result has no second axis to compare against
        if np.ndim(psds) != 2:
            print(f" -> no usable psds for {c}!")
            continue

        # fall back to the template axis if the loaded one does not fit
        if len(ff) != np.shape(psds)[1]:
            print(len(ff), "!=", np.shape(psds)[1])
            ff = tmp_ff

        try:
            # compute everything first, so that a failure does not leave
            # a partly filled set of columns for this channel
            psd_median = __get_median_psd(psds)
            perc_low, perc_high = __get_percentiles(psds, p_low=plower, p_high=pupper)

            out_df['frequencies'] = ff

            out_df[f'psds_median_{c}'] = psd_median
            out_df[f'perc_low_{c}'] = perc_low
            out_df[f'perc_high_{c}'] = perc_high
        except Exception as e:
            print(f" -> processing failed!")
            print(e)

    # do not write (or overwrite an existing file with) an empty frame
    if out_df.empty:
        print(f" -> nothing to store for {sta}!")
        continue

    out_df.to_pickle(config['path_to_outdata']+f"{sta}_psd_stats.pkl")
    print(f" -> stored: {sta}_psd_stats.pkl")


-> RY01 ... 
 -> Z ...
720002 (192, 720002)
 -> N ...
720002 (192, 720002)
 -> E ...
720002 (192, 720002)
 -> stored: RY01_psd_stats.pkl


In [10]:
# df = DataFrame()
# df['ff'] = ff
# df.to_pickle("./ff_template.pkl")