In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import header
# Project-local setup; the returned `paths` object is indexed by key below
# (e.g. paths['new_data']) and is passed on to libWellData.
paths = header.setup_environment()
import platform
# Pick the directory that holds the merged inventory CSV.
# NOTE(review): the macOS branch is an absolute path on one user's machine and
# will not exist elsewhere — consider deriving it from `paths` as the other branch does.
if platform.system()=='Darwin':
    INPUTDIR = '/Users/thompsong/Dropbox/PROFESSIONAL/RESEARCH/3_Project_Documents/NASAprojects/201602_Rocket_Seismology/DATA/2022_DATA/WellData/MERGED'
else:
    INPUTDIR = os.path.join(paths['new_data'], '03_merge_inventories')
# NOTE(review): this import comes after header.setup_environment(); presumably
# that call makes libWellData importable — confirm before moving it to the top.
import libWellData as LLE
transducersDF = LLE.get_transducers_dataframe(paths)
display(transducersDF)

# Load the summary of all files (raw 4-hourly data).
dfall2 = pd.read_csv(os.path.join(INPUTDIR, 'all2.csv'))
#display(dfall2['subdir'])

# NOTE(review): the return value is discarded; presumably qc_dataframe reports
# on or modifies dfall2 in place — confirm against libWellData.
LLE.qc_dataframe(dfall2)

# Split into baro, 20 Hz and 100 Hz groups by subdirectory.
all_dataframes = LLE.split_by_subdir(dfall2, verbose=True)
#print(all_dataframes)


In [None]:
# Disabled debugging cell: the code below is wrapped in a string literal, so none
# of it executes.
# NOTE(review): `dfdata` is not assigned in any cell visible here (the split
# result above is named `all_dataframes`) — confirm before re-enabling.
'''
print(dfdata)
for k in dfdata.keys():
    print(k)
    display(dfdata[k])
'''

In [None]:
# Disabled processing pipeline (steps 10.2-10.8): the code below is wrapped in a
# string literal, so none of it executes.
# NOTE(review): `watercolumns` and `estimatedAllSensorsMeters`, which the RSAM
# cell later passes to simulate_rsam, are only assigned inside this string in the
# cells visible here; `dfdata` is likewise not assigned anywhere visible.
'''
# 10.2: Correct analog and digital air column transducers for calibration, elevation above water, and apply DC shift (no temperature correction)
dcshifts2 = {'AirPressureShallow':0.0, 'AirPressureDeep':-0.003058, '1226420':-14.423795, '1226429':-14.556290}
aircolumns = ['AirPressureShallow', 'AirPressureDeep', '1226420', '1226429']
dfbaro_dcshifted = LLE.correctBarometricData(dfdata['baro'], aircolumns[:2], transducersDF, temperatureCorrect=False, heightCorrect=True, dcshifts=dcshifts2)
df100hz_dcshifted = LLE.correctBarometricData(dfdata['100hz'], aircolumns[2:], transducersDF, temperatureCorrect=False, heightCorrect=True, dcshifts=dcshifts2)

# 10.4: Correct digital water column transducers for calibration and barometric pressure
correctedAllSensorsPSI = LLE.rawdf2psidf(df100hz_dcshifted, transducersDF, temperatureCorrect=False, airpressureCorrect=True, depthCorrect=False)
watercolumns = ['1226419', '1226421', '2151691', '2149882']
display(correctedAllSensorsPSI[watercolumns])

# 10.5: plot PSI
correctedAllSensorsPSI.plot(x='datetime', y=aircolumns[2:]+watercolumns, style='-', ylabel='PSI')

# 10.6: convert to water levels in meters
correctedAllSensorsMeters = LLE.psi2meters(correctedAllSensorsPSI, watercolumns)
correctedAllSensorsMeters.plot(x='datetime', y=watercolumns, style='-', ylabel='Meters')

# 10.7: convert to water levels in meters relative to the set depth measured by Steve Krupa
relativeAllSensorsMeters = LLE.relative_to_set_depth(correctedAllSensorsMeters, transducersDF, watercolumns)
relativeAllSensorsMeters.plot(x='datetime', y=watercolumns, style='-', ylabel='Meters')

# 10.8: estimate correct set depths from median of each, and shift by this amount
estimatedAllSensorsMeters = LLE.estimate_sensor_depths(correctedAllSensorsMeters, watercolumns)
estimatedAllSensorsMeters.plot(x='datetime', y=watercolumns, style='-', ylabel='Meters')   
'''

In [None]:

# RSAM simulation
import obspy

# NOTE(review): `df` (with 'amplitude' and 'time' columns) and `sampling_rate`
# are not assigned in any cell visible here; they must already exist in the
# kernel or this cell raises NameError.

# Step 1: Create an ObsPy Trace from the DataFrame (we assume 'amplitude' is the seismic data)
# Only `import obspy` is in scope, so Trace/Stream must be referenced through the package.
trace = obspy.Trace()
trace.data = df['amplitude'].values  # Assign the amplitude as the seismic data
trace.stats.station = "S01"
trace.stats.network = "NET"
trace.stats.sampling_rate = sampling_rate
trace.stats.starttime = obspy.UTCDateTime(df['time'].iloc[0])

# Step 2: Create a Stream object containing the Trace
stream = obspy.Stream(traces=[trace])

# Step 3: Display some information
print(stream)
print("Trace Data:", stream[0].data[:10])  # Print the first 10 samples of the trace data
print("Trace Stats:", stream[0].stats)

# Plot the Stream data
stream.plot()


import numpy as np
def simulate_rsam(thisdf, watercolumns, sample_interval_seconds=1):
    """Build an RSAM-like (median absolute amplitude) Stream from well-data columns.

    For every column of `thisdf` that is listed in `watercolumns` the samples
    are NaN-filled with 0, linearly detrended, high-pass filtered at 0.05 Hz and
    rectified (absolute value). The frame is then resampled on its 'datetime'
    column into bins of `sample_interval_seconds` using the median, plotted, and
    each resampled column is wrapped in an ObsPy Trace.

    Parameters
    ----------
    thisdf : pandas.DataFrame
        Input data; must contain a 'datetime' column usable by
        `DataFrame.resample(on='datetime')`. It is not modified.
    watercolumns : list of str
        Names of the columns to detrend, filter and rectify.
    sample_interval_seconds : int or float, optional
        Width of each resampling bin in seconds (default 1).

    Returns
    -------
    obspy.Stream
        One Trace per column of the resampled frame, with `delta` set to
        `sample_interval_seconds` and `starttime` set to the first bin.
    """
    # Work on a copy so the caller's dataframe keeps its unfiltered values.
    workdf = thisdf.copy()

    for col in workdf.columns:
        if col in watercolumns:
            print(col)
            # Replace NaN with 0, linear detrend, high-pass filter, take the
            # absolute value, and put the result back into the dataframe.
            tr = obspy.Trace(data=workdf[col].to_numpy())
            tr.id = f'{col[0:2]}.{col[2:7]}.{col[7:9]}.{col[9:]}'
            # Alternative NaN handling (not used): fill with np.nanmean(tr.data)
            # or interpolate instead of zero-filling.
            tr.data = np.nan_to_num(tr.data, nan=0.0)
            tr.detrend('linear')
            # NOTE(review): no sampling rate is set on `tr`, so ObsPy falls back
            # to its default rate; 0.05 Hz is a 20-s corner only if that default
            # matches the real sample spacing of the data — confirm.
            tr.filter('highpass', freq=0.05)
            workdf[col] = abs(tr.data)
    display(workdf)

    # Median of the rectified data in each time bin.
    resampleddf = workdf.resample(f'{sample_interval_seconds}s', on='datetime').median()
    print(resampleddf)
    resampleddf.plot(kind='line')
    plt.show()

    st = obspy.Stream()
    for col in resampleddf:
        tr2 = obspy.Trace(data=resampleddf[col].to_numpy())
        tr2.data = np.nan_to_num(tr2.data, nan=0.0)
        tr2.id = f'{col[0:2]}.{col[2:7]}.{col[7:9]}.{col[9:]}'  # create SEED ID
        tr2.stats.delta = float(sample_interval_seconds)
        tr2.stats.starttime = obspy.UTCDateTime(resampleddf.index[0])
        st.append(tr2)
    st.plot(equal_scale=False);
    print(st)
    return st
# NOTE(review): in the cells visible here, `estimatedAllSensorsMeters` and
# `watercolumns` are assigned only inside the disabled (triple-quoted) processing
# cell, so they must be defined in the kernel some other way before this call.
rsamst = simulate_rsam(estimatedAllSensorsMeters, watercolumns)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
def detrend_dataframe(dforiginal, valuecolumns=[]):
    """Remove a least-squares linear trend from selected columns.

    Each requested column is gap-filled (forward fill, then backward fill so
    leading gaps are covered too), a straight line is fitted to it against the
    row number, and that line is subtracted.

    Parameters
    ----------
    dforiginal : pandas.DataFrame
        Input data. It is not modified; a copy is returned.
    valuecolumns : list of str
        Columns to detrend. Names not present in the frame are ignored.
        The default list is never mutated.

    Returns
    -------
    pandas.DataFrame
        Copy of `dforiginal` with the requested columns gap-filled and detrended.
        Columns with fewer than two samples, or with no valid sample at all,
        are returned gap-filled but not detrended.

    Raises
    ------
    IOError
        If `valuecolumns` is empty.
    """
    if len(valuecolumns) == 0:
        raise IOError('No columns given')

    df = dforiginal.copy()

    # Row number is the time axis for the fit (rows are treated as evenly
    # spaced); it is the same for every column, so build it once.
    sample_index = np.arange(len(df), dtype=float)

    for col in valuecolumns:
        if col not in df.columns:
            continue

        # Step 1: handle missing values. Forward fill alone leaves leading NaNs,
        # which would poison the fit, so back-fill those as well.
        filled = df[col].ffill().bfill()
        df[col] = filled

        # A line cannot be fitted to fewer than two samples or to an all-NaN column.
        if len(filled) < 2 or filled.isna().any():
            continue

        # Step 2: fit the trend to THIS column (not a fixed 'value' column)
        # with an ordinary least-squares line.
        slope, intercept = np.polyfit(sample_index, filled.to_numpy(dtype=float), 1)
        trend = slope * sample_index + intercept

        # Detrended data: gap-filled data minus fitted trend.
        df[col] = filled - trend
    return df

import pandas as pd
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt

def high_pass_dataframe(dforiginal, valuecolumns, freqmin=0.2, sampling_rate=100):
    """High-pass filter selected columns of a dataframe.

    Each requested column is gap-filled (forward fill, then backward fill) and
    passed through `high_pass_filter`.

    Parameters
    ----------
    dforiginal : pandas.DataFrame
        Input data. It is not modified; a copy is returned.
    valuecolumns : list of str
        Columns to filter. Names not present in the frame are ignored.
    freqmin : float, optional
        Corner frequency handed to `high_pass_filter` as `cutoff_freq`
        (default 0.2).
    sampling_rate : float, optional
        Sampling rate handed to `high_pass_filter` (default 100).

    Returns
    -------
    pandas.DataFrame
        Copy of `dforiginal` with the requested columns replaced by their
        filtered versions.

    Raises
    ------
    IOError
        If `valuecolumns` is empty.
    """
    if len(valuecolumns) == 0:
        raise IOError('No columns given')

    df = dforiginal.copy()
    for col in valuecolumns:
        if col not in df.columns:
            continue

        # Step 1: handle missing values. Back-fill after forward-fill so that a
        # leading NaN cannot propagate through the filter.
        filled = df[col].ffill().bfill()

        # Step 2: filter. high_pass_filter names its corner-frequency parameter
        # `cutoff_freq`, so freqmin must be passed under that name.
        df[col] = high_pass_filter(filled, cutoff_freq=freqmin, sampling_rate=sampling_rate)

    return df



# Define the high-pass filter
def high_pass_filter(data, cutoff_freq, sampling_rate, order=4):
    """Zero-phase Butterworth high-pass filter.

    Parameters
    ----------
    data : array-like
        Samples to filter.
    cutoff_freq : float
        Corner frequency, in the same units as `sampling_rate`.
    sampling_rate : float
        Sampling rate of `data`.
    order : int, optional
        Butterworth filter order (default 4).

    Returns
    -------
    numpy.ndarray
        The filtered samples.
    """
    # scipy expects the corner as a fraction of the Nyquist frequency.
    half_rate = 0.5 * sampling_rate
    numerator, denominator = signal.butter(
        order, cutoff_freq / half_rate, btype='high', analog=False
    )

    # Forward-backward filtering cancels the phase shift.
    return signal.filtfilt(numerator, denominator, data)



In [None]:
# Disabled event-detection cell: the code below is wrapped in a string literal,
# so none of it executes.
# NOTE(review): it reads `st` and `sample_interval`, neither of which is assigned
# at notebook level in the cells visible here (`st` is local to simulate_rsam,
# whose result is stored as `rsamst`) — confirm before re-enabling.
'''
#st.filter('bandpass', freqmin=10, freqmax=20)  # optional prefiltering
print(st)
from obspy.signal.trigger import coincidence_trigger

st2 = st.copy()

trigs = coincidence_trigger("recstalta", 2, 1, st2, int(len(st2)/2), sta=sample_interval*5, lta=sample_interval*100)
for thistrig in trigs:
    display(thistrig)
    noisewindow = st2.copy().trim(starttime=thistrig['time']-60, endtime=thistrig['time']-10)
    for tr in noisewindow:
        if not np.all(tr.data):
            thistrig['trace_ids'].remove(tr.id)
    if len(thistrig['trace_ids'])>len(st2)/2:
        st3 = st2.copy().trim(starttime=thistrig['time']-60, endtime=thistrig['time']+60+thistrig['duration'])
        st3.plot(equal_scale=False);
'''