In [None]:
# Notebook set-up: page width, output behaviour, warnings and imports.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated and fails on recent
# IPython releases.
from IPython.display import display, HTML
# Widen the notebook container to 80% of the browser window
display(HTML("<style>.container { width:80% !important; }</style>"))
from IPython.core.interactiveshell import InteractiveShell
# Only echo the value of the last expression of each cell
InteractiveShell.ast_node_interactivity = "last_expr"
import warnings
# Silence all warnings for the remainder of the session
warnings.filterwarnings('ignore')

import pandas as pd
from obspy import read
from msnoise.api import *
from wxs_dvv import *
import os
import numpy as np
import tqdm
# NOTE(review): `plt` is not imported explicitly in this cell; presumably it is
# provided by one of the star imports above -- confirm.
plt.rcParams['figure.figsize'] = (16,8)

# Compute dv/v using the wavelet method

Following Shujuan's presentation, we use a pre-processed dataset of cross-correlation functions (CCFs) to experiment with the wavelet method.
Here, we will play with single station (SC) products of station "CI.LJR" in the Tejon Pass, between the San Emigdio and Tehachapi Mountains, California.
The station was selected as it was used in [Clements and Denolle (2023, JGR)](https://doi.org/10.1093/gji/ggz495) who used it to compare the dv/v with variations of groundwater levels in Southern California. We look at the 2011 to 2017 period, as it includes the 2011-2016 severe drought period that significantly impacted the groundwater level. A fitting subject for the theme of this workshop.

![Figure 3 of Clements and Denolle (2023)](Figures/Clement2023.jpg)

- Here we will try to reproduce the dv/v curve for that station, first using similar parameters (frequency range, time lags), and then individually explore other parametrizations. Here we are using the wavelet method, meaning that we can easily adjust the frequency range of interest after processing the CCFs.

- Because the authors made a very good effort at making their work reproducible (Yay!) we can easily compare the resulting time series to theirs.

- Depending on the horse-power under the hood of your laptop, the processing of the full period (2011-2017) can take a while. Adjust the processing to your taste (e.g. time period or components configuration). Use this processing time efficiently by taking a look at the CCFs through MSNoise, either using the command line or a new/separate Jupyter notebook (Keep an eye on yesterday's notebook!).

- If you quickly go through the full practical, go back to yesterday afternoon's dataset and play with the wavelet method to measure the dv/v in different filters with a high spectral resolution.

## Getting parameters from the msnoise DB

When plugging into an MSNoise workflow, you can always use the parameters as they are in the database, or modify them below to explore the results (e.g. start, end, freqmin, freqmax, mov_stack, ...).

In [None]:
# Connect to the MSNoise database and read the project configuration
db = connect()
params = get_params(db)
filterid = 1  # Make sure this is the proper frequency band
fs = params.cc_sampling_rate
# Lag window handed to get_dvv later on: [dtt_minlag, dtt_minlag + dtt_width]
lag_min = params.dtt_minlag
lag_max = params.dtt_minlag + params.dtt_width

# We'll start with a broad-ish frequency range, and play with it later
freqmin = 0.1
freqmax = 4.0

# Ask me about this variable if you're curious!
subdaily = False

saveit = True   # pickle the dv/v and error DataFrames in the processing loop
plot = False    # call do_plot for every processed day in the processing loop


# Single-station analysis: every station is paired with itself ("sta:sta")
stations_to_analyse = ["%s.%s.%s" % (sta.net, sta.sta, sta.locs()[0]) for sta in get_stations(db, all=False)]
pairs = ["{}:{}".format(sta, sta) for sta in stations_to_analyse]

start = params.startdate
end = params.enddate

comps = ["EZ", "NZ", "EN"]
# Moving-stack length used to load the CCFs below and to build the
# "{:03d}_DAYS" folder name in the following cells.
mov_stack = 1

# Obtain the list of dates returned by MSNoise for the configured period
_, _, datelist = build_movstack_datelist(db)
taxis = get_t_axis(db)

# Get the results for the first single-station pair, the selected filter id,
# the "EZ" component and the selected mov_stack, as a 2D array:
n, ccfs = get_results(db, pairs[0].split(":")[0], pairs[0].split(":")[0], filterid, "EZ", datelist,
                      mov_stack, format="matrix", params=params)
# Convert to a pandas DataFrame object for convenience, and drop empty rows:
df = pd.DataFrame(ccfs, index=pd.DatetimeIndex(datelist), columns=taxis)
df = df.dropna()

# Colour limit for visualisation: the 99.9th percentile of the CCF averaged
# over all dates.
clim = df.mean(axis="index").quantile(0.999)

## What have we got?

In [None]:
# Interferogram of the loaded CCFs: lag time on the x axis, date on the y axis,
# colour scale clipped symmetrically at +/- clim.
fig, ax = plt.subplots()
mesh = ax.pcolormesh(df.columns, df.index.to_pydatetime(), df.values,
                     vmin=-clim, vmax=clim, rasterized=True, cmap="RdBu")
fig.colorbar(mesh, ax=ax)
ax.set_title("Interferogram")
ax.set_xlabel("Lag Time (s)")
ax.set_ylim(df.index[0], df.index[-1])
ax.set_xlim(-20, 20)
fig.subplots_adjust(left=0.15)

# Starting simple

- First we are only looking at one trace and the reference.

- You were only given the daily CCFs, so you will need to stack them before they can be used.

- This is the occasion for you to add other mov_stacks to the configuration if you want (What about 90 days?).

In [None]:
# NOTE(review): the descriptions below follow the MSNoise command-line
# documentation; confirm them against the installed MSNoise version.
# Reset all STACK jobs (-a) so that they are processed again
! msnoise reset STACK -a
# Compute the reference (REF) stacks
! msnoise cc stack -r
# Reset the STACK jobs again before the moving-stack step
! msnoise reset STACK
# Compute the moving (MOV) stacks
! msnoise cc stack -m
# Print the job summary
! msnoise info -j

Let's look at one day first

Here we read the CCF files for the reference and for the first day analysed, using ObsPy's `read` function.

In [None]:
# Choose a day to plot; this just picks the first one, but play with others!
date = pd.date_range(start, end, freq="D")[0]
pair = pairs[0]
sta1 = pair.split(":")[0]
sta2 = sta1  # single-station product: both sides of the pair are the same station
comp = "NZ"
# Reference stack for this filter / component / pair
ref_file = "STACKS/{:02d}/REF/{}/{}_{}.MSEED".format(filterid,
                                                  comp,
                                                  sta1,
                                                  sta2)

# Moving stack of the selected day
fn2 = "STACKS/{:02d}/{:03d}_DAYS/{}/{}_{}/{}.MSEED".format(filterid, mov_stack, comp,
                                                   sta1,
                                                   sta2,
                                                   date.date())
ref = read(ref_file)[0].data
# Read the daily file once and reuse the trace for both the samples and the
# time axis (the file was previously parsed twice).
current_trace = read(fn2)[0]
current = current_trace.data
# Shift the time axis by 120 s.
# NOTE(review): presumably 120 s is the maximum lag of the CCFs, so that zero
# lag sits at t = 0 -- confirm against the database configuration.
t = current_trace.times() - 120
# Normalise both traces by their maximum value
ori_waveform = (ref / ref.max())
new_waveform = (current / current.max())

Let's take a look at them

In [None]:
# Compare the normalised reference stack with the normalised daily stack.
# ori_waveform is built from the REF file and new_waveform from the daily
# file, so the legend labels follow that mapping (they were swapped before).
fig = plt.figure()
plt.plot(t, ori_waveform, label="Reference")
plt.plot(t, new_waveform, label="Current trace", alpha=0.7)
plt.xlim(-50, 50)
plt.xlabel("Lag Time (s)")
plt.legend()
plt.show()

## The cross-wavelet transform

Now we use the same function described in Shujuan's paper on both traces

The inputs are the following:

    trace_ref,
    trace_current,
    fs, Sampling frequency --> extracted from the DB
    ns, smoothing parameter
    nt, smoothing parameter
    vpo, Spacing parameter between discrete scales, higher means finer resolution
    freqmin,
    freqmax,
    nptsfreq, Number of frequency points between freqmin and freqmax

In [None]:
# Cross wavelet transform of the normalised reference and current traces.
# Positional arguments after the two traces, in the order of the input list
# given in the text above: fs, ns=3, nt=0.25, vpo=10, freqmin, freqmax,
# nptsfreq=100.
WXamp, WXspec, WXangle, Wcoh, WXdt, freqs, coi = xwt(ori_waveform, new_waveform, fs, 3, 0.25, 10, freqmin, freqmax, 100)

While most of the magic happened in the cell above, we still don't have a dv/v.
For this we will calculate a similar linear regression as the one discussed yesterday for the MWCS method. Here, however, we calculate it for every frequency point and using a weighting function rejecting data points with low coherence.

In [None]:
# get the dv/v from the linear regression and the weighting function
# lag_min / lag_max are the lag limits derived from the database parameters
# (dtt_minlag and dtt_minlag + dtt_width).
# Returns the dv/v, its error and the weighting function `wf`, which is passed
# on to do_plot.
# NOTE(review): dvv and err presumably hold one value per entry of `freqs` -- confirm.
dvv, err, wf =get_dvv(freqs, t, WXamp, Wcoh, WXdt, lag_min, lag_max, freqmin=freqmin, freqmax=freqmax)

In [None]:
# Plot the cross-wavelet results for the selected pair, date and component.
# NOTE(review): do_plot appears to write the figure to disk (the text below
# points to "WCT/Figure") -- confirm the output location.
do_plot(t, WXamp, WXspec, WXangle, Wcoh, WXdt, freqs, coi, wf, pair, date, comp)

Now you will find the figure in "WCT/Figure"
![XWT example](Figures/XWT.jpg)

# Kick it up a notch

Now let's run the job for all dates and all components. This might take a while...

In [None]:
# Measure the wavelet dv/v for every component combination in `comps` and every
# day between `start` and `end`, for the single-station pair `pair`.
# For each component, the dv/v and error measurements are collected into two
# DataFrames (rows = dates, columns = frequencies) and, if `saveit` is set,
# pickled into the "WCT" folder.

# Values that do not depend on the component are computed once.
sta1 = pair.split(":")[0]
sta2 = pair.split(":")[1]
cur_steps = mov_stack
dates = pd.date_range(start, end, freq="D")
date_select = dates

for comp in comps:
    ref_file = "STACKS/{:02d}/REF/{}/{}_{}.MSEED".format(filterid,
                                                      comp,
                                                      sta1,
                                                      sta2)
    if not os.path.isfile(ref_file):
        print("Ref file {} does not exist.".format(ref_file))
        continue

    # The reference is read once per component. It is only used as the input of
    # the division below, which allocates a new array, so no per-day copy of
    # the stream is required.
    sref = read(ref_file)
    ref = sref[0].data

    dvv_list = []
    err_list = []
    data_dates = []
    pbar = tqdm.tqdm(date_select, desc="Extracting CCF for pair {}".format(pair))
    for date in pbar:
        pbar.set_description("Working on CCF pair {} on {}".format(pair, date.date()))
        fn2 = "STACKS/{:02d}/{:03d}_DAYS/{}/{}_{}/{}.MSEED".format(filterid, cur_steps, comp,
                                                           sta1,
                                                           sta2,
                                                           date.date())

        # Days without a stack file are skipped
        if not os.path.isfile(fn2):
            pbar.set_description("File {} does not exist.".format(fn2))
            continue

        # Read the daily file once; the trace provides both samples and time axis
        current_trace = read(fn2)[0]
        current = current_trace.data
        # NOTE(review): presumably 120 s is the maximum lag of the CCFs, so that
        # zero lag sits at t = 0 -- confirm against the database configuration.
        t = current_trace.times() - 120
        # A fresh normalised reference array is built for every day, so xwt
        # always receives an array that no previous call has touched.
        ori_waveform = (ref / ref.max())  # TODO make normalisation optional
        new_waveform = (current / current.max())

        # Same call as in the single-day example, but with 400 frequency points
        WXamp, WXspec, WXangle, Wcoh, WXdt, freqs, coi = xwt(ori_waveform, new_waveform, fs, 3, 0.25, 10, freqmin, freqmax, 400)  # TODO get freq lims from db
        dvv, err, wf = get_dvv(freqs, t, WXamp, Wcoh, WXdt, lag_min, lag_max, freqmin=freqmin, freqmax=freqmax)
        dvv_list.append(dvv)
        err_list.append(err)
        data_dates.append(date)
        if plot:
            do_plot(t, WXamp, WXspec, WXangle, Wcoh, WXdt, freqs, coi, wf, pair, date, comp)

    if len(dvv_list) > 1:  # Check if the list has more than 1 measurement to save it
        # Build both frames in one step from the stacked measurements instead
        # of filling an empty object-dtype frame row by row; the frames
        # therefore carry a numeric dtype.
        dvv_df = pd.DataFrame(np.asarray(dvv_list), index=data_dates, columns=freqs)
        err_df = pd.DataFrame(np.asarray(err_list), index=data_dates, columns=freqs)
        if saveit:
            os.makedirs("WCT", exist_ok=True)
            # File names encode the pair, the component and the first / last
            # date that produced a measurement.
            dfn = "{} {}_ {} - {}.pkl".format(pair.replace(":", "_"), comp, str(dvv_df.index[0].date()), str(dvv_df.index[-1].date()))
            efn = "Err {} {}_ {} - {}.pkl".format(pair.replace(":", "_"), comp, str(dvv_df.index[0].date()), str(dvv_df.index[-1].date()))
            path = os.path.join("WCT", dfn)
            epath = os.path.join("WCT", efn)
            dvv_df.to_pickle(path)    # Save dvv
            err_df.to_pickle(epath)   # Save the associated errors
    elif saveit:
        print("Not enough measurements to save...")
print("Done")

# Computer says no...

If you feel like your computer is taking too long to process even one component combination, we left the solutions in the ./WCT/BAK folder.
To use them, you can copy them to ./WCT using one of the commands below and move on to the next notebook.

In [None]:
# Portable variant (works on every platform, Windows included): copy each
# backed-up result from WCT/BAK into WCT, printing the name of each file.
import glob
import shutil

dest_dir = "WCT/"
backup_files = glob.glob('WCT/BAK/*')
for backup_file in backup_files:
    print(backup_file)
    shutil.copy(backup_file, dest_dir)

In [None]:
# Works for Linux and MAC
# Shell alternative to the Python cell above: copy the backed-up results from
# WCT/BAK into WCT.
!cp WCT/BAK/* WCT/

When you are happy with the processing (should be near 2200 files processed for each component combination if you did not make any compromises) move on to plotting the result of your labor