In [None]:
from importlib import reload
from pathlib import Path
import os
import numpy as np
from scipy.signal import medfilt, medfilt2d, get_window
from scipy.ndimage import median_filter

from obspy.clients.filesystem.sds import Client as SDSClient
from obspy.clients.fdsn import RoutingClient
from obspy.core import UTCDateTime as UTC, read

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

from data_quality_control import sds_db, base, util, analysis
from data_quality_control.analysis import Analyzer

In [None]:
# Only for display in documentation!
# `IPython.display` is the public import location; importing `display`
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

In [None]:
# Show the signature/documentation; calling the filter without arguments
# raises a TypeError and aborts "Run All".
help(median_filter)

In [None]:
# Show the signature/documentation; calling the filter without arguments
# raises a TypeError and aborts "Run All".
help(medfilt2d)

## Define parameters

In [None]:
# NSLC
nslc_code = "GR.BFO..BHZ"

overlap = 60 #3600
fmin, fmax = (4, 14)
nperseg = 2048
winlen_in_s = 3600
proclen = 24*3600

outdir = 'output'

sds_root = os.path.abspath('../../sample_sds/')
inventory_routing_type = "eida-routing"

In [None]:
startdate = UTC("2020-12-24")
enddate = UTC("2021-01-15")

In [None]:
P = base.BaseProcessedData().from_file("output/GR.BFO..BHZ_2020.hdf5")

In [None]:
P

First, we initialize an Analyzer with the path to the 
HDF5-data (`outdir`), a station code and the `fileunit`, i.e.
the naming scheme of the HDF5-files that we want to analyze.

The initial object does not have any data yet.

In [None]:
reload(analysis)
reload(base)
#reload(util)
lyza = analysis.Analyzer(outdir, nslc_code,
                            fileunit="year")

In [None]:
print(lyza)

We can inquire which files and time ranges are available for
the given code, location and fileunit.

In [None]:
files = lyza.get_available_datafiles()
print(files)

In [None]:
lyza.get_available_timerange()

## View data for time range

In order to view the data, the amplitudes and spectra are treated differently.
Amplitudes are loaded as they are in the HDF5-file. Thus, we obtain an array of
shape `N_proclen x N_winlen`. The sample data covers 16 days and we used 
`proclen_seconds = 86400`, i.e. 1 day, so the first dimension is 16. 
With `winlen_seconds = 3600`, there are 24 windows per day, which gives the second dimension
of the amplitude array.

For the spectra, there are two options:
1. load all spectra within a specific time range
2. load spectra for selected times given as list

The spectra are stored in the HDF5-files as 3D arrays. The
first two dimensions correspond to those of the amplitude
array; the third dimension is the frequency axis.
In contrast, the Analyzer flattens the first two dimensions,
i.e. the resulting array is basically a spectrogram, that is,
a sequence of spectra over time.

This makes it possible to select spectra only for specific times.
For example, one may want to select only those hours
where the wind speed is in a specific range.

In [None]:
startdate = UTC("2020-12-25")
enddate = UTC("2021-01-15")

In [None]:
lyza.get_data(startdate, enddate)

In [None]:
lyza

In [None]:
reload(analysis)
reload(base)
#reload(util)
lyza = analysis.Analyzer(outdir, nslc_code,
                            fileunit="year")

lyza.get_data(startdate, enddate)

In [None]:
lyza.trim(UTC("2020-12-30"), UTC("2021-01-06"))

In [None]:
lyza

In [None]:
lyza.trim(startdate, enddate)

In [None]:
lyza.plot_spectrogram();

In [None]:
lyza.trim(startdate, enddate, fill_value=np.nan)

In [None]:
lyza.plot_spectrogram();

In [None]:
def get_overlapping_frames(x, inc, winlen_samples):
    """
    Cut `x` into (possibly overlapping) frames of fixed length.

    Frame ``i`` holds ``x[i*inc : i*inc + winlen_samples]``. Only complete
    frames are returned; trailing samples that do not fill a whole frame
    are dropped.

    Parameters
    ----------
    x : array_like
        Data to split. Frames are taken along the first axis, so a 1D
        input gives a 2D result and an input of shape ``(N, ...)`` gives
        a result of shape ``(n_frames, winlen_samples, ...)``.
    inc : int
        Hop between the starts of consecutive frames, in samples.
        ``inc < winlen_samples`` gives overlapping frames.
    winlen_samples : int
        Number of samples per frame.

    Returns
    -------
    numpy.ndarray
        Copy of the data arranged as frames, with shape
        ``(n_frames, winlen_samples) + x.shape[1:]``. ``n_frames`` is 0
        if `x` is shorter than a single frame.

    Raises
    ------
    ValueError
        If `inc` or `winlen_samples` is smaller than 1.
    """
    if inc < 1 or winlen_samples < 1:
        raise ValueError(
            "inc and winlen_samples must be >= 1, got inc={}, "
            "winlen_samples={}".format(inc, winlen_samples))

    x = np.asarray(x)

    # Number of complete frames; clipped at 0 for inputs shorter than a frame
    n_frames = max(0, (x.shape[0] - winlen_samples) // inc + 1)

    # Row i of the index matrix lists the sample indices of frame i:
    # (n_frames, 1) start offsets broadcast against (1, winlen_samples)
    frame_starts = inc * np.arange(n_frames)
    sample_idx = (frame_starts[:, np.newaxis] +
                  np.arange(winlen_samples)[np.newaxis, :])

    return x[sample_idx]

In [None]:
psds = lyza.psds.copy()
psds.shape

In [None]:
x = psds[:24,0]
x.shape

In [None]:
f = get_overlapping_frames(x, 2, 6)
f.shape

In [None]:
smoothed = []
for psd in psds.T:
    #print(psd.shape)
    f = get_overlapping_frames(psd, 3, 6)
    x = np.nanmedian(f, axis=1)
    smoothed.append(x)
smoothed = np.array(smoothed).T

In [None]:
smoothed.shape

In [None]:
inc = 3
winlen = 6
smoothed = np.array([np.nanmedian(get_overlapping_frames(x, inc, winlen),axis=1) for x in psds.T]).T

smoothed.shape

In [None]:
plt.imshow(np.log10(smoothed.T), aspect="auto")

In [None]:
plt.imshow(np.log10(psds.T), aspect="auto")

class Interpolator():
    """
    Stand-alone helper that locates the processed HDF5 files of one
    station code in `datadir`.

    Despite its name, `interpolate()` currently only loads every matching
    file and prints its name and content.
    """
    def __init__(self, datadir, nslc_code, fileunit="year"):
        self.stationcode = nslc_code
        self.datadir = datadir
        self.fileunit = fileunit
        self.iter_time = util.TIME_ITERATORS[self.fileunit]

        # Build the format string of the data files: everything before the
        # last "_" of the file name template is filled in right away, the
        # part after it is kept as a template.
        name_template = util.FNAME_FMTS[self.fileunit]
        head, sep, time_part = name_template.rpartition("_")
        filled_head = head.format(outdir=self.datadir,
                                  **self.nslc_as_dict())
        self.fmtstr = filled_head + sep + time_part

    def nslc_as_dict(self):
        """
        Split the dot-separated station code and return its parts as dict
        with keys "network", "station", "location", "channel" (in that
        order; surplus keys or parts are dropped).
        """
        keys = ("network", "station", "location", "channel")
        return dict(zip(keys, self.stationcode.split(".")))

    def interpolate(self):
        """
        Load every HDF5 file of the station code found in `datadir`
        (sorted by file name) and print file name and content.
        """
        pattern = (self.stationcode + "_" +
                   util.FNAME_WILDCARD[self.fileunit] + ".hdf5")
        files = sorted(str(p) for p in Path(self.datadir).glob(pattern))

        for fname in files:
            data = base.BaseProcessedData().from_file(fname)
            print(fname)
            print(data)
            print()

In [None]:
reload(analysis)
reload(base)
reload(util)
class Interpolator(analysis.Analyzer):
    """
    Analyzer that steps through all available data in `fileunit`-sized
    time ranges.

    For each range the data are loaded, trimmed with NaN as fill value and
    checked for a constant window length. No interpolation is computed
    yet.
    """
    def __init__(self, datadir, nslc_code, fileunit="year"):
        super().__init__(datadir, nslc_code, fileunit)

    def _get_SECONDS_PER_WINDOW(self, TSTA, TEND):
        """
        Load the first time range between TSTA and TEND and remember its
        window length in seconds as `self.SECONDS_PER_WINDOW`.

        Nothing is set if the time iterator yields no range.
        """
        self.logger.debug("\n\nLooking for window size")

        # Only the first range is needed to learn the window size
        first_range = next(iter(self.iter_time(TSTA, TEND)), None)
        if first_range is None:
            return

        tsta, tend = first_range
        self.get_data(tsta, tend)
        self.logger.debug(
            "Time range to get window size: {} - {}".format(tsta, tend))
        self.logger.info(
            "Expecting window size is {:g}s".format(self.seconds_per_window))
        self.SECONDS_PER_WINDOW = self.seconds_per_window

    def _set_check_SECONDS_PER_WINDOW(self):
        """
        Store the window length of the currently loaded data as reference
        if none is set yet; otherwise make sure it has not changed.

        Raises
        ------
        RuntimeError
            If the current window length differs from the reference.
        """
        if not hasattr(self, "SECONDS_PER_WINDOW"):
            current = self.seconds_per_window
            self.logger.info(
                "Expecting window size = {:g}s".format(current))
            self.SECONDS_PER_WINDOW = current
            return

        if self.SECONDS_PER_WINDOW != self.seconds_per_window:
            msg = "Window size changed"
            self.logger.error(msg)
            raise RuntimeError(msg)

    def interpolate(self, win, inc=1):
        """
        Walk through the available time range and load each sub-range.

        Parameters
        ----------
        win : int
            Number of windows per smoothing frame.
        inc : int
            Number of windows between consecutive frames.

        The end of each range is extended by one day plus
        ``(win-inc)`` windows before the data are loaded and trimmed
        (NaN-filled).
        """
        TSTA, TEND = self.get_available_timerange()

        self._get_SECONDS_PER_WINDOW(TSTA, TEND)

        self.logger.debug("\n\nStarting interpolation\n")
        for range_start, range_end in self.iter_time(TSTA, TEND):
            padded_end = (range_end + 24*3600 +
                          (win-inc)*self.SECONDS_PER_WINDOW)
            self.logger.info(
                "Interpolating {} - {}".format( range_start, padded_end))
            self.get_data(range_start, padded_end)
            self.trim(range_start, padded_end, fill_value=np.nan)
            self._set_check_SECONDS_PER_WINDOW()
            self.logger.debug(
                "{} - {}".format(self.startdate, self.enddate))
            self.logger.debug("\n")
            # Not enabled yet: self.fill_days()
            
            

In [None]:
reload(analysis)
#reload(Analyzer)
reload(base)
polly = Interpolator(outdir, nslc_code )

In [None]:
polly.interpolate(6, 3)

In [None]:
polly

In [None]:
polly.__dict__.keys()

In [None]:
polly.SECONDS_PER_WINDOW

In [None]:
fm = np.nanmedian(f, axis=1)

# Figuring out how my get_overlapping_frames() works

In [None]:
# The SDS filesystem client is imported under the alias `SDSClient`;
# a bare `Client` is not defined in this notebook.
sdsclient = SDSClient(sds_root)

In [None]:
endtime = startdate+24*3600
st = sdsclient.get_waveforms(*nslc_code.split("."), startdate, endtime)
st = st.trim(startdate-60, endtime+60, pad=True, fill_value=0)
tr = st[0]
x = np.arange(1,24+1).repeat(72000)
print(x.shape)
print(tr.stats.npts)
tr.data[60*20+1:-60*20] = x

In [None]:
tr.data.size % 24 # 74400

In [None]:
tr.plot(endtime=startdate+600);

In [None]:
procparams = base.ProcessingParameters()

In [None]:
procparams

In [None]:
nf = int(procparams.proclen_seconds/
        procparams.winlen_seconds)

In [None]:
nf

In [None]:
f, taps = get_overlapping_tapered_frames(tr, startdate, 24, int(3600*20), 60*20)

In [None]:
f.shape

In [None]:
plt.imshow(f, aspect="auto")

In [None]:
#plt.plot(f[10,60*20+1:-60*20])
plt.plot(f[11,:])
#plt.xlim(-1, 10)

In [None]:
3600*20

In [None]:
6*2*100*2

In [None]:
x = np.arange(24).repeat(74400)

In [None]:
x

In [None]:
def get_overlapping_tapered_frames(tr, starttime, nf, winlen_samples,
                           taper_samples):
    """
    Cut a trace into `nf` frames that overlap by the taper length.

    Every frame consists of `winlen_samples` samples plus `taper_samples`
    extra samples before and after. Consecutive frames start
    `winlen_samples` apart and therefore share ``2*taper_samples``
    samples. The frames are returned as cut from the data; no taper
    window is applied to them.

    Parameters
    ----------
    tr : trace-like
        Object providing `stats.sampling_rate` and
        `slice(starttime).data` (presumably an obspy Trace).
    starttime :
        Start of the first window *without* taper. The data are cut
        starting at ``starttime - taper_samples/sampling_rate``.
    nf : int
        Number of frames.
    winlen_samples : int
        Samples per frame excluding the tapers.
    taper_samples : int or float
        Samples added on each side of a frame.

    Returns
    -------
    f : numpy.ndarray
        Frames, shape ``(nf, winlen_samples + 2*taper_samples)``.
    taper_samples
        The `taper_samples` argument, passed through unchanged.

    Raises
    ------
    IndexError
        If the trace holds fewer samples after the cut start than
        needed for `nf` complete frames.
    """
    sr = tr.stats.sampling_rate

    # Samples in one frame including both tapers
    nwin = int(winlen_samples + 2*taper_samples)

    # Total number of samples of the trace needed for all frames
    proclen_samples = int(nf * winlen_samples + 2*taper_samples)

    # Cut out the needed data, beginning one taper length before starttime
    x = tr.slice(starttime-taper_samples/sr).data[:proclen_samples]

    # Fail with a readable message instead of an out-of-bounds index error
    # from the fancy indexing below.
    if nf > 0 and len(x) < proclen_samples:
        raise IndexError(
            "trace provides {} samples from the cut start, but {} are "
            "needed for {} frames".format(len(x), proclen_samples, nf))

    # Row i of the index matrix lists the sample indices of frame i
    frame_starts = winlen_samples * np.arange(nf)
    f = x[frame_starts[:, np.newaxis] + np.arange(nwin)[np.newaxis, :]]

    # To taper the frames, multiply `f` with a window of length `nwin`,
    # e.g. a Tukey window with alpha = 2*taper_samples / nwin.
    return f, taper_samples

In [None]:
endtime = startdate+24*3600
st = sdsclient.get_waveforms(*nslc_code.split("."), startdate, endtime)
#st = st.trim(startdate-60, endtime+60, pad=True, fill_value=0)
tr = st[0]

sr = 0.01
x = np.arange(1,24+1).repeat(int(3600*sr))
#x = np.insert()
print(x.shape)
print(x.size/24)

tr.data = x
tr.stats.sampling_rate = sr
tr.stats.starttime = startdate
tr = tr.trim(startdate-600, endtime+600, pad=True, fill_value=0)
print(tr.stats.npts)

In [None]:
tr.stats.starttime, tr.stats.endtime

In [None]:
tr.plot();

In [None]:
f, taps = get_overlapping_tapered_frames(tr, startdate, 24, int(3600*sr), 600*sr)

In [None]:
f.shape

In [None]:
plt.imshow(f, aspect="auto")

In [None]:
f[1,:]

In [None]:
f[0, 6:6+36]

In [None]:
x2 = np.append(x, np.zeros(10))

In [None]:
f1 = get_overlapping_frames(x2, 18, 36 )

In [None]:
f1.shape

In [None]:
plt.imshow(f1, aspect="auto")

In [None]:
plt.plot(f1[-1,:])

In [None]:
UTC()-11*3600-24*60 + 3600

In [None]:
UTC()