In [None]:
 filehead = os.path.join(self.datadir, self.stationcode)
        fmtstr = filehead + "_{:04d}*.hdf5"
        logger.info("Looking for data file %s" % fmtstr)
        _year = self.sdate.year
        files = []
        while _year <= self.edate.year:
            searchstr = fmtstr.format(_year)
            fnames = glob(searchstr)

            if len(fnames) > 1:
                files.append(self.select_longest(fnames))
            elif len(fnames) == 0:
                _year = _year +1
                continue
            else:
                files.append(fnames[0])

            # Get end year of latest file
            ## Remove file-ext and path
            f, ext = os.path.splitext(files[-1])
            _endtime = UTC(f.split('_')[-1])
            if _endtime.year >= self.endtime.year:
                break
            _year = _year + 1

In [None]:
from importlib import reload
import os
from datetime import timedelta

import numpy as np

import h5py

from obspy.clients.filesystem.sds import Client
from obspy.clients.fdsn import RoutingClient
from obspy.core import UTCDateTime as UTC
from obspy.signal import util

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

from data_quality_control import base, sds_db
#from data_quality_control import processing
#from data_quality_control.processing import ProcessingParameters

In [None]:
# Processing parameters for this notebook session.
# No cell visible in this notebook reads them, and their units are not stated;
# the names suggest seconds (overlap, winlen_in_s, proclen), Hz (fmin, fmax)
# and samples (nperseg) — TODO confirm against the processing module.
overlap = 60 #3600
fmin, fmax = (4, 14)
nperseg = 2048
winlen_in_s = 3600
proclen = 24*3600

In [None]:
from importlib import reload
import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

In [None]:
#import configparser
from datetime import timedelta, time
from glob import glob
import os.path
import numpy as np

#from scipy.signal import welch, get_window

#from obspy.signal.filter import bandpass
#from obspy.clients.filesystem.sds import Client
#from obspy.clients.fdsn import RoutingClient
from obspy.core import UTCDateTime as UTC
# from obspy.signal import util

import plotly.graph_objects as go

import h5py

from data_quality_control import base, sds_db

In [None]:
import logging
# Notebook-wide logger: the classes and functions defined in later cells log
# through the module-level name `logger`.
logger = logging.getLogger('processing')
logger.setLevel(logging.DEBUG)
# Stream handler that lets DEBUG and above through.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)  # set level
cformatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                            datefmt='%y-%m-%d %H:%M:%S')
ch.setFormatter(cformatter)
# Only attach the handler once, so re-running this cell does not stack
# duplicate handlers. Note that hasHandlers() also looks at ancestor loggers:
# if a parent (e.g. the root logger) already has a handler, `ch` is not added.
if not logger.hasHandlers():
    logger.addHandler(ch)

# Develop base analyzer for new data layout

In [None]:
class Analyzer:
    """
    Load processed amplitude/PSD data of one station and plot it.

    File names are built from ``base.FNAME_FMTS[fileunit]``, one per time
    span yielded by ``base.TIME_ITERATORS[fileunit]``.

    Parameters
    ----------
    sdate, edate : anything accepted by ``UTC``
        First and last date of the range to inspect. Only the date part
        is kept.
    datadir : str
        Passed as ``outdir`` to the file name format string.
    stationcode : str
        Dot-separated code ``network.station.location.channel``.
    fileunit : str
        Key into ``base.FNAME_FMTS`` and ``base.TIME_ITERATORS``.
    stime, etime : str
        Colon-separated time of day, ``hour[:minute[:second[:microsecond]]]``.
        Combined with ``sdate``/``edate`` into ``self.starttime`` and
        ``self.endtime``.
    """

    def __init__(self, sdate, edate,
                 datadir, stationcode, fileunit="year",
                 stime="00:00", etime="23:59:59:999999"):
        self.sdate = UTC(sdate).date
        self.edate = UTC(edate).date
        # Also refreshes self.starttime / self.endtime.
        self._update_time(stime, etime)
        self.datadir = datadir
        self.stationcode = stationcode
        self.fileunit = fileunit
        self.iter_time = base.TIME_ITERATORS[self.fileunit]

        self._update_fmtstr()
        self.files = self.get_filenames()

    def _update_fmtstr(self):
        """
        Rebuild ``self.fmtstr`` from datadir, stationcode and fileunit.
        """
        # Only the part before the last "_" is filled in here. The part
        # after it stays a template that get_filenames() fills with
        # year/month/day/hour for every time span.
        fmtstr_base, sep, fmtstr_time = (
            base.FNAME_FMTS[self.fileunit].rpartition("_"))
        self.fmtstr = (fmtstr_base.format(
                        outdir=self.datadir, **self.nslc_as_dict()) +
                        sep + fmtstr_time)

    def nslc_as_dict(self):
        """
        Split ``self.stationcode`` at "." into a dict with the keys
        network, station, location, channel.
        """
        keys = ["network", "station", "location", "channel"]
        return dict(zip(keys, self.stationcode.split(".")))

    def _tstr2time(self, t):
        """
        Convert a colon-separated string such as "23:59:59:999999"
        into a ``datetime.time``.
        """
        return time(*[int(s) for s in t.split(':')])

    def _update_datetime(self):
        """
        Combine the stored dates and times of day into
        ``self.starttime`` and ``self.endtime``.
        """
        self.starttime = UTC("{}T{}".format(self.sdate, self.stime))
        self.endtime = UTC("{}T{}".format(self.edate, self.etime))

    def _update_time(self, stime, etime):
        """
        Set start/end time of day from strings and refresh the combined
        datetimes. A falsy argument leaves the current value untouched.
        """
        if stime:
            self.stime = self._tstr2time(stime)
        if etime:
            self.etime = self._tstr2time(etime)
        self._update_datetime()

    def get_all_data(self, sdate=None, edate=None,
                     datadir=None, stationcode=None):
        """
        Optionally change date range, directory or station, then load all
        matching files into ``self.data``.

        Arguments that are None keep the current setting. Returns None;
        if no file names are produced a warning is logged and
        ``self.data`` is left as it was.
        """
        # Store dates the same way as __init__ does (date part only),
        # otherwise _update_datetime() cannot build valid time strings.
        if sdate is not None:
            self.sdate = UTC(sdate).date
        if edate is not None:
            self.edate = UTC(edate).date
        if datadir is not None:
            self.datadir = datadir
        if stationcode is not None:
            self.stationcode = stationcode
        if datadir is not None or stationcode is not None:
            # File name template depends on both of them.
            self._update_fmtstr()
        self._update_datetime()

        files = sorted(self.get_filenames())
        if len(files) == 0:
            logger.warning("No files for %s in %s between %s and %s" %
                           (self.stationcode, self.datadir,
                            self.sdate, self.edate))
            return
        # Keep the file list in sync with the new settings so that
        # iter_files() and get_data() see the same files.
        self.files = files

        data = base.BaseProcessedData()
        for file in files:
            data.extend_from_file(file)
        self.data = data

    def get_filenames(self):
        """
        Return one file name per time span between ``self.starttime``
        and ``self.endtime``. Existence of the files is not checked.
        """
        logger.info("Looking for data file %s" % self.fmtstr)
        files = []
        # Only the start of each span is used to fill the template.
        for starttime, endtime in self.iter_time(self.starttime, self.endtime):
            files.append(self.fmtstr.format(year=starttime.year,
                                            month=starttime.month,
                                            day=starttime.day,
                                            hour=starttime.hour))
        return files

    @staticmethod
    def _fname_enddate(fname):
        """
        Parse the text after the last "_" of the file name (directory and
        extension removed) with ``UTC``.
        """
        stem = os.path.splitext(os.path.basename(fname))[0]
        return UTC(stem.split('_')[-1])

    def select_longest(self, fnames):
        """
        From several candidate files pick the one whose name carries the
        latest end date.

        The end date is the part of the file name after the last "_".
        The search stops early as soon as the best candidate so far
        reaches ``self.endtime``.

        Returns the selected entry of ``fnames`` unchanged.
        """
        logger.debug("Found %s files for year." %
                     str(len(fnames)))
        longest = fnames[0]
        edate = self._fname_enddate(longest)
        for fname in fnames[1:]:
            _edate = self._fname_enddate(fname)
            if _edate >= edate:
                # Remember the file together with its end date; tracking
                # only the date would always return the first file.
                longest, edate = fname, _edate
            if edate >= self.endtime:
                break
        logger.debug("Selected file %s" % longest)
        return longest

    def iter_files(self):
        """
        Generator that returns open h5py.File object for
        each filename in self.files.

        Each file is closed when the consumer asks for the next one, when
        the generator is closed, or when an exception is thrown into it.
        Errors raised while opening are logged and re-raised unchanged.
        """
        for fname in self.files:
            logger.debug("Opening file %s" % fname)
            try:
                val = h5py.File(fname, 'r')
            except Exception:
                # Nothing to close here: the file object was never created.
                logger.error("Error while opening file %s" % fname)
                raise
            try:
                # Return file object
                yield val
            finally:
                # Close before proceeding or propagating an error
                val.close()

    def get_data(self,
                 stime=None, etime=None):
        """
        Read all files in ``self.files`` and cut out the requested range.

        Parameters
        ----------
        stime, etime : str or None
            New start/end time of day; None keeps the current one.

        Sets ``self.amps``, ``self.psds``, ``self.freqax``,
        ``self.proclen_seconds``, ``self.winlen_seconds`` and ``self.nwin``
        and returns the complete (uncut) ``base.BaseProcessedData`` object.
        """
        data = base.BaseProcessedData()
        for fname in self.files:
            logger.debug("Reading file %s" % fname)
            data.extend_from_file(fname)

        # Cut out desired time range
        self._update_time(stime, etime)

        # Row indices along the first axis, counted in units of
        # proclen_seconds from the start of the loaded data.
        i = int((self.starttime - data.startdate) /
                data.proclen_seconds)
        j = int((self.endtime + data.proclen_seconds - data.startdate) /
                data.proclen_seconds)
        if i < 0 or j < 0:
            # Negative indices would silently wrap around to the end
            # of the arrays.
            logger.warning("Requested range starts before available data "
                           "at %s" % data.startdate)
            i, j = max(i, 0), max(j, 0)
        logger.debug("Selecting rows %s to %s" % (i, j))

        self.amps = data.amplitudes[i:j, :]
        self.psds = data.psds[i:j, :]
        self.freqax = data.frequency_axis
        self.proclen_seconds = data.proclen_seconds
        self.winlen_seconds = data.seconds_per_window
        self.nwin = self.amps.shape[1]
        return data

    def infostr(self):
        """
        Station code, date range and time range as text with "<br>"
        line breaks, used in the plot titles.
        """
        t = (self.stationcode + "<br>" +
             "{} - {}<br>".format(self.sdate, self.edate) +
             "{} - {}".format(self.stime, self.etime))
        return t

    def plot(self):
        """Return the amplitude figure and the PSD figure."""
        return self.plot_amplitudes(), self.plot_psds()

    def plot_amplitudes(self, func=None):
        """
        3D surface plot of ``self.amps``.

        Requires a previous call of get_data().

        Parameters
        ----------
        func : callable or None
            If given, ``func(self.amps)`` is plotted instead of the
            raw amplitudes.
        """
        title = ("Hourly 75%-amplitude<br>" +
                 self.infostr())

        if func:
            z = func(self.amps)
        else:
            z = self.amps
        dateax, timeax = self._get_time_axis()

        # Numpy-datetime can give you a **really** hard time to convert
        # between the different increments....
        # The module-level table np_td2datetime_td_keywords translates the
        # numpy dtype of timeax into the matching timedelta keyword.
        xticks = [str(timedelta(
            **{np_td2datetime_td_keywords[str(timeax.dtype)]: int(np.int64(s))}))
                  for s in timeax]

        fig = self._plotly_3dsurface(timeax, dateax, z,
                                     name="amplitudes")

        fig.update_layout(title=title,
            scene=dict(
                xaxis=dict(title='Time', ticktext=xticks, tickvals=timeax),
                yaxis=dict(title='Date'),
                zaxis=dict(title="m/s")
            )
        )
        return fig

    def plot_psds(self, func=None):
        """
        3D surface plot of ``self.psds`` over frequency and datetime.

        Requires a previous call of get_data().

        Parameters
        ----------
        func : callable or None
            If given, ``func(self.psds)`` is plotted and the function
            name is added to the z-axis label.
        """
        # "<br>" like in plot_amplitudes(); plotly titles do not break
        # lines at "\n".
        title = ("Hourly power spectral density<br>" +
                 self.infostr())
        if func:
            z = func(self.psds)
        else:
            z = self.psds
        try:
            funcname = func.__name__+"(", ")"
        except AttributeError:
            # func is None or has no name
            funcname = "", ""
        # Merge the first two axes so that every row is one spectrum.
        z = z.reshape((z.shape[0]*z.shape[1], z.shape[2]))
        dateax, timeax = self._get_time_axis()
        datetimeax = dateax[:, None] + timeax[None, :]
        y = datetimeax.ravel()
        x = self.freqax
        fig = self._plotly_3dsurface(x, y, z, name="psds")
        fig.update_layout(title=title,
                          scene=dict(
                              xaxis=dict(title='Frequency, Hz'),
                              yaxis=dict(title='Datetime'),
                              zaxis=dict(
                                  title="psd, {}m^2/s^2/Hz{}".format(*funcname))
                          ))
        return fig

    def _plotly_3dsurface(self, x, y, z, name=None, cmin=None, cmax=None):
        """
        Create a plotly figure with a single surface trace and a fixed
        size and aspect ratio.
        """
        fig = go.Figure(data=[go.Surface(z=z, x=x, y=y, name=name,
                                         cmin=cmin, cmax=cmax)])
        fig.update_layout(autosize=True,
                          width=800, height=500,
                          scene=dict(aspectmode='manual',
                                     aspectratio=dict(x=1, y=2, z=0.5))
                          )
        return fig

    def _get_time_axis(self):
        """
        Build the two axes used by the plots.

        Requires ``self.proclen_seconds``, ``self.winlen_seconds`` and
        ``self.nwin``, which are set by get_data().

        Returns
        -------
        dateax : numpy array of datetime64
            From ``self.starttime`` up to ``self.endtime`` plus one
            processing length, in steps chosen by
            ``base.choose_datetime_inc(self.proclen_seconds)``.
        timeax : numpy array of timedelta64
            ``self.nwin`` offsets in steps chosen by
            ``base.choose_datetime_inc(self.winlen_seconds)``.
        """
        # choose_datetime_inc() is used here as returning a numpy unit
        # flag and a step count in that unit.
        dtflag, dtinc = base.choose_datetime_inc(self.proclen_seconds)
        dateax = np.arange(self.starttime,
                           self.endtime+self.proclen_seconds,
                           dtinc,
                           dtype='datetime64[{}]'.format(dtflag))

        dtflag, dtinc = base.choose_datetime_inc(self.winlen_seconds)
        timeax = np.arange(0, self.nwin*dtinc, np.timedelta64(dtinc, dtflag))
        return dateax, timeax
        

# Maps the string form of a numpy timedelta64 dtype (e.g. 'timedelta64[h]')
# to a keyword name for datetime.timedelta. Analyzer.plot_amplitudes uses it
# to turn the time axis into tick labels.
# The key is built from the first letter of each entry, which is the numpy
# unit flag; capitalised entries give the upper-case flags D, M and Y.
# "seconds" is included so that axes with second resolution can be labelled.
# NOTE: datetime.timedelta has no `months`/`years` keywords, so the 'M' and
# 'Y' entries cannot actually be passed to timedelta(); they are kept so that
# existing lookups keep working.
np_td2datetime_td_keywords = {'timedelta64[{}]'.format(v[0]) : v.lower() for 
                              v in ["seconds", "minutes", "hours", "Days", "Months", "Years"]}



# Reference table of numpy unit flags. The value is not assigned to a name,
# it is only displayed as output of the cell.
dict(
        m = "minutes",
        h = "hours", 
        s = "seconds",
        M = "months",
    D = "days"
)


In [None]:
# Set up an Analyzer for one station and a short date range.
# Re-import `base` first so that edits made to the module are picked up.
reload(base)
# Station id
network = 'GR'
station = 'BFO'
location = ''
channel = 'HHZ'

# Data source
#datadir = '/home/lehr/sds/processed/'
datadir = "."


# Date range that you want to inspect
startdate = UTC("2020-12-28")
enddate = UTC("2021-01-05")


# Choose time range 
## Full time range (= all 24h)
# stime/etime are not passed to the constructor below; the following cell
# hands them to analyzer.get_data().
stime, etime = "00:00", "23:00"

## Time range crossing midnight
#stime, etime = "19:00", "05:00"

# Dot-separated code in the order network.station.location.channel, which is
# the order Analyzer.nslc_as_dict() splits it in.
stationcode = "{}.{}.{}.{}".format(network, station, 
                               location, channel)
# Creating the Analyzer already builds the list of file names (analyzer.files).
analyzer = Analyzer(
    startdate, enddate, datadir, stationcode, fileunit="year")

In [None]:
data = analyzer.get_data(#["amplitudes", "psds"], 
                    # stime="00:00", etime="23:00"
                    stime=stime, etime=etime)

In [None]:
analyzer.plot_amplitudes()

In [None]:
analyzer.plot_psds(np.log)

# Develop SDSDataBaseAnalyzer

In [None]:
from data_quality_control import analysis

In [None]:
class SDSDataBaseAnalyzer(analysis.Analyzer):
    """
    Analyzer for one file per year, named
    ``<datadir>/<stationcode>_<YYYY>.hdf5``.
    """

    def __init__(self, *args, **kwargs):
        """All arguments are forwarded unchanged to analysis.Analyzer."""
        super().__init__(*args, **kwargs)

    def get_filenames(self):
        """
        Return the existing yearly files from ``self.sdate.year`` to
        ``self.edate.year`` (inclusive). Years without a file are skipped.
        """
        filehead = os.path.join(self.datadir, self.stationcode)
        fmtstr = filehead + "_{:04d}.hdf5"
        logger.info("Looking for data file %s" % fmtstr)
        files = []
        for year in range(self.sdate.year, self.edate.year + 1):
            # The pattern has no wildcard, so glob() acts as an
            # existence check here.
            fnames = glob(fmtstr.format(year))
            if fnames:
                files.append(fnames[0])
        return files

    def get_psd_at_frequency(self, freq):
        """
        Get power spectral density at specific frequency
        as time series (np.array).

        The series runs from the processing interval containing
        ``self.starttime`` (first file) to the one containing
        ``self.endtime``, flattened over all files from ``iter_files()``.

        Parameters
        ----------
        freq : float
            Frequency to extract. Bins of the frequency axis of the first
            file that lie within ``freqax[1]/2`` of ``freq`` are selected.
            # NOTE(review): this tolerance presumably assumes an evenly
            # spaced axis starting at 0 — confirm.

        Returns
        -------
        np.ndarray
            1-D array; empty if ``iter_files()`` yields no file.

        Raises
        ------
        ValueError
            From get_proclen_index() if the start time lies before the
            first file, or the end time before the start of any file.
        """
        chunks = []
        idx = None
        for n, f in enumerate(self.iter_files()):
            logger.debug("Reading psds from %s" % f)
            psds = f["psds"]
            if idx is None:
                # Frequency bins are determined once, from the first file.
                freqax = f["frequency_axis"][:]
                idx = np.where(np.isclose(freqax, freq,
                                          atol=freqax[1]/2))[0]

            # Only the first file is cut at the start time.
            ista = get_proclen_index(f, self.starttime) if n == 0 else 0
            # Every file is cut at the end time, including a single one.
            # For files that end before self.endtime the index lies past
            # the end of the dataset and is clipped to its length.
            iend = min(get_proclen_index(f, self.endtime) + 1,
                       psds.shape[0])
            # Indexing reads the values into memory while the file
            # is still open.
            chunks.append(psds[ista:iend, :, idx].ravel())

        if not chunks:
            logger.warning("No files to read psds from")
            return np.array([])
        return np.concatenate(chunks)
    
    
def get_proclen_index(fin, t):
    """
    Index of the processing interval of an open data file that contains
    time ``t``.

    Parameters
    ----------
    fin : open file object
        Must provide ``attrs['starttime']`` (sequence of arguments for
        ``UTC``) and ``attrs['seconds_per_proclen']``.
    t : UTC
        Time to locate.

    Returns
    -------
    int
        Number of whole processing intervals between the start of the
        file and ``t``. The result is not checked against the length of
        the file.

    Raises
    ------
    ValueError
        If ``t`` is earlier than the start time of the file.
    """
    tsta = UTC(*fin.attrs['starttime'])

    if t < tsta:
        # Arguments in the order the message names them:
        # requested time first, then start of file.
        raise ValueError("Given time %s is before start of file at %s" %
                         (t, tsta))

    proclen_seconds = fin.attrs['seconds_per_proclen']
    return int((t - tsta) / proclen_seconds)

In [None]:
# Set up an SDSDataBaseAnalyzer for the same station over roughly one year.
# Re-import `base` first so that edits made to the module are picked up.
reload(base)
# Station id
network = 'GR'
station = 'BFO'
location = ''
channel = 'HHZ'

# Data source
# NOTE(review): absolute, user-specific path — this cell only finds data on a
# machine that has this directory.
datadir = '/home/lehr/sds/processed/'
#datadir = "."


# Date range that you want to inspect
startdate = UTC("2020-10-28")
enddate = UTC("2021-10-15")


# Choose time range 
## Full time range (= all 24h)
# Not passed to the constructor below.
stime, etime = "00:00", "23:00"


# Dot-separated code in the order network.station.location.channel.
stationcode = "{}.{}.{}.{}".format(network, station, 
                               location, channel)
analyzer = SDSDataBaseAnalyzer(
    startdate, enddate, datadir, stationcode, fileunit="year")

In [None]:
DATA = analyzer.get_psd_at_frequency(10)

In [None]:
len(DATA)/24

In [None]:
plt.plot(DATA, '-')
ymax = np.nanmean(DATA)# + 1*np.nanstd(DATA)
plt.ylim(0, ymax)

In [None]:
analyzer.get_data()

In [None]:
plt.imshow(np.log(analyzer.amps[:,:]))


In [None]:
plt.imshow(np.log(analyzer.psds[:,:, 10]))


In [None]:
ymax

In [None]:
y = analyzer.psds[:,:,100].ravel()
ymax = np.nanpercentile(y, 99,)
plt.plot(y, '-')
plt.ylim(0,  ymax)

In [None]:
# Manual version of the extraction done in get_psd_at_frequency(), without
# cutting to start/end time: collect the PSD values at `freq` from all files.
freq = 1.0
file_iterator = analyzer.iter_files()
# The first file provides the frequency axis and the bin indices.
f = next(file_iterator)
freqax = f["frequency_axis"][:]
# Bins within freqax[1]/2 of the requested frequency.
idx = np.where(np.isclose(freqax, freq, atol=freqax[1]/2))[0]
print(idx)
DATA = f["psds"][:,:,idx].ravel().tolist()
# Remaining files are appended with the same bin indices.
for f in file_iterator:
    print(f)
    data = f["psds"][:,:,idx].ravel().tolist()
    DATA.extend(data)
DATA = np.array(DATA)

In [None]:
DATA

In [None]:
# Check which bins of `freqax` (taken from an earlier cell) are matched for a
# set of test frequencies.
# Only the last assignment to `freqs` takes effect; the first one is
# overwritten before it is used.
freqs = np.linspace(0.05, 19, 20) 
#freqs = np.linspace(0.05, 1, 20)
freqs = np.linspace(0, 50, 26)
print(freqs)
for freq in freqs:
    # Same tolerance as in get_psd_at_frequency(): freqax[1]/2
    idx = np.where(np.isclose(freqax, freq, atol=freqax[1]/2))
    print(idx, "\t", freq, freqax[idx])

In [None]:
freqs

In [None]:
it = analyzer.iter_files()

In [None]:
f = next(it)
f

In [None]:
for f in it:
    print(f)

In [None]:
f

In [None]:
import pandas as pd

In [None]:
x = np.tile(np.arange(5), 10).reshape((5, 10)).T
x

In [None]:
df = pd.DataFrame(x)

In [None]:
s = np.arange(5).repeat(10)
s

In [None]:
ds = pd.Series(s)

In [None]:
rm = ds.rolling(window=20, center=True).median()

In [None]:

plt.plot(s, 'o')
plt.plot(rm, 'x')

In [None]:
UTC("20-Dec-2021")

In [None]:
import datetime

In [None]:
# Unlike UTC(), the datetime.datetime() constructor takes integer
# year/month/day and raises TypeError for a date string. Parse the string
# with an explicit format instead.
datetime.datetime.strptime("20-Dec-2021", "%d-%b-%Y")

In [None]:
from importlib import reload

In [None]:
from data_quality_control import util
from obspy.core import UTCDateTime as UTC

sdate = UTC("2018-05-14")
edate = UTC("2021-09-20")
for s, e in util.iter_years(sdate, edate):
    print(">>>>", s.date, e.date)

In [None]:
from data_quality_control import util
from obspy.core import UTCDateTime as UTC

sdate = UTC("2020-05-20")
edate = UTC("2021-10-25")
for s, e in util.iter_month(sdate, edate):
    print(">>>", s.date, e.date)

In [None]:
reload(util)

In [None]:
from data_quality_control import util
from obspy.core import UTCDateTime as UTC

sdate = UTC("2020-05-20")
edate = UTC("2020-10-25")
for s, e in util.iter_timeinc(sdate, edate, 
                              12*3600, 4):
    print(">>>", s, e)

In [None]:
import numpy as np

In [None]:
# First day of the month following `stime`.
stime = UTC("2021-01-11")
# stime.month is the zero-based index of the *next* month. Dividing it by 12
# gives the year carry (1 only for December) and the zero-based month, so
# November -> December of the same year and December -> January of the next.
# divmod(stime.month + 1, 12) returned month 0 and a year carry for November.
quot, rem = np.divmod(stime.month, 12)
print(quot, rem)
print("{:d}-{:02d}-01".format(stime.year+quot, rem + 1))