In [None]:
from importlib import reload

import numpy as np

In [None]:
from glob import glob
import os
from datetime import timedelta
from obspy.core import UTCDateTime as UTC

In [None]:
import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')
#%matplotlib widget

In [None]:
from data_quality_control import processing
logger = processing.logger

In [None]:
import plotly.graph_objects as go

Load processed data

In [None]:
# Relative path of the processed-data HDF5 file to load (judging by the file
# name: station GR.BFO..BHZ, 2020-12-25 to 2020-12-31).
fname = '../data/GR.BFO..BHZ_2020-12-25_2020-12-31.hdf5'

In [None]:
# Re-import the processing module so edits made on disk are picked up,
# then read the file into a fresh ProcessedData container.
reload(processing)
data = processing.ProcessedData()
data.from_file(fname)

In [None]:
# Sanity check: array shapes of the loaded amplitudes and PSDs.
print(data.amplitudes.shape)
print(data.psds.shape)

In [None]:
class MyProcessedData(processing.ProcessedData):
    """
    ``processing.ProcessedData`` extended with merging, plotting and a repr.

    The methods below read and write the attributes ``amplitudes``, ``psds``,
    ``startdate`` and ``enddate`` and call ``from_file()`` and ``trim_nan()``.
    None of these are defined in this class; they are presumably provided by
    the base class (TODO confirm).

    The first axis of ``amplitudes`` and ``psds`` is treated as one entry
    per day: ``add()`` sizes it by the number of days covered and
    ``__repr__`` reports its length as "Days".
    """

    def __init__(self):
        super().__init__()

    def _init_data(self):
        """
        Unfinished stub: evaluates the covered date range and discards it.

        NOTE(review): this reads ``self.starttime`` / ``self.endtime`` while
        every other method uses ``startdate`` / ``enddate``. Confirm which
        names the base class provides before finishing this method.
        """
        d0 = self.starttime.date
        d1 = self.enddate
        days = timedelta(
            seconds=self.endtime-self.starttime).days+1

    def extend_from_file(self, file):
        """
        Read `file` and merge its content into this object.

        If no data has been loaded yet, the file is read directly into
        this object. Otherwise it is read into a temporary
        ``processing.ProcessedData`` and merged via ``add()``.

        Overlapping date ranges are not rejected; see ``add()`` for how
        overlaps are resolved.

        Parameters
        ----------
        file : str
            Path of the file, passed on to ``from_file()``.
        """
        # If there is no data yet, we can simply read it from file
        if (self.amplitudes is None and
            self.psds is None):
            self.from_file(file)
            return

        # If we already have some data, we need to insert the
        # new data at the right place and fill potential gaps.
        new = processing.ProcessedData()
        new.from_file(file)
        self.add(new)

    def __add__(self, new):
        """
        Return the merge of this object and `new` as a separate object.

        Neither operand is modified. Use ``add()`` or ``+=`` to merge
        in place.
        """
        # Local import: the notebook does not import `copy` at the top.
        import copy

        # A shallow copy is sufficient here: add() rebinds the array and
        # date attributes of the copy to freshly created objects instead
        # of modifying the existing ones in place.
        merged = copy.copy(self)
        merged.add(new)
        return merged

    def __iadd__(self, new):
        """Merge `new` into this object in place (``self += new``)."""
        self.add(new)
        return self

    def add(self, new):
        """
        Merge the data of `new` into this object in place.

        The merged arrays span from the earlier start date to the later
        end date of both data sets. Days covered by neither data set are
        filled with NaN. Where the two overlap, the values of `new` win
        because it is inserted last. Finally ``trim_nan()`` is called.

        Parameters
        ----------
        new : object
            Must provide ``startdate``, ``enddate``, ``amplitudes`` and
            ``psds`` with trailing dimensions matching this object's.

        Raises
        ------
        IOError
            If the array shapes are inconsistent (see ``_check_shapes``).
        """
        # Get total number of days to get new array sizes
        tmin = min(self.startdate, new.startdate)
        tmax = max(self.enddate, new.enddate)
        days = timedelta(seconds=tmax-tmin).days+1

        # If shapes are inconsistent, we get an error here
        _amps_shp, _psds_shp = self._check_shapes(new)

        # Initialize NaN-filled containers for the merged data
        amps_shp, psds_shp = list(_amps_shp), list(_psds_shp)
        amps_shp[0] = days
        psds_shp[0] = days

        new_amps = np.full(amps_shp, np.nan)
        new_psds = np.full(psds_shp, np.nan)

        # Insert the data. We ignore overlaps here. New data
        # overwrites existing data if they overlap
        for d in [self, new]:
            # Row offset = whole days between this data set's start and
            # the start of the merged range.
            i = timedelta(seconds=d.startdate-tmin).days
            n = len(d.amplitudes)
            new_amps[i:i+n,:] = d.amplitudes
            new_psds[i:i+n,:,:] = d.psds

        self.amplitudes = new_amps
        self.psds = new_psds
        self.startdate = UTC(tmin.date)
        self.enddate = UTC(tmax.date)
        self.trim_nan()

    def _sort_datasets(self, datalist):
        """
        Unfinished stub: builds a one-element list and returns None.

        TODO: implement the sorting or remove the method.
        """
        sorted_list = [datalist[0]]
        pass

    def _check_shapes(self, new):
        """
        Check that `new` can be merged with this object.

        The last dimension of the amplitudes and all but the first
        dimension of the PSDs must match.

        Returns
        -------
        tuple
            ``(amplitudes.shape, psds.shape)`` of this object.

        Raises
        ------
        IOError
            If the shapes do not match. The error is logged as well.
        """
        amps_shp = self.amplitudes.shape
        psds_shp = self.psds.shape
        if (amps_shp[-1] == new.amplitudes.shape[-1] and
            psds_shp[1:] == new.psds.shape[1:]):
            return amps_shp, psds_shp
        else:
            logger.error("Data in files have inconsistent shapes!")
            raise IOError("Data in files have inconsistent shapes!")

    def plot(self):
        """
        Show the amplitudes as an interactive plotly surface.

        Both axes are plain row/column indices of ``self.amplitudes``.
        """
        z = self.amplitudes
        sh_0, sh_1 = z.shape
        y, x = np.linspace(0, sh_0-1, sh_0), np.linspace(0, sh_1-1, sh_1)
        # NOTE(review): cmin=2 fixes the lower end of the colour scale;
        # the value looks data-specific. Confirm it suits other stations.
        fig = go.Figure(data=[go.Surface(z=z, x=x, y=y, name='amplitude', cmin=2, cmax=None)])
        fig.update_layout(title='75%-amplitude', autosize=True,
                          width=800, height=500,
                          scene=dict(aspectmode='manual', aspectratio=dict(x=1, y=2, z=0.5))
                         )
        fig.show()

    def __repr__(self):
        """
        Summarise the date range, number of days and array shapes.

        Returns a short placeholder when no data has been loaded, so that
        displaying a fresh instance does not raise.
        """
        amps = getattr(self, "amplitudes", None)
        psds = getattr(self, "psds", None)
        if amps is None or psds is None:
            return "{}(no data loaded)".format(type(self).__name__)

        s = "Starttime: {}".format(self.startdate.date)
        e = "Enddate: {}".format(self.enddate.date)
        d = "Days: {:d}".format(amps.shape[0])
        shp1 = "Amplitude shape = {}".format(amps.shape)
        shp2 = "PSD shape = {}".format(psds.shape)
        return "\n".join([s,e,d,shp1,shp2])

In [None]:
# Path of the first processed-data file (same value as assigned further up).
fname = '../data/GR.BFO..BHZ_2020-12-25_2020-12-31.hdf5'

In [None]:
# Reload the processing module, then read the first file into the
# notebook-local subclass and print its summary.
reload(processing)
data = MyProcessedData()
data.from_file(fname)

print(data)

In [None]:
# Path of a second processed-data file (judging by the file name:
# same station, 2021-01-01 to 2021-01-09).
fname2 = '../data/GR.BFO..BHZ_2021-01-01_2021-01-09.hdf5'

In [None]:
# Read the second file into its own container and print its summary.
data2 = MyProcessedData()
data2.from_file(fname2)
print(data2)

In [None]:
# Merge the two data sets with `+` (dispatches to MyProcessedData.__add__)
# and display the summary of the result.
data3 = data + data2
data3

In [None]:
# Display `data` to see whether the `+` above changed the left operand.
data

In [None]:
# Remove the first row (index 0 along axis 0) from the amplitudes only.
# `data.psds` is not touched, so the two arrays may no longer agree in
# length along the first axis afterwards.
data.amplitudes = np.delete(data.amplitudes, 0, 0)

In [None]:
# Display `data` after the row removal.
data

In [None]:
# Display `data3` to see whether it reflects the change made to `data`.
data3

In [None]:
# Merge `data2` into `data` in place.
data.add(data2)

In [None]:
# Display `data` after the in-place merge.
data

In [None]:
# Read the second file from disk and merge it into `data`.
data.extend_from_file(fname2)

In [None]:
# Array shapes after merging.
print(data.amplitudes.shape)
print(data.psds.shape)

In [None]:
# Compare `data` with itself; `_check_shapes` returns the amplitude and
# PSD shapes when they are consistent and raises IOError otherwise.
a, b = data._check_shapes(data)

In [None]:
# Scratch: overwrite the first value to exercise the else-branch of an
# `if a and b` check.
a = False

In [None]:
# Scratch: truthiness test on `a` and `b`, which are assigned in the
# preceding cells.
if a and b:
    print("Hello")
else:
    print("argh")

In [None]:
class Analyzer():
    """
    Collect the processed-data files of one station for a time range.

    File names are expected to look like
    ``<stationcode>_<start>_<end>.hdf5``: the glob pattern matches on
    the year at the beginning of ``<start>``, and the text after the last
    underscore is parsed as the end date.

    Attributes
    ----------
    starttime, endtime : UTC
        Requested time range.
    datadir : str
        Directory that is searched for files.
    stationcode : str
        File name prefix.
    data : MyProcessedData or None
        Merged content of all selected files, or None if none was found.
    """

    def __init__(self, starttime, endtime, 
                 datadir, stationcode):
        """Store the search parameters and load the data right away."""
        self.starttime = UTC(starttime)
        self.endtime = UTC(endtime)
        self.datadir = datadir
        self.stationcode = stationcode
        # Make sure the attribute exists even if no file is found.
        self.data = None
        self.get_data()

    def get_data(self, starttime=None, endtime=None, 
                 datadir=None, stationcode=None):
        """
        (Re)load the data, optionally overriding the search parameters.

        Every argument that is not None replaces the stored value. The
        matching files are then merged in sorted order into a new
        ``MyProcessedData`` stored in ``self.data``. If no file matches,
        a warning is logged and ``self.data`` is set to None so that data
        of a previous search is not kept by accident.
        """
        if starttime is not None:
            self.starttime = UTC(starttime)
        if endtime is not None:
            # The original assigned UTC(starttime) here.
            self.endtime = UTC(endtime)
        if datadir is not None:
            self.datadir = datadir
        if stationcode is not None:
            self.stationcode = stationcode

        files = sorted(self.get_filenames())
        if len(files) == 0:
            # `warn` is a deprecated alias of `warning` on standard
            # library loggers.
            logger.warning("No files for %s in %s between %s and %s" %
                           (self.stationcode, self.datadir, 
                            self.starttime, self.endtime))
            self.data = None
            return

        # Merge the files one after the other into a single container.
        data = MyProcessedData()
        for file in files:
            data.extend_from_file(file)
        self.data = data

    def get_filenames(self):
        """
        Return one file per start year between starttime and endtime.

        For every year the directory is searched for files whose start
        date begins with that year. If several match,
        ``select_longest`` picks one. Years without files are skipped.
        The search stops early once a selected file ends in or after the
        year of ``self.endtime``.

        Returns
        -------
        list of str
            Selected file paths in the order they were found.
        """
        filehead = os.path.join(self.datadir, self.stationcode)
        fmtstr = filehead + "_{:04d}*.hdf5"
        logger.info("Looking for data file %s" % fmtstr)
        _year = self.starttime.year
        files = []
        while _year <= self.endtime.year:
            searchstr = fmtstr.format(_year)
            fnames = glob(searchstr)

            if len(fnames) > 1:
                files.append(self.select_longest(fnames))
            elif len(fnames) == 0:
                _year = _year +1
                continue
            else:
                files.append(fnames[0])

            # Get end year of latest file (the original compared against
            # the notebook-global `endtime` instead of `self.endtime`).
            _endtime = self._end_date(files[-1])
            if _endtime.year >= self.endtime.year:
                break
            _year = _year + 1
        return files

    def select_longest(self, fnames):
        """
        Return the file from `fnames` with the latest end date.

        The files are scanned in the given order. The scan stops as soon
        as the best end date found so far reaches ``self.endtime``. On
        equal end dates the later entry wins.

        Parameters
        ----------
        fnames : list of str
            Candidate file paths; must not be empty.

        Returns
        -------
        str
            The selected entry of `fnames`, unchanged.
        """
        logger.debug("Found %s files for year." % 
                     str(len(fnames)))
        best = fnames[0]
        edate = self._end_date(best)
        for candidate in fnames[1:]:
            _edate = self._end_date(candidate)
            if _edate >= edate:
                # Remember the file itself, not only its end date.
                best, edate = candidate, _edate
            if edate >= self.endtime:
                break
        logger.debug("Selected %s" % best)
        return best

    @staticmethod
    def _end_date(fname):
        """Parse the end date from the text after the last underscore."""
        stem = os.path.splitext(fname)[0]
        return UTC(stem.split('_')[-1])

In [None]:
      

def get_filenames(starttime, endtime, datadir, stationcode):
    """
    Collect one processed-data file per start year in a time range.

    Files are searched as ``<datadir>/<stationcode>_<year>*.hdf5`` for
    each year from ``starttime.year`` to ``endtime.year``. If a year has
    several matches, ``select_longest`` chooses one; years without a
    match are skipped. The text after the last underscore of a selected
    file is parsed as its end date, and the search stops once that date
    lies in or after the year of `endtime`.

    Parameters
    ----------
    starttime, endtime : objects with a ``year`` attribute
    datadir : str
    stationcode : str

    Returns
    -------
    list of str
        The selected file paths in search order.
    """
    pattern = os.path.join(datadir, stationcode) + "_{:04d}*.hdf5"
    print(pattern)

    selected = []
    year = starttime.year
    while year <= endtime.year:
        matches = glob(pattern.format(year))

        # Nothing for this year: try the next one.
        if not matches:
            year += 1
            continue

        if len(matches) == 1:
            chosen = matches[0]
        else:
            chosen = select_longest(matches, endtime)
        selected.append(chosen)

        # End date = last underscore-separated token, extension removed.
        stem = os.path.splitext(chosen)[0]
        if UTC(stem.split('_')[-1]).year >= endtime.year:
            break
        year += 1

    return selected

def select_longest(fnames, endtime):
    """
    Return the file from `fnames` with the latest end date.

    The end date of a file is the text after the last underscore of its
    name (extension removed), parsed with ``UTC``. The files are scanned
    in the given order, and the scan stops as soon as the best end date
    found so far reaches `endtime`. On equal end dates the later entry
    wins.

    Parameters
    ----------
    fnames : list of str
        Candidate file paths; must not be empty.
    endtime : UTC
        Requested end of the time range.

    Returns
    -------
    str
        The selected entry of `fnames`, unchanged.
    """
    logger.debug("Found %s files for year." % str(len(fnames)))

    def _end_date(fname):
        stem = os.path.splitext(fname)[0]
        return UTC(stem.split('_')[-1])

    best = fnames[0]
    edate = _end_date(best)
    for candidate in fnames[1:]:
        _edate = _end_date(candidate)
        if _edate >= edate:
            # Remember the file itself, not only its end date; the
            # original always returned the first file.
            best, edate = candidate, _edate
        if edate >= endtime:
            break
    logger.debug("Selected %s" % best)
    return best

In [None]:
# Station identifiers; the location code is empty for this station.
network, station, location, channel = 'GR', 'BFO', '', 'BHZ'

# Directory that holds the processed HDF5 files.
#datadir = '/home/lehr/sds/processed/'
datadir = "../data/"

# Dot-separated code that prefixes the processed-data file names.
stationcode = ".".join((network, station, location, channel))

In [None]:
# Time range to search for processed-data files.
starttime = UTC("2018-01-02")
endtime = UTC("2021-01-12")

In [None]:
# Create the analyzer; its constructor calls get_data() right away.
analyzer = Analyzer(starttime, endtime, datadir, stationcode)

In [None]:
# Display the data set that the analyzer merged from the files it found.
analyzer.data

In [None]:
# Run the module-level file search with the same parameters.
files = get_filenames(starttime, endtime, datadir, stationcode)

In [None]:
# Rebind the notebook-level time range to the calendar year 2018.
# Cells run after this one see these values, not the earlier range.
starttime = UTC("2018-01-01")
endtime = UTC("2018-12-31")

In [None]:
# Difference of the two timestamps, passed to timedelta as seconds.
dt = timedelta(seconds=endtime-starttime)

In [None]:
# Whole days contained in the time difference.
dt.days

In [None]:
# Display the list returned by get_filenames().
files

In [None]:
# List the processed files that start on 2021-01-02. The search uses the
# configured `datadir` and `stationcode` instead of a hard-coded absolute
# path in one user's home directory, so the cell runs on other machines.
glob(os.path.join(datadir, stationcode + "_2021-01-02_*.hdf5"))

In [None]:
# Display the list returned by get_filenames() again.
files

In [None]:
# Start over with an empty container; this rebinds `data`.
data = MyProcessedData()

In [None]:
# Read the file at `fname` into the new container.
data.from_file(fname)

In [None]:
# Show the amplitudes as an interactive plotly surface.
data.plot()