## Loading Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import autocorrelation_plot

from scipy import stats
from scipy import signal
import librosa
import librosa.display

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.stattools import acf

import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf

In [2]:
%matplotlib inline
#sns.set()
#%matplotlib nbagg

pd.options.display.precision = 15

## Umgebungsvariablen

In [4]:
# path data Stefan
failure_datapath = '''D:/jupyter-notebooks/LANL_Earthquake_Prediction/failure/'''
train_data_path = '''C:/studium/studium/CAS_PML/Projekt_Arbeit/earthquake/Daten/all/train.csv'''

In [6]:
# path data Domenico
# NOTE(review): this cell assigns the same two names as the "path data Stefan" cell,
# so when both cells are run in order the values below are the ones in effect.
failure_datapath = '''C:/Users/taacodo4/GITRepos/CAS_PML_Earthquake_pred/failure'''
train_data_path = '''C:/Users/taacodo4/GITRepos/CAS_PML_Earthquake_pred/data/train.csv'''


### Load the full dataset

In [None]:
train_data = pd.read_csv(train_data_path, dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})

#  Data Exploration

## Acoustic data

In [None]:
train_data.acoustic_data.describe()

In [None]:
# Draw a 5% sample with a fixed seed so the figure (and the later cells that reuse
# `train_sample`) give the same result on every run.
train_sample = train_data.sample(frac=0.05, random_state=42)
plt.figure(figsize=(15,8))
# The row count in the title is taken from the data instead of being hard-coded.
plt.title("Acoustic data distribution (5% sample of {:.10e})".format(len(train_data)))
ax = sns.distplot(train_sample.acoustic_data)
# source: https://www.kaggle.com/jsaguiar/seismic-data-exploration

Max Value beträgt: 5.4440000000e+03

In [None]:
train_sample.acoustic_data.quantile(0.9)

90 % der Acoustic-Daten liegen unter 9.

In [None]:
plt.figure(figsize=(15,8))
plt.title("Acoustic data distribution (5% sample of 6.2914548000e+08)")
ax = sns.distplot(train_sample.acoustic_data[train_sample.acoustic_data.between(-9, 9)], kde=False,fit=stats.norm)
#kde = whether to plot a gaussian kernel density estimate.
#quelle: https://www.kaggle.com/jsaguiar/seismic-data-exploration

## Time to failure

In [None]:
train_data.time_to_failure.describe()

In [None]:
plt.figure(figsize=(15,8))
plt.title("Time to failure distribution (5% sample of 6.2914548000e+08)")
ax = sns.distplot(train_sample.time_to_failure,fit=stats.norm,kde=False )

Die Dauer bis zu einem Erdbeben liegt im Schnitt bei 5 - 7 Sekunden

# Data preparation
Die Trainingsdaten werden in die einzelnen Erdbeben aufgeteilt, und jedes Erdbeben wird in einer separaten Datei (failure<N>.npy) gespeichert.

## Die einzelnen Erdbeben voneinander trennen

In [None]:
# np.diff gives a[n+1] - a[n]. A positive difference marks a slip: time_to_failure at
# a[n] is near zero and the following value a[n+1] is higher than a[n].
failure_border = np.where(np.diff(train_data.time_to_failure) > 0)

In [None]:
# Split the training data into one DataFrame per earthquake cycle.
# Each entry of failure_border[0] is the position n where a[n+1] - a[n] > 0, i.e. the
# last row before time_to_failure jumps back up. That row still belongs to the segment
# that is ending, so the slice must run up to and including it (iloc excludes its stop).
# Rows after the final border are not appended (unchanged from before).
failures = []
start = 0
for end in failure_border[0]:
    failures.append(train_data.iloc[start:end + 1])
    start = end + 1

In [None]:
for failure in failures:
    print(failure.describe())

### Save each failure to file

In [None]:
# Write every segment as failure<N>.npy into failure_datapath, the directory the
# np.load cells read from, instead of a './failure/' path that depends on the
# current working directory.
for idx, segment in enumerate(failures):
    np.save(failure_datapath + '/failure{}'.format(idx), segment)

### load first earthquake

In [None]:
failure = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(0)))
failure.columns = ['acoustic_data','time_to_failure']

### Reduce datapoints from first earthquake

In [None]:
# Keep every second row. reset_index() returns a new frame (the previous index is kept
# as an 'index' column, as before) instead of mutating a slice of `failure` in place.
failure_reduced = failure.iloc[::2, :].reset_index()
failure_reduced.describe()

## Reduce decimal places
To reduce the compute time, time_to_failure is reduced to 4 decimal places.

In [26]:
import os

# Count only the raw segment files (failure<N>.npy). Other files in the directory,
# e.g. *_4decimals.pkl written by an earlier run, must not change the loop bounds.
path, dirs, files = next(os.walk(failure_datapath))
file_count = sum(1 for name in files
                 if name.startswith('failure') and name.endswith('.npy'))

# range(file_count) visits failure0 .. failure<file_count-1>; the former
# range(0, file_count-1) left out the last file.
for idx in range(file_count):
    data = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(idx)))
    data.columns = ['acoustic_data','time_to_failure']
    data['time_to_failure']=data['time_to_failure'].round(4)
    data.to_pickle('./failure/failure{}_4decimals.pkl'.format(idx))

In [54]:
failure=1
data = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(failure)))

In [38]:
data.columns = ['acoustic_data','time_to_failure']

In [49]:
import math
def truncate(number, digits) -> float:
    """Cut `number` off after `digits` decimal places (towards zero, no rounding).

    number: value to truncate
    digits: number of decimal places to keep
    returns: the truncated value as a float
    """
    scale = 10.0 ** digits
    # Shift the wanted decimals in front of the point, drop the rest, shift back.
    shifted = math.trunc(scale * number)
    return shifted / scale

In [61]:
# Truncate (not round) time_to_failure to 4 decimals. np.trunc handles the whole
# column in one call, which avoids one Python-level function call per row.
data['time_to_failure'] = np.trunc(data['time_to_failure'] * 10.0**4) / 10.0**4

In [57]:
    data = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(failure)))
    data.columns = ['acoustic_data','time_to_failure']
    data['time_to_failure']=data['time_to_failure'].round(4)

In [64]:
data.head()

Unnamed: 0,acoustic_data,time_to_failure
0,4.0,11.5408
1,5.0,11.5408
2,6.0,11.5408
3,3.0,11.5408
4,4.0,11.5408


In [28]:
# Preview the first rows of every rounded pickle. The loaded frame gets its own name so
# that `failure`, which later plotting cells pass to plot_acc_ttf_data, is not replaced
# by the last pickle read here.
for i in range(0,15):
    failure_4decimals = pd.read_pickle('./failure/failure{}_4decimals.pkl'.format(i))
    print(failure_4decimals.head())

   acoustic_data  time_to_failure
0           12.0           1.4691
1            6.0           1.4691
2            8.0           1.4691
3            5.0           1.4691
4            8.0           1.4691
   acoustic_data     time_to_failure
0            4.0  11.540800000000001
1            5.0  11.540800000000001
2            6.0  11.540800000000001
3            3.0  11.540800000000001
4            4.0  11.540800000000001
   acoustic_data  time_to_failure
0            1.0          14.1806
1            2.0          14.1806
2           -1.0          14.1806
3            5.0          14.1806
4            7.0          14.1806
   acoustic_data  time_to_failure
0            4.0           8.8567
1            4.0           8.8567
2            1.0           8.8567
3            6.0           8.8567
4           10.0           8.8567
   acoustic_data     time_to_failure
0           -4.0  12.694000000000001
1           -1.0  12.694000000000001
2            1.0  12.694000000000001
3            2.0  

# Plot all earthquakes

In [None]:
def plot_acc_ttf_data(train, final_idx, init_idx=0, step=1, title="",
                      color1='orange', color2='blue', ax1=None):
    '''Plot acoustic_data and time_to_failure over the sample index on twin y-axes.

       source: https://www.kaggle.com/jsaguiar/seismic-data-exploration
       train: DataFrame with the columns acoustic_data and time_to_failure
       final_idx: end index (exclusive)
       init_idx: start index
       step: only every step-th row between init_idx and final_idx is plotted
       title: title text; the step size is appended to it
       color1: line colour of the acoustic_data curve
       color2: line colour of the time_to_failure curve
       ax1: optional existing Axes to draw into; when None (default) a new
            15x8 figure is created, exactly as before
    '''
    # Select the rows once and reuse them for both curves.
    subset = train.iloc[np.arange(init_idx, final_idx, step)]
    full_title = title +' (step size: '+str(step)+')'

    if ax1 is None:
        fig, ax1 = plt.subplots(figsize=(15, 8))
        fig.suptitle(full_title, fontsize=14)
    else:
        # Drawing into a caller-supplied subplot: title the axes, not the figure.
        ax1.set_title(full_title, fontsize=14)

    ax2 = ax1.twinx()
    ax1.set_xlabel('index')
    ax1.set_ylabel('Acoustic data')
    ax2.set_ylabel('Time to failure')
    sns.lineplot(data=subset.acoustic_data.values, ax=ax1, color=color1)
    sns.lineplot(data=subset.time_to_failure.values, ax=ax2, color=color2)

In [None]:
plot_acc_ttf_data(train_data,final_idx=len(train_data), step=1000, title="All training data")
print('Amount of samples: {}'.format(len(train_data)))

### Peaks

In [None]:
peaks = train_data[train_data.acoustic_data.abs() > 3000]
#print(peaks)
peaks.time_to_failure.describe()

In [None]:
plt.hist(peaks.time_to_failure[peaks.time_to_failure<0.5],bins=1000)
plt.show()

In [None]:
peaks.time_to_failure[peaks.time_to_failure>0.5].describe()

In [None]:
peaks = train_data[train_data.acoustic_data.abs() > 1000]
#print(peaks)
peaks.time_to_failure.describe()

In [None]:
peaks = train_data[train_data.acoustic_data.abs() > 500]
#print(peaks)
peaks.time_to_failure.describe()

In [None]:
# Reload the first earthquake here: cells further up rebind the name `failure`
# (loop counters, pickle previews), so on a top-to-bottom run it would no longer hold
# the contents of failure0.npy that this plot and the following spectral cells expect.
failure = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(0)))
failure.columns = ['acoustic_data','time_to_failure']
plot_acc_ttf_data(failure,final_idx=len(failure), step=100, title="Earthquake from file failure0.npy")
print('Amount of samples: {}'.format(len(failure)))

In [None]:
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.title("Spectrogram Earthquake from file failure0.npy")
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(failure.acoustic_data,
                                                               Fs=samplingFrequency,
                                                               noverlap=90,
                                                               NFFT=4096)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.colorbar(format='%+2.0f dB')
plt.show()

In [None]:
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.title("Spectrogram Earthquake from file failure0.npy")
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(failure.acoustic_data,
                                                               Fs=samplingFrequency)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.colorbar(format='%+2.0f dB')
plt.show()

In [None]:
powerSpectrum[2,].mean()

In [None]:
# NOTE(review): Pxx is first assigned by the plt.psd cell further down in this notebook;
# on a fresh kernel run top to bottom this cell raises NameError. Move it below that cell.
Pxx

In [None]:
#https://matplotlib.org/api/_as_gen/matplotlib.pyplot.angle_spectrum.html#matplotlib.pyplot.angle_spectrum
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.title("Phase Spectrogram Earthquake from file failure0.npy")
spectrum, freqs, line = plt.phase_spectrum(failure.acoustic_data,Fs=samplingFrequency)
plt.show()

In [None]:
#https://matplotlib.org/api/_as_gen/matplotlib.pyplot.magnitude_spectrum.html
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.yscale('log')
plt.xscale('log')
plt.xlabel('Frequency (log) ')
plt.ylabel('Magnitude (log)')
plt.title("Magnitude_spectrum from file failure0.npy")
spectrum, freqs, line = plt.magnitude_spectrum(failure.acoustic_data,Fs=samplingFrequency)
plt.show()

In [None]:
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.title("Power spectral density from file failure0.npy")
#plt.xscale('log')
Pxx, freqs = plt.psd(x=failure.acoustic_data, Fs=samplingFrequency)

plt.xlabel('Frequency (Hz)')
plt.show()

In [None]:
plt.figure(figsize=(15,8))
plt.title("FFT Earthquake from file failure0.npy")
                                                             
plt.xlabel('Frequency (Hz)')
plt.ylabel('')
plt.plot(freqenciesFound,powerSpectrum)
plt.show()

In [None]:
powerSpectrum.shape

In [None]:
Pxx.shape

In [None]:
y = np.array(failure.acoustic_data)
# Same sampling rate as in the matplotlib spectrogram cells; specshow derives the
# frequency axis from it.
samplingFrequency = 4000000
# STFT parameters: FFT window of 4096 samples, shifted by 1024 samples per frame.
n_fft = 4096
hop_length = 1024

fig= plt.figure(figsize=(15,8))

D = librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length)

powerSpectrum_librosa = librosa.amplitude_to_db(np.abs(D),ref=np.max)
# Pass the hop length actually used for the STFT so the time axis is scaled with it.
librosa.display.specshow(powerSpectrum_librosa, y_axis='linear', x_axis='time',
                         sr=samplingFrequency, hop_length=hop_length)

plt.title('Spectrogram Earthquake from file failure0.npy', fontsize = 20)
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.xlabel('Time (s)', fontsize = 20)
plt.ylabel('Frequency (Hz)', fontsize = 20)

Der erste Test dauert nur ca. 1.4 Sekunden

In [None]:
#load second earthquake
failure_2 = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(1)))
failure_2.columns = ['acoustic_data','time_to_failure']

In [None]:
# Last 1.5 million samples of the second earthquake. failure_2 is loaded from
# failure1.npy, so both the title and the sample count refer to that frame.
plot_acc_ttf_data(failure_2,final_idx=len(failure_2),init_idx=len(failure_2)-1500000, step=100, title="Earthquake from file failure1.npy")
print('Amount of samples: {}'.format(len(failure_2)))

In [None]:
failure_2.columns = ['acoustic_data','time_to_failure']
plot_acc_ttf_data(failure_2,final_idx=len(failure_2), step=100, title="Earthquake from file failure1.npy")
print('Amount of samples: {}'.format(len(failure_2)))

In [None]:
samplingFrequency = 4000000

# Plot the spectrogram
plt.figure(figsize=(15,8))
plt.title("Spectrogram Earthquake from file failure1.npy")
powerSpectrum_2, freqenciesFound_2, time, imageAxis = plt.specgram(failure_2.acoustic_data,
                                                               Fs=samplingFrequency,
                                                               noverlap=900,
                                                               NFFT=4096)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.colorbar(format='%+2.0f dB')
plt.show()

In [None]:
samplingFrequency = 4000000

# Plot the power spectral density of the second earthquake (failure_2 / failure1.npy)
plt.figure(figsize=(15,8))
plt.title("power spectral density from file failure1.npy")
# plt.psd returns two values (Pxx, freqs); unpacking four raised a ValueError.
# Dedicated names leave the spectrogram results of the other cells untouched.
Pxx_2, freqs_2 = plt.psd(failure_2.acoustic_data,Fs=samplingFrequency)

plt.xlabel('Frequency (Hz)')
plt.show()

In [None]:
print(powerSpectrum_2.shape)
print(freqenciesFound_2.shape)

In [None]:
powerSpectrum_2

In [None]:
plt.figure(figsize=(15,8))
plt.title("FFT Earthquake from file failure1.npy")                                                          
plt.xlabel('Frequency (Hz)')
plt.ylabel('')
plt.plot(freqenciesFound_2,powerSpectrum_2)
plt.show()

In [None]:
y = np.array(failure_2.acoustic_data)
# Same sampling rate as in the matplotlib spectrogram cells; specshow derives the
# frequency axis from it.
samplingFrequency = 4000000
# STFT parameters: FFT window of 4096 samples, shifted by 1024 samples per frame.
n_fft = 4096
hop_length = 1024

fig= plt.figure(figsize=(15,8))

D = librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length)

powerSpectrum_librosa = librosa.amplitude_to_db(np.abs(D),ref=np.max)
# Pass the hop length actually used for the STFT so the time axis is scaled with it.
librosa.display.specshow(powerSpectrum_librosa, y_axis='linear', x_axis='time',
                         sr=samplingFrequency, hop_length=hop_length)

plt.title('Spectrogram Earthquake from file failure1.npy', fontsize = 20)
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.xlabel('Time (s)', fontsize = 20)
plt.ylabel('Frequency (Hz)', fontsize = 20)

In [None]:
#load third earthquake
failure_3 = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(2)))
failure_3.columns = ['acoustic_data','time_to_failure']

In [None]:
plot_acc_ttf_data(failure_3,final_idx=len(failure_3),init_idx=len(failure_3)-1500000, step=100, title="Earthquake from file failure2.npy")
print('Amount of samples: {}'.format(len(failure_3)))

In [None]:
plot_acc_ttf_data(failure_3,final_idx=len(failure_3), step=100, title="Earthquake from file failure2.npy")
print('Amount of samples: {}'.format(len(failure_3)))

In [None]:
samplingFrequency = 4000000

# Plot the power spectral density of the third earthquake (failure_3 / failure2.npy)
plt.figure(figsize=(15,8))
plt.title("power spectral density from file failure2.npy")
# plt.psd returns two values (Pxx, freqs); unpacking four raised a ValueError.
powerSpectrum_PSD, freqenciesFound_PSD = plt.psd(failure_3.acoustic_data,
                                                 Fs=samplingFrequency,
                                                 noverlap=90,
                                                 NFFT=4096)

plt.xlabel('Frequency (Hz)')
plt.show()

In [None]:
samplingFrequency = 4000000

# powerSpectrum_3 / freqenciesFound_3 are not assigned anywhere else in the visible
# notebook, so the spectrogram of the third earthquake is computed here, with the same
# NFFT / noverlap settings as the failure1.npy spectrogram cell.
plt.figure(figsize=(15,8))
plt.title("Spectrogram Earthquake from file failure2.npy")
powerSpectrum_3, freqenciesFound_3, time_3, imageAxis_3 = plt.specgram(failure_3.acoustic_data,
                                                                       Fs=samplingFrequency,
                                                                       noverlap=900,
                                                                       NFFT=4096)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.colorbar(format='%+2.0f dB')
plt.show()

# One curve per spectrogram time segment, plotted over frequency.
plt.figure(figsize=(15,8))
plt.title("FFT Earthquake from file failure2.npy")
plt.xlabel('Frequency (Hz)')
plt.ylabel('')
plt.plot(freqenciesFound_3,powerSpectrum_3)
plt.show()

In [None]:
y = np.array(failure_3.acoustic_data)
# Same sampling rate as in the matplotlib spectrogram cells; specshow derives the
# frequency axis from it.
samplingFrequency = 4000000
# STFT parameters: FFT window of 4096 samples, shifted by 1024 samples per frame.
n_fft = 4096
hop_length = 1024

fig= plt.figure(figsize=(15,8))

D = librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length)

powerSpectrum_librosa = librosa.amplitude_to_db(np.abs(D),ref=np.max)
# Pass the hop length actually used for the STFT so the time axis is scaled with it.
librosa.display.specshow(powerSpectrum_librosa, y_axis='linear', x_axis='time',
                         sr=samplingFrequency, hop_length=hop_length)

plt.title('Spectrogram Earthquake from file failure2.npy', fontsize = 20)
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.xlabel('Time (s)', fontsize = 20)
plt.ylabel('Frequency (Hz)', fontsize = 20)
plt.show()

# sollten wir die 'Ausreisser' entfernen?

# Trends and Seasonality

## Augmented Dickey-Fuller test

### Was ist eine stationäre Zeitreihe?

Stationäre Zeitreihen weisen keine systematische Veränderung im Gesamtbild auf, d.h. es bestehen keine systematischen Änderungen im Mittel oder der Varianz, und es liegen keine streng periodischen Schwankungen vor. Anders ausgedrückt, schwanken solche Zeitreihen nicht regelmässig mit Jahresperiode, sie weisen keine mehrjährigen, zyklischen Verläufe auf und es lässt sich auch keine deutlich positive oder negative Steigung über einen längeren Zeitraum erkennen --> keine Saison und Trend - Komponenten enthalten. 

Wenn eine Zeitreihe stationär ist, kann das Modellieren einfacher sein. Statistische Modellierungsmethoden setzen voraus, dass die Zeitreihen stationär sind, um wirksam zu sein.

#### Null Hypothesis (H0): 
non-stationary. It has some time dependent structure.
#### Alternate Hypothesis (H1): 
meaning it is stationary. It does not have time-dependent structure.

#### Prepare Data

In [None]:
failure_1 = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(0)))
failure_1.columns = ['acoustic_data','time_to_failure']
# Every 100th sample. reset_index() returns a new frame (old index kept as an 'index'
# column, as before) instead of mutating a slice of failure_1 in place.
failure_reduced_1 = failure_1.iloc[::100, :].reset_index()

#### Augmented Dickey-Fuller (adfuller) and Kwiatkowski-Phillips-Schmidt-Shin (KPSS) tests

In [None]:
# Augmented Dickey-Fuller: statistic at position 0, p-value at 1, critical values at 4.
print('Augmented Dickey-Fuller unit root test')
result_adfuller = adfuller(failure_reduced_1.acoustic_data)
adf_critical = result_adfuller[4]
print('ADF Statistic: %f' % result_adfuller[0])
print('p-value: %f' % result_adfuller[1])
print('Critical Values:')
for level in adf_critical:
    print('\t%s: %.3f' % (level, adf_critical[level]))

# KPSS: statistic at position 0, p-value at 1, critical values at 3.
print('Kwiatkowski-Phillips-Schmidt-Shin test')
result_kpss = kpss(failure_reduced_1.acoustic_data)
kpss_critical = result_kpss[3]
print('kpss_stat: %f' % result_kpss[0])
print('p-value: %f' % result_kpss[1])
print('Critical Values:')
for level in kpss_critical:
    print('\t%s: %.3f' % (level, kpss_critical[level]))

## Ist das Acoustic-Signal stationär?

##### H0 kann verworfen werden. Das acoustic signal ist ein stationäres Signal.

## Analysen mit Window

### Rolling statistics: mean and standard deviation

Prepare data

In [None]:
failure_1 = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(0)))
failure_1.columns = ['acoustic_data','time_to_failure']
# Every second sample. reset_index() returns a new frame (old index kept as an 'index'
# column, as before) instead of mutating a slice of failure_1 in place.
failure_reduced_1 = failure_1.iloc[::2, :].reset_index()

In [None]:
# Rolling mean / standard deviation over 1000 consecutive rows of the reduced signal.
rolmean = failure_reduced_1[['acoustic_data']].rolling(1000).mean()
rolstd = failure_reduced_1[['acoustic_data']].rolling(1000).std()

# The plot handles are not stored: assigning the result of plt.plot back to `rolstd`
# replaced the rolling-std DataFrame with a list of line objects.
plt.plot(failure_reduced_1[['acoustic_data']], color='orange', label='Original')
plt.plot(rolmean, color='red', label='Rolling mean')
plt.plot(rolstd, color='black', label='Rolling std')
plt.legend(loc='best')
plt.title('Rolling Mean and Standard Deviation')
plt.show(block=False)

Auch visuell bestätigt sich die Annahme, dass das Signal keine Trendkomponente enthält.

## Prepare data for windows plot

In [None]:
failure_2 = pd.DataFrame(np.load(failure_datapath+'/failure{}.npy'.format(1)))
failure_2.columns = ['acoustic_data','time_to_failure']
#failure_reduced_2 = failure_2.iloc[::2, :] 
#failure_reduced_2.reset_index(inplace=True)

In [None]:
window = 5000 #willkürlich gewählt --> sollte noch besprochen werden!

In [None]:
def plot_double_series(first_chunk,first_ylabel,second_chunk,second_ylabel,xlabel,title):
    """Draw two series over a shared x-axis, each with its own y-axis.

    first_chunk: values drawn in orange against the left y-axis
    first_ylabel: label of the left y-axis and legend entry of the first series
    second_chunk: values drawn against the right y-axis
    second_ylabel: label of the right y-axis and legend entry of the second series
    xlabel: label of the x-axis
    title: figure title
    """
    fig, left_axis = plt.subplots(figsize=(15, 8))
    right_axis = left_axis.twinx()
    fig.suptitle(title, fontsize=14)

    left_axis.set_xlabel(xlabel)
    left_axis.set_ylabel(first_ylabel)
    right_axis.set_ylabel(second_ylabel)

    sns.lineplot(data=first_chunk, ax=left_axis, color='orange', label=first_ylabel)
    sns.lineplot(data=second_chunk, ax=right_axis, label=second_ylabel)
    plt.legend(loc='best')
    plt.show()

In [None]:
def plot_triple_series(first_chunk,first_ylabel,second_chunk,second_ylabel,time_to_failure,xlabel,title):
    """Draw two series on twin y-axes plus time_to_failure as a third, black curve.

    first_chunk: values drawn in orange against the left y-axis
    first_ylabel: label of the left y-axis and legend entry of the first series
    second_chunk: values drawn against the right y-axis
    second_ylabel: label of the right y-axis and legend entry of the second series
    time_to_failure: values of the third curve (legend entry 'time_to_failure')
    xlabel: label of the x-axis
    title: figure title
    """
    fig, left_axis = plt.subplots(figsize=(15, 8))
    right_axis = left_axis.twinx()
    fig.suptitle(title, fontsize=14)

    left_axis.set_xlabel(xlabel)
    left_axis.set_ylabel(first_ylabel)
    right_axis.set_ylabel(second_ylabel)

    sns.lineplot(data=first_chunk, ax=left_axis, color='orange', label=first_ylabel)
    sns.lineplot(data=second_chunk, ax=right_axis, label=second_ylabel)
    # No ax is given for the third curve, so seaborn falls back to the current axes.
    sns.lineplot(data=time_to_failure, label='time_to_failure', color='black')
    plt.legend(loc='best')
    plt.show()

In [None]:
train = train_data
window_size = 150000

# One entry per consecutive, non-overlapping window of `window_size` rows.
rolling_mean = []
rolling_std = []
rolling_skew = []
rolling_kurt = []
rolling_max = []
rolling_min = []
rolling_25Quantile = []

# time_to_failure of the last row of each window
last_time_to_failure = []

# Floor division: only complete windows are evaluated. Rounding the quotient could
# round up and produce a final window that is shorter than window_size or empty
# (an empty window makes values[-1] fail). The divisor is window_size itself instead
# of a second hard-coded 150000.
n_windows = len(train) // window_size

init_idx = 0
for _ in range(n_windows):
    x = train.iloc[init_idx:init_idx + window_size]
    acoustic = x.acoustic_data
    last_time_to_failure.append(x.time_to_failure.values[-1])
    rolling_mean.append(acoustic.mean())
    rolling_std.append(acoustic.std())
    rolling_skew.append(acoustic.skew())
    rolling_kurt.append(acoustic.kurt())
    rolling_max.append(acoustic.max())
    rolling_min.append(acoustic.min())
    rolling_25Quantile.append(acoustic.quantile(.25, interpolation='midpoint'))

    init_idx += window_size

# Convert to arrays for the plotting cells.
rolling_mean = np.array(rolling_mean)
rolling_std = np.array(rolling_std)
rolling_skew = np.array(rolling_skew)
rolling_kurt = np.array(rolling_kurt)
rolling_max = np.array(rolling_max)
rolling_min = np.array(rolling_min)
rolling_25Quantile = np.array(rolling_25Quantile)

last_time_to_failure = np.array(last_time_to_failure)


## Plot Windows

### Rolling Mean

In [None]:
plot_double_series(rolling_mean,'Acoustic data',last_time_to_failure,'Time to failure','index','Rolling Mean / Time to failure')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_std,'Standard Deviation',last_time_to_failure,'index','Rolling Mean / Rolling Standard Deviation')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_skew,'Skewness',last_time_to_failure,'index','Rolling Mean / Rolling Skewness')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_kurt,'Kurtosis',last_time_to_failure,'index','Rolling Mean / Rolling Kurtosis')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_max,'Max',last_time_to_failure,'index','Rolling Mean / Rolling Max')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_min,'Min',last_time_to_failure,'index','Rolling Mean / Rolling Min')

In [None]:
plot_triple_series(rolling_mean,'Acoustic data',rolling_25Quantile,'25% quantile',last_time_to_failure,'index','Rolling Mean / Rolling 25% Quantile')

## Autokorrelationen

In [None]:
# failure_reduced has named columns, so the signal is selected by name;
# failure_reduced[0] looked up a column labelled 0, which does not exist.
print(acf(failure_reduced.acoustic_data, nlags=100, fft=True))

In [None]:
plot_acf(failure_reduced.acoustic_data,alpha=0.05,lags=1000)
plt.show()

# Erkenntnisse aus der Datenanalyse

## Stationarität der Trainingsdaten

Die Erdbeben aus den Trainingsdaten sind stationär (Dickey–Fuller und Kwiatkowski-Phillips-Schmidt-Shin test)

## Stationarität der Testdaten

## Muster in den Daten

### Standardabweichung

Muster: Je näher das Erdbeben umso mehr steigt die Std an. 

### Schiefe (Skewness)

Muster: Je näher das Erdbeben umso linksschiefer wird die Verteilung

## Erkenntnis

In [None]:
# Voraussagen mittels Machine Learning sollten anhand der vorhandenen Muster möglich sein.

# ToDo

Stationaritätstest auf mehreren Erdbeben ausführen
Stationaritätstest auf den Testdaten ausführen
Testdaten anzeigen


Muster (Std./ Skew / Kurtosis) in den Daten besser herausarbeiten (siehe Bsp. https://www.kaggle.com/jsaguiar/seismic-data-exploration Standard deviation)

Tiefpassfilter um noise zu entfernen? -> gibt es einen Kernel dazu?

In [None]:
def plot_Earthquake(failure):
    """Draw `failure` into every cell of an 8x2 grid of subplots via plot_acc_ttf_data.

    failure: object passed on to plot_acc_ttf_data; len(failure) is used as final_idx
    """
    # NOTE(review): every grid cell receives the same `failure` argument, so all 16
    # subplots would show identical data - confirm whether one earthquake per cell
    # was intended.
    # NOTE(review): `i` is neither a parameter nor a local of this function; it is
    # looked up in the enclosing scope when the function runs.
    # NOTE(review): the call passes `ax1=col`; confirm that plot_acc_ttf_data accepts an
    # `ax1` keyword, otherwise this raises TypeError.
    fig, ax = plt.subplots(nrows=8, ncols=2,figsize=(15,10))
    for row in ax:
        for col in row:
            plot_acc_ttf_data(failure,final_idx=len(failure),ax1=col, step=100, title="Earthquake from file failure{}.npy".format(i))

# Quellen

  -  CNN Spectrum: https://www.kaggle.com/michael422/spectrogram-convolution