In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import datetime
import os
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from matplotlib import pyplot
from pylab import rcParams
import pickle as pk
import gc
import requests 
import matplotlib.gridspec as gridspec
import netCDF4 as nc
import hvplot
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
from holoviews import opts

In [None]:
from IPython.display import Image

# Render the remote illustration inline (presumably the covariance formula the
# following text refers to -- TODO confirm). Requires network access.
Image(url="https://cdn-images-1.medium.com/max/800/1*w-05-YnuJMl84KMSRw1J2g.png")

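Covariance is scale-dependent: its magnitude changes with the units of the variables, so it has to be normalized before the strength of two relationships can be compared.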

#### Pearson's Correlation Coefficient

In [None]:
def mean(x):
    """Return the arithmetic mean of ``x``.

    ``x`` must work with both ``sum()`` and ``len()``. An empty sequence
    raises ``ZeroDivisionError``.
    """
    total = sum(x)
    count = len(x)
    return total / count

def covariance(x,y):
    """Return the sample covariance of two equal-length sequences.

    The sum of the products of paired deviations from the mean is divided
    by ``n - 1`` (Bessel's correction).

    Parameters
    ----------
    x, y : sequences of numbers with the same length (at least 2).

    Raises
    ------
    ValueError
        If the lengths differ or fewer than two observations are given.
    """
    n = len(x)
    if len(y) != n:
        raise ValueError("x and y must have the same length")
    if n < 2:
        raise ValueError("sample covariance needs at least two observations")

    # The means do not change inside the loop, so compute them once rather
    # than once per element.
    mean_x = sum(x) / n
    mean_y = sum(y) / n

    # zip pairs the values by position, independent of how x and y are indexed.
    cross_products = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    return cross_products / (n - 1)
    
# Two mirrored toy series: b falls by one each time a rises by one.
a = [1, 2, 3, 4, 5]
b = [5, 4, 3, 2, 1]
print(covariance(a, b))

#### Normalizing covariance

In [None]:
import math

def stDev(x):
    """Return the population standard deviation of ``x``.

    This is the square root of the average squared deviation from the mean
    (divisor ``len(x)``). An empty input returns ``0.0``.
    """
    n = len(x)
    if n == 0:
        return 0.0

    # Compute the mean once; it is the same for every element.
    center = sum(x) / n
    # Square the *deviation* (value - mean), not the mean itself.
    variance = sum((value - center) ** 2 for value in x) / n
    return math.sqrt(variance)
    
def Pearsons(x,y):
    """Return Pearson's correlation coefficient r of two sequences.

    r is the sum of the products of paired deviations divided by the square
    root of the product of the two sums of squared deviations. Written this
    way the 1/n (or 1/(n-1)) factors of covariance and standard deviation
    cancel, so the result always lies in [-1, 1] up to rounding. Dividing an
    ``n - 1`` covariance by ``n``-based standard deviations would instead
    inflate r by ``n / (n - 1)``.

    Parameters
    ----------
    x, y : sequences of numbers with the same length (at least 2).

    Raises
    ------
    ValueError
        If the lengths differ, fewer than two observations are given, or
        either input has zero variance (r is undefined in that case).
    """
    n = len(x)
    if len(y) != n:
        raise ValueError("x and y must have the same length")
    if n < 2:
        raise ValueError("correlation needs at least two observations")

    mean_x = sum(x) / n
    mean_y = sum(y) / n
    dev_x = [xi - mean_x for xi in x]
    dev_y = [yi - mean_y for yi in y]

    ss_x = sum(d * d for d in dev_x)
    ss_y = sum(d * d for d in dev_y)
    if ss_x == 0 or ss_y == 0:
        raise ValueError("correlation is undefined when an input is constant")

    cross = sum(dx * dy for dx, dy in zip(dev_x, dev_y))
    return cross / math.sqrt(ss_x * ss_y)

#### Import Seismic Data 

In [None]:
# Read the hypo71 catalog with every field as a string (dtype=object) so that
# 'yyyymmdd' and 'HHMM' can be combined with string operations below.
seismic_file = '/home/jovyan/data/hypo71.dat.txt'
# sep=r'\s+' is the non-deprecated equivalent of delim_whitespace=True.
df_seismic_data = pd.read_csv(seismic_file, sep=r'\s+', dtype=object)

# Build 'yyyymmddTHH:MM' strings and parse them once. The seconds column
# ('SSS.SS') is not part of the string, so timestamps have minute resolution.
event_strings = (
    df_seismic_data['yyyymmdd'] + 'T'
    + df_seismic_data['HHMM'].str.slice(start=0, stop=2) + ':'
    + df_seismic_data['HHMM'].str.slice(start=2)
)
event_times = pd.to_datetime(event_strings.values)
df_seismic_data['datetime'] = event_times
df_seismic_data.index = event_times

# Restrict to the study period. .copy() makes the result an independent frame,
# so the column edits below do not act on a view of the full catalog.
# NOTE(review): label slicing assumes the catalog is in chronological order --
# confirm against the data file.
df_seismic_data = df_seismic_data.loc['2017-01-1 00:00:00':'2019-06-17 00:00:00'].copy()

# Drop the columns this analysis does not use. 'yyyymmdd', 'HHMM', 'SSS.SS'
# and 'Depth' are kept on purpose.
df_seismic_data = df_seismic_data.drop(columns=[
    'Lon(D',
    'M)',
    'M).1',
    'NWR',
    'GAP',
    'DMIN',
    'ERH',
    'ERZ',
    'ID',
    'Lat(D',
    'PMom',
    'SMom',
])

# Convert the columns used numerically from strings to floats.
df_seismic_data = df_seismic_data.astype(
    {'Depth': 'float64', 'MW': 'float64', 'RMS': 'float64'}
)

In [None]:
# Calendar date (without time of day) of every event. The original cell also
# evaluated an epoch conversion whose result was never stored or displayed;
# that discarded expression is dropped.
df_seismic_data['date'] = pd.DatetimeIndex(df_seismic_data['datetime']).date
df_seismic_data.tail()

### MW Average

In [None]:
# Short aliases for the two columns analysed in the following cells.
mw = df_seismic_data['MW']
depth = df_seismic_data['Depth']

# Column means, kept individually for the deviation calculations.
mw_average = mw.mean()
depth_average = depth.mean()

# Show both means together as (MW mean, Depth mean).
average = (mw_average, depth_average)
average

### MW Histogram

In [None]:
# Histogram of the MW column with default binning; ec='w' outlines the bars
# in white.
df_seismic_data.hist(column='MW', ec='w')

### Depth Histogram

In [None]:
# Histogram of the Depth column with unit-width bins whose edges are
# 0, 1, ..., 5; values outside that range are not drawn.
df_seismic_data.hist(column='Depth', bins=np.arange(0, 6), ec='w')

In [None]:
# Deviation of every event from the column mean, kept both as stand-alone
# Series and as new columns on the frame.
deviations_mw = mw.sub(mw_average)
deviations_depth = depth.sub(depth_average)

df_seismic_data['MW Dev'] = deviations_mw
df_seismic_data['Depth Dev'] = deviations_depth

df_seismic_data.head()

In [None]:
# Sanity check: deviations from the mean should sum to zero, up to
# floating-point rounding error.
deviations_mw.sum()

In [None]:
# Square each deviation so positive and negative departures from the mean
# no longer cancel, and keep the result as a column as well.
sqdev_mw = deviations_mw.pow(2)
df_seismic_data['MW Squared Deviation'] = sqdev_mw

df_seismic_data.head()

#### MW Variance

In [None]:
# The variance of the data is the average of the squared deviations.
# Taking the plain mean (no N - 1 correction) makes this the population
# variance.
variance_mw = sqdev_mw.mean()
variance_mw

#### MW STDEV

In [None]:
# The standard deviation (SD) is the square root of the variance, which
# brings the spread back to the same units as the MW values themselves.

sd_mw = variance_mw ** 0.5
sd_mw

#### Depth vs MW

In [None]:
# Scatter Depth against MW, then write the figure that holds the axes to disk.
ax1 = df_seismic_data.plot.scatter(x='MW', y='Depth')
ax1.get_figure().savefig('/home/jovyan/botpt/figures/mwvsdepth.png')

#### Standard Units

In [None]:
def standard_units(x):
    """Convert an array of numbers to standard units.

    Each value is expressed as its distance from the average of ``x``,
    measured in multiples of ``np.std(x)``.
    """
    center = np.average(x)
    spread = np.std(x)
    return (x - center) / spread

In [None]:
# Scatter MW against Depth with both columns converted to standard units, so
# the two axes share a common, unit-free scale.
fig, ax = plt.subplots()
ax.scatter(
    standard_units(df_seismic_data['MW']),
    standard_units(df_seismic_data['Depth']),
)
ax.set_xlabel('MW (standard units)')
ax.set_ylabel('Depth (standard units)')
# Show the same +/-3 standard-unit window on both axes.
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3);

In [None]:
# Calculating R

In [None]:
# Daily aggregates keyed by calendar date.
# numeric_only=True restricts the mean to numeric columns. The frame still
# carries string columns, and averaging those raises TypeError under the
# pandas >= 2.0 default.
df_eqMean = df_seismic_data.groupby('date').mean(numeric_only=True)
# count() tallies the non-null entries of every column for each day.
df_eqCount = df_seismic_data.groupby('date').count()
df_eqMean.tail()

In [None]:
# Use the per-day tally of the 'datetime' column as the number of events on
# that day, stored as floats.
df_eqCount['count'] = df_eqCount['datetime'].astype('float64').to_numpy()
df_eqCount.head()

In [None]:
# Copy the daily event count onto the daily-mean table. Both tables come from
# groupby('date') on the same frame, so their rows are in the same order.
df_eqMean['count'] = df_eqCount['count'].values
# A Series exposes its underlying array through the `.values` attribute;
# it has no `.value()` method.
type(df_eqMean['count'].values)

In [None]:
# Give every event the number of events recorded on its calendar day.
# df_eqMean is indexed by date while df_seismic_data is indexed by event
# timestamp, so assigning the Series directly would align on mismatched
# labels. Look each event's date up in the daily counts instead.
df_seismic_data['frequency'] = df_seismic_data['date'].map(df_eqMean['count'])
df_seismic_data.head(5)

#### Frequency vs MW

In [None]:
# Scatter the 'frequency' column against MW.
ax2 = df_seismic_data.plot.scatter(x='MW', y='frequency')

#### Frequency vs Depth 

In [None]:
# Scatter the 'frequency' column against Depth.
ax3 = df_seismic_data.plot.scatter(x='Depth', y='frequency')