In [1]:
import spotchaos.syntheticSignals as sp

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.animation as animation
from matplotlib.animation import TimedAnimation
from mpl_toolkits import mplot3d
from mpl_toolkits.axes_grid1 import make_axes_locatable
import time

import os
from astropy.timeseries import LombScargle
from scipy.integrate import RK45, DOP853
from scipy.stats import chi2, iqr
from scipy.signal import argrelextrema, savgol_filter, find_peaks_cwt
from scipy.spatial.distance import chebyshev
from scipy.optimize import curve_fit

from pytisean import tiseano, tiseanio

from sklearn.neighbors import KDTree, BallTree
from scipy.spatial import KDTree as scipyKDTree
#%matplotlib inline

In [2]:
# Time series:
# 0. White noise
# 1. stationary GP time series
# 2. Simple periodic
# 3. KB88 R(t)
# 4. Rossler x, y, z
# 5. Transformed rossler u, v, w
# 6. Lorenz x, y, z

# and for each we have 3 baselines: TESS (perfect = 257062 data points), Kepler (perfect = 71422 data points), SPECULOOS (perfect = 44119 data points)
#                   and 4 versions: perfect, gapless+noisy, gappy+noiseless, realistic
# yielding 13*3*4 = 156 time series to analyze altogether (13 series: items 0-3 count once each; items 4-6 count three times each, for x/y/z or u/v/w). (But only 78 files, because the perfect and noisy versions are in the same file.)

# Need to choose:
# 1. time delay (Fraser & Swinney 1986)
# 2. embedding dimension (Cao 1997)
# and then calculate
# 1. correlation dimension (Kurths & Herzel 1987; investigate pytisean too. Look into Theiler window.)
# 2. Lyapunov spectrum (Wolf et al. 1985)
# 3. Lyapunov dimension (Kaplan-Yorke conjecture; see Eckmann & Ruelle 1985 eq 4.11).

# Expected results:
# 0. White noise should have no good choice of time delay; mutual info as a function of delay time should be flat.
# 1. Not sure!
# 2. Simple periodic and KB88 R(t) results should be similar to KB88 figures 9 and 10.
# 3. Same as 2: KB88 R(t) results should be similar to KB88 figures 9 and 10.
# 4. Rossler x, y, and z should yield time delay ~ 1/4*rossler_qp and embedding dimension of 3. Rossler z results should be worse than x and y.
# 5. Transformed Rossler u, v, and w should yield time delay ~1/4*transformed_rossler_qp and embedding dimension of 3. Variables should be about equally good.
# 6. Lorenz x, y, and z should yield time delay ~1/4*lorenz_qp and embedding dimension of 3. z dimension will be insensitive to wing symmetry.


In [3]:
# Sub-directories of ../data/, one per family of synthetic time series.
# Each directory name is the family's index in the numbered list of series
# followed by the family name.
_series_families = (
    'gaussian_noise',
    'gaussian_process',
    'simple_periodic',
    'KB88_r',
    'rossler',
    'transformed_rossler',
    'lorenz',
)
folderpaths = ["{0}_{1}".format(idx, family) for idx, family in enumerate(_series_families)]

# We are working only with the gapless versions for now.

In [47]:
# Correlation sum / dimension / entropy via sp.d2_tisean for every
# SPECULOOS-baseline "perfect" file (each such file carries both the noiseless
# and the noisy version of a series).
#
# Per file:
#   1. choose the delay index with sp.FS86 (Fraser & Swinney 1986),
#   2. keep the first N_POINTS_D2 samples and rescale them to [0, 1],
#   3. run sp.d2_tisean on the noiseless and the noisy series,
#   4. save a 2x3 diagnostic figure and the c2/d2/h2 arrays to the working directory.
#
# Currently disabled (previously kept here as commented-out code): the
# time-series / delay-plot figures, the Cao (1997) embedding-dimension step
# (sp.cao97) and the per-m sp.Cq slope maps. Because the Cao step is off,
# sat_m and noisy_sat_m are placeholders set to None.

N_POINTS_D2 = 5000       # number of leading samples passed to d2_tisean
D2_MAX_EMBED_DIM = 10    # run all the way up to m=10, just to see how things scale
D2_PANELS = (("c2", "correlation sum"),
             ("d2", "correlation dimension"),
             ("h2", "correlation entropy"))

for folder in folderpaths:
    # Close the directory handle deterministically, and sort the entries so the
    # processing order (and the printed log) is reproducible between runs.
    with os.scandir("../data/{0}/".format(folder)) as entries:
        files = sorted(entries, key=lambda entry: entry.name)

    for file in files:
        if not (".txt" in file.name and "perfect" in file.name and "speculoos" in file.name):
            continue
        print(file.name)

        # Output-file prefix shared by the figure and all saved arrays.
        stem = file.name.split(".txt")[0]

        data = np.genfromtxt(file.path)

        # White-noise files are identified by their "0_" name prefix. A plain
        # '"0" in file.name' test would also match any other file that merely
        # contains the digit 0 somewhere in its name.
        is_white_noise = file.name.startswith("0_")

        # Column 0 is passed to FS86 as the time axis; column 1 is used as the
        # noiseless series and column 2 as the noisy one. For white noise both
        # series are read from column 2.
        # NOTE(review): presumably white noise has no separate noiseless column - confirm.
        if is_white_noise:
            ts = data[:,2]
        else:
            ts = data[:,1]
        noisy_ts = data[:,2]

        # Quasi-period estimator handed to FS86.
        if is_white_noise or ("lorenz" in file.name and "_z_" not in file.name):
            QPmethod = "localMaxSep"
        else:
            QPmethod = "power"

        # choose time delay (Fraser & Swinney 1986)
        tauMethod = "global_min" if is_white_noise else "first_or_second_local_min"
        mutInfo, bestTauIdx = sp.FS86(time = data[:,0], timeSeries = ts, QPmethod=QPmethod, method=tauMethod, plot=False)
        noisy_mutInfo, noisy_bestTauIdx = sp.FS86(time = data[:,0], timeSeries = noisy_ts, QPmethod=QPmethod, method=tauMethod, plot=False)

        print("perfect tauIdx is {0}, noisy tauIdx is {1}".format(bestTauIdx, noisy_bestTauIdx))

        # Placeholders while the Cao (1997) embedding-dimension step is disabled.
        sat_m = None
        noisy_sat_m = None
        print("sat_m is {0}".format(sat_m))
        print("noisy_sat_m is {0}".format(noisy_sat_m))

        print("original time series length is {0}".format(len(ts)))
        ts = ts[:N_POINTS_D2]
        noisy_ts = noisy_ts[:N_POINTS_D2]

        # scale ts to be between 0 and 1
        ts = (ts - np.min(ts))/np.ptp(ts)
        noisy_ts = (noisy_ts - np.min(noisy_ts))/np.ptp(noisy_ts)

        # Time the two d2 runs separately. The keyword is spelled "thelier" in
        # sp.d2_tisean's signature, so it is passed through unchanged.
        start = time.time()
        d2dict = sp.d2_tisean(timeSeries=ts,tau=bestTauIdx,m=D2_MAX_EMBED_DIM,thelier=0)
        firstd2call = time.time()
        noisy_d2dict = sp.d2_tisean(timeSeries=noisy_ts,tau=noisy_bestTauIdx,m=D2_MAX_EMBED_DIM,thelier=0)
        noisyd2call = time.time()

        print("first d2 call took {0} seconds".format(np.round(firstd2call-start,2)))
        print("noisy d2 call took {0} seconds".format(np.round(noisyd2call-firstd2call,2)))

        print("non-noisy params:")
        print(bestTauIdx)
        print(sat_m)
        for key, _ in D2_PANELS:
            print(np.shape(d2dict[key]))

        print("noisy params:")
        print(noisy_bestTauIdx)
        print(noisy_sat_m)
        for key, _ in D2_PANELS:
            print(np.shape(noisy_d2dict[key]))

        # Top row: noiseless series; bottom row: noisy series.
        # Columns: correlation sum, correlation dimension, correlation entropy,
        # each plotted against length scale.
        fig, axes = plt.subplots(2,3,figsize=(18,8))
        for row, result in enumerate((d2dict, noisy_d2dict)):
            for col, (key, ylabel) in enumerate(D2_PANELS):
                ax = axes[row,col]
                ax.scatter(result[key][:,0], result[key][:,1],s=0.1)
                ax.set_ylabel(ylabel)
                ax.set_xscale('log')
                ax.set_xlabel('length scale')
            # only the correlation sum is shown on a logarithmic y axis
            axes[row,0].set_yscale('log')

        axes[0,0].set_title("Mmax = {0}".format(sat_m))
        axes[1,0].set_title("noisy Mmax = {0}".format(noisy_sat_m))

        fig.savefig("./{0}_tisean_short_norm0to1_fixBoxAssist_allboxes.png".format(stem),bbox_inches="tight")
        # close explicitly so figures do not accumulate across loop iterations
        plt.close(fig)

        # Persist the raw d2 outputs; noisy results carry a "noisy_" infix.
        for infix, result in (("", d2dict), ("noisy_", noisy_d2dict)):
            for key, _ in D2_PANELS:
                np.save("./{0}_{1}tisean_{2}_short_norm0to1_fixBoxAssist_allboxes.npy".format(stem, infix, key), result[key])

0_gaussianNoise_speculoos_perfect.txt
perfect tauIdx is 119, noisy tauIdx is 119
sat_m is None
noisy_sat_m is None
original time series length is 44119

Additional non-data files were created
	Nonsilent mode chosen, displaying additional content:

File outFile.stat contains:
Center points treated so far= 3928
Maximal epsilon in the moment= 1.000000e+00


Additional non-data files were created
	Nonsilent mode chosen, displaying additional content:

File outFile.stat contains:
Center points treated so far= 3928
Maximal epsilon in the moment= 1.000000e+00

first d2 call took 2.63 seconds
noisy d2 call took 2.6 seconds
non-noisy params:
119
None
(1000, 2)
(691, 2)
(701, 2)
noisy params:
119
None
(1000, 2)
(691, 2)
(701, 2)
1_gaussianProcess_speculoos_perfect.txt
perfect tauIdx is 85, noisy tauIdx is 81
sat_m is None
noisy_sat_m is None
original time series length is 44119

Additional non-data files were created
	Nonsilent mode chosen, displaying additional content:

File outFile.stat conta

# Test scikit-learn KD tree implementation for faster NN searching

In [None]:
from sklearn.neighbors import KDTree, BallTree
from scipy.spatial import KDTree as scipyKDTree

In [6]:
# Benchmark three tree implementations for fixed-radius neighbour searches on
# the delay-embedded (noiseless) series of every SPECULOOS-baseline "perfect"
# file: scikit-learn KDTree, scikit-learn BallTree and scipy.spatial KDTree.
#
# The delay matrix is built once per file, outside the timed regions, so the
# reported build/query times measure the trees only and not the embedding.

NEIGHBOR_RADIUS = 0.1    # search radius (the series is rescaled to [0, 1] first)
TREE_LEAF_SIZE = 10

for folder in folderpaths:
    # Close the directory handle deterministically, and sort the entries so the
    # processing order (and the printed log) is reproducible between runs.
    with os.scandir("../data/{0}/".format(folder)) as entries:
        files = sorted(entries, key=lambda entry: entry.name)

    for file in files:
        if not (".txt" in file.name and "perfect" in file.name and "speculoos" in file.name):
            continue
        print(file.name)

        data = np.genfromtxt(file.path)

        # White-noise files are identified by their "0_" name prefix. A plain
        # '"0" in file.name' test would also match any other file that merely
        # contains the digit 0 somewhere in its name.
        is_white_noise = file.name.startswith("0_")

        # Column 0 is passed to FS86 as the time axis; column 1 is used as the
        # noiseless series and column 2 as the noisy one. For white noise both
        # series are read from column 2.
        if is_white_noise:
            ts = data[:,2]
        else:
            ts = data[:,1]
        noisy_ts = data[:,2]

        # Quasi-period estimator handed to FS86.
        if is_white_noise or ("lorenz" in file.name and "_z_" not in file.name):
            QPmethod = "localMaxSep"
        else:
            QPmethod = "power"

        # choose time delay (Fraser & Swinney 1986)
        tauMethod = "global_min" if is_white_noise else "first_or_second_local_min"
        mutInfo, bestTauIdx = sp.FS86(time = data[:,0], timeSeries = ts, QPmethod=QPmethod, method=tauMethod, plot=False)
        # NOTE(review): the noisy delay is not used anywhere in this cell; it is
        # kept only in case a later cell reads noisy_bestTauIdx. Drop this call
        # if nothing does - it roughly doubles the FS86 cost per file.
        noisy_mutInfo, noisy_bestTauIdx = sp.FS86(time = data[:,0], timeSeries = noisy_ts, QPmethod=QPmethod, method=tauMethod, plot=False)

        # Embedding dimension is fixed by hand here (the Cao 1997 step is not run).
        sat_m = 3

        print("original time series length is {0}".format(len(ts)))

        # scale ts to be between 0 and 1
        ts = (ts - np.min(ts))/np.ptp(ts)

        # Build the embedding once and reuse it for every tree and query.
        delayMat = sp.delayMatrix(ts, tau=bestTauIdx, m=sat_m)
        print("delay matrix shape is {0}".format(np.shape(delayMat)))

        # --- scikit-learn KDTree ---
        start_skl = time.perf_counter()
        skl_KDTree = KDTree(delayMat, leaf_size=TREE_LEAF_SIZE, metric='euclidean')
        mid_skl = time.perf_counter()
        # count_only=True returns, for each point, the number of points within the
        # radius. These counts include the point itself and see each pair twice,
        # so the number of distinct pairs is (sum(counts) - len(delayMat)) / 2.
        skl_KDTree_pairs = skl_KDTree.query_radius(delayMat, r=NEIGHBOR_RADIUS, count_only=True)
        skl = time.perf_counter()
        print("time to construct scikit-learn KDTree = {0} seconds".format(mid_skl-start_skl))
        print("time to find all pairs within distance {0} of each other is {1} seconds".format(NEIGHBOR_RADIUS, skl-mid_skl))

        # --- scikit-learn BallTree ---
        start_ball = time.perf_counter()
        skl_BallTree = BallTree(delayMat, leaf_size=TREE_LEAF_SIZE, metric='euclidean')
        mid_ball = time.perf_counter()
        # same counting convention as the KDTree query above
        skl_BallTree_pairs = skl_BallTree.query_radius(delayMat, r=NEIGHBOR_RADIUS, count_only=True)
        ball = time.perf_counter()
        print("time to construct scikit-learn BallTree = {0} seconds".format(mid_ball - start_ball))
        print("time to find all pairs within distance {0} of each other is {1} seconds".format(NEIGHBOR_RADIUS, ball - mid_ball))

        # --- scipy.spatial KDTree ---
        start_scp = time.perf_counter()
        scp_KDTree = scipyKDTree(delayMat, leafsize=TREE_LEAF_SIZE, compact_nodes=True, balanced_tree=True)
        mid_scp = time.perf_counter()
        # query_pairs is asked for the index pairs as an ndarray (p=2.0: Euclidean
        # distance), so unlike the count-only scikit-learn queries it builds the
        # full pair list.
        scp_KDTree_pairs = scp_KDTree.query_pairs(r=NEIGHBOR_RADIUS, p=2.0, output_type='ndarray')
        scp = time.perf_counter()
        print("time to construct scipy.spatial KDTree = {0} seconds".format(mid_scp - start_scp))
        print("time to find all pairs within distance {0} of each other is {1} seconds".format(NEIGHBOR_RADIUS, scp - mid_scp))
            

            

5_transformed_rossler_z_speculoos_perfect.txt
original time series length is 44119
delay matrix shape is (43957, 3)
time to construct scikit-learn KDTree = 0.02316904067993164 seconds
time to find all pairs within distance 0.1 of each other is 2.1808009147644043 seconds
time to construct scikit-learn BallTree = 0.022363901138305664 seconds
time to find all pairs within distance 0.1 of each other is 0.4623420238494873 seconds
time to construct scipy.spatial KDTree = 0.014407157897949219 seconds
time to find all pairs within distance 0.1 of each other is 2.1941909790039062 seconds
5_transformed_rossler_x_speculoos_perfect.txt
original time series length is 44119
delay matrix shape is (43973, 3)
time to construct scikit-learn KDTree = 0.025532007217407227 seconds
time to find all pairs within distance 0.1 of each other is 3.2650721073150635 seconds
time to construct scikit-learn BallTree = 0.017796039581298828 seconds
time to find all pairs within distance 0.1 of each other is 0.642973899

In [87]:
# Show the distance-metric names listed in scikit-learn's KDTree.valid_metrics.
print(KDTree.valid_metrics)

['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity']


In [55]:
# Show the distance-metric names listed in scikit-learn's BallTree.valid_metrics
# (for comparison with the KDTree list).
print(BallTree.valid_metrics)

['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc']
