## Author: Alexandru Paul Tabacaru  ;  alextabac@gmail.com
## MIT License

In [116]:
import pandas as pd
import numpy as np
from math import floor
import scipy.stats as stats
import os
import json
import random
from sklearn.model_selection import train_test_split
from pandas_profiling import ProfileReport
from datetime import datetime
from collections import deque
from MASS_V4 import MASS_V4
# from importlib import reload

## Paths and Data Files

In [2]:
os.getcwd()

'C:\\ALEX\\BGU\\TDA\\PROJECT\\TSAD_project'

In [3]:
data_path = "..\\..\\sensors_data"
data_folder = os.path.join(os.getcwd(), data_path)
files1 = os.listdir(data_folder)
files2 = [os.path.join(data_folder, f) for f in files1 if f.endswith("_trim.tab")]
files = [f for f in files2 if os.path.isfile(f)]
delimiter = "\t"

In [4]:
files

['C:\\ALEX\\BGU\\TDA\\PROJECT\\TSAD_project\\..\\..\\sensors_data\\fdc_F28_PROD_XEUS_trim.tab']

## Equipment digits explained:
### first digit is the XEUS NOD number:  1, 2, 3, or 4 otherwise
### second digit is the SITE within the NOD: 0, 1, 2.
### third digit is the entity number: 0 to 6 at most
### fourth digit is the side of the entity:  0 or 1

In [5]:
class Data_Preprocess:
    """
    Raw data is aggregated over 1hr time window, and each 1hr will be represented by mean/std-dev.
    """
    
    def __init__(self):
        self.df = None
        self.dfs = []
        
    def load_data(self, filename, delimiter):
        df = pd.read_csv(filename, delimiter=delimiter)
        print(f"File loaded with {len(df)} rows.")
        all_cols = True
        needed_cols = ['RUN_START_DATE', 'Equip', 'Feature', 'PREP_VALUE']
        cols = list(df.columns)
        miss_cols = []
        for col in needed_cols:
            if col not in cols:
                all_cols = False
                miss_cols.append(col)
        if len(miss_cols) > 0:
            print("Cound not find the following columns: " + ",".join(miss_cols))
        if all_cols:
            df['RUN_START_DATE'] = pd.to_datetime(df['RUN_START_DATE'])
            df = df.sort_values('RUN_START_DATE')
            df.insert(0, 'time', df['RUN_START_DATE'].dt.floor('H'))
            df['mean'] = df.groupby(['Equip', 'Feature'], as_index=False)['PREP_VALUE'].transform('mean')
            df['std'] = df.groupby(['Equip', 'Feature'], as_index=False)['PREP_VALUE'].transform('std')
            df['norm'] = (df['PREP_VALUE'] - df['mean']) / df['std']
            df = df.drop(['PREP_VALUE', 'mean', 'std'], axis=1)
            df = df.groupby(['time', 'RUN_START_WW', 'Equip', 'Feature'], as_index=False)['norm'].\
             agg(['mean', 'std']).reset_index().fillna(0)
            df = df.melt(id_vars=['time', 'RUN_START_WW', 'Equip', 'Feature'], value_vars=['mean', 'std'])
            df['series'] = df['Feature'] + "_" + df['variable']
            df = df.drop(['Feature', 'variable'], axis=1)
            self.df = df
    
    def prepare_series(self):
        self.df['key'] = self.df['Equip'] + "_" + self.df['series']
        uniq_keys = self.df['key'].unique()
        dfs = []
        for ukey in uniq_keys:
            print(f"Preparing key series {ukey} ...")
            ddf = self.df[self.df['key'] == ukey]
            ddf = ddf.sort_values(by=['series', 'Equip', 'time'], ascending=[True, True, True])
            ddf = ddf.reset_index(drop=True)
            ddf_list = []
            self.recur_split_series_no_multi_clusters(ukey, ddf, ddf_list, ave_size=20, threshold=1.8)
            for ddfl in ddf_list:
                ddfl = ddfl.sort_values(by=['series', 'Equip', 'time'], ascending=[True, True, True])
                ddfl = ddfl.reset_index(drop=True)
                m = np.mean(ddfl['value'])
                s = np.std(ddfl['value'])
                ddfl['norm'] = (ddfl['value'] - m) / s
                ddfl = ddfl.drop(['value'], axis=1)
                dfs.append(ddfl)
        self.dfs = dfs
    
    def recur_split_series_no_multi_clusters(self, ukey, df, ddf_list, ave_size=10, threshold=1.5):
        delta, indx = self.get_series_split_max_distance(df, ave_size=ave_size)
        if delta > threshold:
            print(f"Found two clusters or more and need split, in dataset {ukey}, indx {indx}, delta {delta}.")
            self.recur_split_series_no_multi_clusters(ukey, df[indx:], ddf_list, ave_size, threshold)
            self.recur_split_series_no_multi_clusters(ukey, df[:indx], ddf_list, ave_size, threshold)
        ddf_list.append(df)
        
    def get_series_split_max_distance(self, df, ave_size=10):
        delta = 0
        ki = 0
        for i in range(ave_size, len(df) - ave_size):
            k1 = np.mean(df[(i-ave_size+1):i]['value'].values)
            k2 = np.mean(df[i:(i+ave_size-1)]['value'].values)
            d = abs(k1 - k2)
            if d > delta:
                delta = d
                ki = i
        return delta, ki
    
    

In [6]:
data_obj = Data_Preprocess()
data_obj.load_data(files[0], delimiter)

File loaded with 11568827 rows.


In [7]:
len(data_obj.df)

892868

In [8]:
data_obj.prepare_series()

Preparing key series Equip2000_Feature2_mean ...
Preparing key series Equip2000_Feature5_mean ...
Preparing key series Equip2000_Feature6_mean ...
Preparing key series Equip2000_Feature7_mean ...
Preparing key series Equip2000_Feature8_mean ...
Preparing key series Equip2001_Feature2_mean ...
Preparing key series Equip2001_Feature5_mean ...
Preparing key series Equip2001_Feature6_mean ...
Preparing key series Equip2001_Feature7_mean ...
Preparing key series Equip2001_Feature8_mean ...
Preparing key series Equip2010_Feature0_mean ...
Found two clusters or more and need split, in dataset Equip2010_Feature0_mean, indx 2909, delta 3.273769076852412.
Found two clusters or more and need split, in dataset Equip2010_Feature0_mean, indx 2888, delta 2.4586883267019286.
Preparing key series Equip2010_Feature1_mean ...
Preparing key series Equip2010_Feature2_mean ...
Preparing key series Equip2010_Feature3_mean ...
Found two clusters or more and need split, in dataset Equip2010_Feature3_mean, indx

Preparing key series Equip2001_Feature9_mean ...
Preparing key series Equip2020_Feature0_mean ...
Found two clusters or more and need split, in dataset Equip2020_Feature0_mean, indx 377, delta 4.044147563764092.
Preparing key series Equip2020_Feature1_mean ...
Found two clusters or more and need split, in dataset Equip2020_Feature1_mean, indx 357, delta 6.437874497983519.
Found two clusters or more and need split, in dataset Equip2020_Feature1_mean, indx 20, delta 6.35043766542879.
Preparing key series Equip2020_Feature2_mean ...
Preparing key series Equip2020_Feature3_mean ...
Found two clusters or more and need split, in dataset Equip2020_Feature3_mean, indx 357, delta 6.063637492193319.
Found two clusters or more and need split, in dataset Equip2020_Feature3_mean, indx 37, delta 5.102636636922242.
Preparing key series Equip2020_Feature4_mean ...
Preparing key series Equip2020_Feature5_mean ...
Preparing key series Equip2020_Feature6_mean ...
Preparing key series Equip2020_Feature7_m

Preparing key series Equip2031_Feature4_std ...
Preparing key series Equip2031_Feature9_std ...
Preparing key series Equip2050_Feature0_std ...
Preparing key series Equip2050_Feature1_std ...
Preparing key series Equip2050_Feature2_std ...
Preparing key series Equip2050_Feature3_std ...
Preparing key series Equip2050_Feature4_std ...
Preparing key series Equip2050_Feature5_std ...
Preparing key series Equip2050_Feature6_std ...
Preparing key series Equip2050_Feature7_std ...
Preparing key series Equip2050_Feature8_std ...
Preparing key series Equip2050_Feature9_std ...
Preparing key series Equip2051_Feature0_std ...
Preparing key series Equip2051_Feature1_std ...
Preparing key series Equip2051_Feature2_std ...
Preparing key series Equip2051_Feature3_std ...
Preparing key series Equip2051_Feature4_std ...
Preparing key series Equip2051_Feature5_std ...
Preparing key series Equip2051_Feature6_std ...
Preparing key series Equip2051_Feature7_std ...
Preparing key series Equip2051_Feature8_

In [9]:
len(data_obj.dfs)

420

In [10]:
type(data_obj.dfs)

list

In [11]:
N = 0
for df in data_obj.dfs:
    N += len(df)
print(N)

1071834


In [12]:
np.mean(data_obj.dfs[5])

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


RUN_START_WW    2.022374e+05
norm            5.982616e-17
dtype: float64

In [13]:
np.std(data_obj.dfs[5])

  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


time            43 days 14:42:36.826547264
RUN_START_WW                      6.238579
norm                                   1.0
dtype: object

In [14]:
data_obj.dfs[5].head(5)

Unnamed: 0,time,RUN_START_WW,Equip,series,key,norm
0,2022-06-26 23:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,2.946667
1,2022-06-27 00:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,1.225551
2,2022-06-27 01:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,0.926174
3,2022-06-27 02:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.380895
4,2022-06-27 03:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.941805


In [15]:
for i in range(20):
    print(data_obj.dfs[i].loc[0, 'key'])

Equip2000_Feature2_mean
Equip2000_Feature5_mean
Equip2000_Feature6_mean
Equip2000_Feature7_mean
Equip2000_Feature8_mean
Equip2001_Feature2_mean
Equip2001_Feature5_mean
Equip2001_Feature6_mean
Equip2001_Feature7_mean
Equip2001_Feature8_mean
Equip2010_Feature0_mean
Equip2010_Feature0_mean
Equip2010_Feature0_mean
Equip2010_Feature0_mean
Equip2010_Feature0_mean
Equip2010_Feature1_mean
Equip2010_Feature2_mean
Equip2010_Feature3_mean
Equip2010_Feature3_mean
Equip2010_Feature3_mean


In [16]:
data_obj.dfs[5]['norm'].mean()

5.982616181790733e-17

In [17]:
data_obj.dfs[5]['norm']

0       2.946667
1       1.225551
2       0.926174
3      -0.380895
4      -0.941805
          ...   
3417   -0.586700
3418    0.526579
3419    0.542681
3420   -0.211703
3421    1.507836
Name: norm, Length: 3422, dtype: float64

In [18]:
data_obj.dfs[5]['norm'] = stats.zscore(data_obj.dfs[5]['norm'])

In [19]:
data_obj.dfs[5]['norm']

0       2.946667
1       1.225551
2       0.926174
3      -0.380895
4      -0.941805
          ...   
3417   -0.586700
3418    0.526579
3419    0.542681
3420   -0.211703
3421    1.507836
Name: norm, Length: 3422, dtype: float64

In [21]:
data_obj.dfs[5]

Unnamed: 0,time,RUN_START_WW,Equip,series,key,norm
0,2022-06-26 23:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,2.946667
1,2022-06-27 00:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,1.225551
2,2022-06-27 01:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,0.926174
3,2022-06-27 02:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.380895
4,2022-06-27 03:00:00,202227,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.941805
...,...,...,...,...,...,...
3417,2022-11-23 22:00:00,202248,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.586700
3418,2022-11-23 23:00:00,202248,Equip2001,Feature2_mean,Equip2001_Feature2_mean,0.526579
3419,2022-11-24 00:00:00,202248,Equip2001,Feature2_mean,Equip2001_Feature2_mean,0.542681
3420,2022-11-24 01:00:00,202248,Equip2001,Feature2_mean,Equip2001_Feature2_mean,-0.211703


In [284]:
w = 24
Q = data_obj.dfs[5][-w:]['norm'].values
T = data_obj.dfs[5]['norm'].values
k = 40

In [285]:
Q

array([-0.25400299,  2.01031419,  0.86405611,  0.60696626,  0.47424785,
       -0.48535571,  0.6448176 ,  1.36591211,  0.19454703, -0.97723858,
        1.68730968,  0.48086607,  1.40361402, -1.6479097 ,  0.10702605,
        0.56809276,  0.57606835, -0.52326287, -0.52523995, -0.58669976,
        0.52657869,  0.54268096, -0.21170283,  1.50783637])

In [286]:
print(len(T))
print(len(Q))

3422
24


In [429]:
import MASS_V4
reload(MASS_V4)
from MASS_V4 import MASS_V4

In [430]:
mass = MASS_V4()

In [431]:
floor((3*w+1)/2)

36

In [432]:
batchs = mass.get_batch_size(floor((3*w+1)/2), 10)

In [433]:
batchs

24

In [434]:
x_pad, y_pad, start_i = mass.dct_padding(T, Q)

In [435]:
print(len(x_pad))
print(len(y_pad))
print(start_i)

5133
5133
12


In [436]:
y_pad[1704:1720]

array([-0.48535571,  0.6448176 ,  1.36591211,  0.19454703, -0.97723858,
        1.68730968,  0.48086607,  1.40361402, -1.6479097 ,  0.10702605,
        0.56809276,  0.57606835, -0.52326287, -0.52523995, -0.58669976,
        0.52657869])

In [437]:
dd = mass.dct_dot_product(T[100: 100+batchs], Q)

len(x_pad)=36 ; len(y_pad)=36 ; si=12
;len(dct_product)=36; max=1.6020569930772304
[-1.22379674  1.60205699 -0.013026   -0.66039187  0.0921591  -0.17337247
  0.56751292  0.19611535  0.73200489 -0.05648803 -0.06056902 -0.79860353
  0.05250123  0.31507301  0.35705535 -0.12132279 -1.92992712 -0.11199922
 -0.3148225  -0.68145271 -0.13849676 -0.0383007  -0.16065825  0.74271807
  0.18200513 -0.68163566 -0.8803833   0.05752488  0.50378482  0.03230446
 -0.01349542  0.41509915 -1.97943537  0.07523414 -1.19860984  1.11006968]
;len(dot_p)=37; max=1.7232598424167624


In [438]:
len(dd)

1

In [439]:
dd

array([6.62977259])

In [440]:
mws = mass.movstd(T[100: 100+batchs], len(Q))

In [441]:
mws

array([1.15171314])

In [442]:
sim = mass.get_similarities(T, Q, k)

len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.2920164520097375
[-2.15236516e-16  1.29390403e-01 -7.34155323e-01 -7.88689752e-01
 -3.89740425e-01 -1.70953333e-01  6.34891372e-02  1.60200342e-01
  5.28501219e-04  4.32912163e-01 -7.33002865e-02 -1.35317281e+00
 -6.39795981e-01 -2.38259236e-01  9.70805228e-02  2.98474319e-01
 -1.21605576e+00 -9.85675408e-02  2.91109582e-01 -6.81773469e-01
 -5.02138896e-04  6.67319620e-01 -3.45398177e-01 -1.47143895e-01
  2.64665285e-01  5.45098195e-01 -2.41171442e-01  7.97461936e-03
 -9.85017465e-01  2.29201645e+00 -9.52236202e-03 -1.93818484e-02
 -3.03006493e-01 -2.69401780e-02  1.07481587e+00  7.44679105e-01
 -1.27034869e-01 -8.41789858e-01  7.53108406e-01]
;len(dot_p)=40; max=1.7031060207682318
max dot_p=1.186876726266879 ; min dot_p=-1.3372093811021863
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.7079412439840544
[ 5.68787454e-17  2.01096582e-01 -5.83477731e-01  6.62914745e-02
  8.39983310e-02 -4.27921009e-01 -8

;len(dot_p)=40; max=1.397504608445994
max dot_p=5.340160361706114 ; min dot_p=-2.081924700765437
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.2657107778183123
[-1.11511625e-16 -9.37947452e-02 -1.04206456e-02 -1.95288262e-01
 -4.44843140e-02 -6.23587382e-02  2.19831743e-02  1.08905758e-01
  4.88136659e-01 -1.22652158e-02 -4.11648324e-02 -2.37363654e-01
  1.82904627e-01  2.40876909e-02 -2.64910384e-03  7.88004176e-01
 -5.31236679e-01 -3.95279018e-01 -9.09241798e-01 -3.97710863e-01
  6.75403422e-03  1.73815113e-01 -1.93350552e-01  5.81381633e-01
 -4.01620753e-01 -2.63968207e-01 -5.14867908e-01  2.69125589e-01
  2.26571078e+00 -3.44866294e+00  1.08216935e-02 -4.84511071e-02
 -1.74597248e-01  4.22662587e-02 -1.70036987e-01  2.19797773e-01
 -1.76096698e-01 -1.99792625e+00  1.39434850e-01]
;len(dot_p)=40; max=1.8963980262649764
max dot_p=6.192065042193354 ; min dot_p=-11.902385546088917
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.5975363648026213
[-9

;len(dot_p)=40; max=1.9761594780510943
max dot_p=4.613995418571062 ; min dot_p=0.09845692553844997
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.4106809832799152
[-4.83491587e-16 -8.14780873e-01  1.41068098e+00 -1.14730997e+00
 -8.42235641e-01  2.10577290e-01  1.09295224e+00 -3.72831751e-01
  3.72883657e-02 -6.55180091e-02 -2.77027979e-02 -1.81755904e-01
  1.90899222e-01  3.07352108e-02  2.39289782e-01 -1.51295102e+00
  1.40823096e+00 -3.80229475e-01  5.08547708e-01 -3.46321300e+00
  4.01715960e-02  1.50001670e-02  1.14721083e-01  1.70594384e-02
 -7.35543799e-03  3.16442073e-02 -6.48376142e-02  1.09839575e-01
 -3.43142581e-01 -8.78998139e-01  1.51733692e-02 -7.50494549e-03
 -1.74036098e-01  1.06486415e-03  6.84604971e-01 -6.84035318e-01
 -3.33080418e-02  1.05194989e+00 -1.53492963e+00]
;len(dot_p)=40; max=1.8357331363667029
max dot_p=7.963549064267058 ; min dot_p=-3.0848008590180704
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.0050770357262584
[

len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.6310319641178876
[-5.55469109e-17 -3.14684665e-02 -5.03185982e-02  9.16268064e-02
  3.38066334e-01 -6.66820919e-03  2.97874286e-01  1.36300283e-01
  1.17156898e-01  2.97308820e-01 -2.12136286e-01 -2.11707753e-02
  1.09118616e-01  3.71215577e-02  9.07604271e-02 -3.75250253e-01
  1.56974782e+00 -5.50100242e-01 -5.07405346e-01  8.92816025e-01
 -9.95176077e-03  4.00055486e-01 -1.43067786e-01  1.35986589e-01
 -2.73954330e-01 -9.81267736e-02 -3.53322884e-01  9.42392275e-02
 -3.46736280e-01  1.63103196e+00 -1.92754655e-02 -7.88966453e-02
 -4.58046842e-01  2.34653763e-02  8.51860301e-01 -1.17641071e+00
  1.93003017e-02 -4.32026117e-01  6.85406209e-01]
;len(dot_p)=40; max=0.7646448336319621
max dot_p=3.3660332463524925 ; min dot_p=-3.4526038775089365
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.3254307594553871
[-1.00987732e-19 -1.62755317e-02 -2.47221305e-01  3.39328626e-01
  7.11552600e-02  9.97377420e-02 -

;len(dot_p)=40; max=1.1159975106294184
max dot_p=1.4894873701281635 ; min dot_p=-1.403573493252336
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=0.5574037731807601
[-3.30216830e-17  5.86402951e-02 -1.01589513e-01 -5.82675679e-01
 -5.75049488e-01 -2.16586183e-01 -1.42260495e-01 -4.74697201e-01
 -9.37699132e-01  5.49772936e-02 -1.27044163e-01  5.57403773e-01
  1.76191358e-01 -8.12028949e-03  2.18374959e-02 -6.62547639e-01
 -6.07912008e-01  2.00817309e-01 -8.72740114e-02 -2.26205889e+00
  3.84674867e-02  5.28393705e-02 -7.63021897e-02  2.86081588e-01
  2.82669470e-01  1.57478198e-01  4.60045417e-01 -3.42262001e-02
 -1.34448249e+00  2.27444210e-02 -4.43769520e-03 -4.54223298e-02
  2.25082963e-02  3.53708052e-02 -8.97090454e-01  4.59622051e-01
 -5.18895408e-02 -5.19448303e-01 -4.57328861e-01]
;len(dot_p)=40; max=1.1252559609897528
max dot_p=5.41661631393311 ; min dot_p=-1.3871952000014214
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.6421265273256282
[ 

;len(dct_product)=39; max=1.798675127876948
[ 2.47359743e-16  1.96077208e-01  2.52594345e-01  4.32853688e-01
  6.71743907e-01 -1.21575578e-01  4.63889682e-01 -2.25543398e-01
  3.77993924e-01 -2.54102455e-01 -9.95832174e-02  8.51206632e-01
  6.20649054e-01  1.55445984e-01  3.65024555e-02 -1.28578733e-01
 -3.51850667e-01 -2.61410815e-01 -2.83775003e+00  1.79867513e+00
  3.67876150e-03 -5.80111887e-01  2.28362304e-01  3.33602040e-01
 -1.00915918e-01  1.00658353e-01  2.14209544e-01 -2.08960327e-01
  6.02056996e-01  1.41521346e+00 -4.17057402e-02 -6.39945176e-02
 -2.95342921e-02  8.42798919e-02 -8.59085111e-01 -7.72027492e-01
 -1.75711317e-02  5.23076493e-01 -1.76683043e+00]
;len(dot_p)=40; max=1.3771395946865614
max dot_p=2.1315764642772748 ; min dot_p=-4.176009729089836
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.7188924492519684
[ 2.41474552e-16  2.97843032e-01 -9.69537628e-02  5.05709132e-01
  4.88415357e-01  2.94913566e-01  5.37982773e-01  2.85096204e-01
  4.14323

;len(dot_p)=40; max=1.4635008328940864
max dot_p=8.627882261217135 ; min dot_p=-5.533635114651245
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.5247676331196882
[-4.32711009e-16 -6.30670783e-01  2.01366302e-01 -2.07238765e-01
 -4.50597007e-01  2.54058876e-01  5.22329121e-01  5.28069570e-02
  8.89825577e-01 -2.16738787e-01  3.52519995e-03  1.53662721e-01
 -1.74000185e-01 -7.38904015e-02  3.15152043e-02 -5.99757124e-01
 -6.78199934e-01  6.01858602e-01  9.38181837e-01  1.52476763e+00
 -4.71165211e-02 -4.88890553e-01 -4.72178307e-01  9.40772795e-01
 -6.93519152e-02  1.06116012e-01 -9.47627444e-02  1.15786962e-01
  2.30879341e-01 -5.21674615e-02 -3.93565715e-02 -1.38630954e-01
  2.20317667e-01 -1.69123759e-02 -1.18944673e+00  1.24676139e+00
  4.27062563e-02 -8.07034718e-01  1.23749728e+00]
;len(dot_p)=40; max=1.0743266842823787
max dot_p=8.564303708227817 ; min dot_p=-1.0788701240634957
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.4523013339750022
[-

len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.6264085822606993
[ 1.66418052e-16 -1.01441894e-01  5.99261045e-01  8.52504115e-01
  8.84857518e-01  3.82124089e-02  1.76103514e-01 -5.33380554e-02
 -2.93791438e-01  3.02037865e-02 -3.21943226e-02  3.04411312e-01
 -2.05272868e-02 -1.78681092e-02  1.13132606e-01 -1.97652731e+00
  1.62640858e+00 -1.00707709e-01 -1.08527356e+00  1.28965838e+00
 -6.75163243e-03 -5.54367256e-01  1.90607078e-01  2.23464995e-01
 -9.65769351e-02  1.13096260e-01  8.94242616e-02 -1.41418827e-01
  4.27489839e-01  1.20270351e+00 -2.57090395e-02  5.32475474e-02
  4.72495484e-01 -2.54200356e-03 -8.10277902e-01  1.36565406e+00
 -1.24550340e-01  2.76192134e-01 -1.50200978e+00]
;len(dot_p)=40; max=1.8588056668747608
max dot_p=4.0658612046035865 ; min dot_p=-9.382294857551244
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.6095244243087108
[ 1.75738048e-17 -2.01190412e-01  3.49819764e-01  5.23646303e-01
  5.29912173e-02  5.29518005e-01  7

 -3.84007361e-02 -7.05615104e-01  6.63641647e-01]
;len(dot_p)=40; max=1.2035326943394833
max dot_p=2.5434584544739045 ; min dot_p=1.1722511683579904
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.066491766910399
[ 5.01478687e-17  1.75259328e-01  2.06639064e-01 -2.72301234e-01
  5.73777661e-01 -1.44430263e-01  1.06649177e+00  1.01303121e-02
  9.16978452e-01  6.36857573e-02 -5.88530973e-02 -1.20535391e+00
 -4.94849092e-02 -1.65098217e-01  1.95837021e-01 -1.47320476e+00
  4.68045053e-01 -2.15667009e-01  4.33245665e-01 -1.45799734e+00
 -2.39873925e-02 -1.39139033e+00  4.76792653e-02  7.76655314e-01
 -3.74421454e-01 -1.80161257e-01 -2.40406384e-01  1.21792675e-01
  6.11595472e-01 -2.70007916e-01 -1.01247743e-02 -5.06551365e-03
 -1.13669843e-01  6.60525154e-02 -2.41135447e-01 -1.25619154e+00
  4.34460071e-02  5.37725432e-01 -8.52948249e-01]
;len(dot_p)=40; max=1.4226220458733385
max dot_p=3.0992811888198353 ; min dot_p=-1.3248858526979455
len(x_pad)=39 ; len(y_pad)=39 ; si

;len(dot_p)=40; max=1.6837309041367796
max dot_p=0.25619374299505937 ; min dot_p=-2.816910116946701
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=2.414126490240269
[-8.01464522e-17 -4.48823408e-01  3.08888953e-01  9.24442550e-01
  2.74111892e-01  2.72175042e-01 -2.78612368e-01  1.17919549e-01
 -6.18955589e-02 -4.10859941e-02 -1.31825906e-01  9.97892168e-01
  6.05294915e-01  9.05443166e-02 -4.29716683e-01  6.27044420e-01
  2.41412649e+00 -6.13724495e-01 -1.28161002e+00  1.97887577e+00
 -1.50213177e-02  4.89805202e-02 -1.04882824e-01  1.68292041e-01
 -3.51142567e-02 -4.31928872e-02 -1.50618618e-01  1.49668731e-01
  1.09208403e+00 -2.15996410e+00 -2.10590967e-03 -3.42122462e-02
  2.09420613e-01  2.15915055e-03 -6.73546328e-01  9.65201419e-01
 -1.52699715e-01 -1.62570089e+00 -5.57560244e-02]
;len(dot_p)=40; max=2.7749696078198514
max dot_p=1.2114863163362313 ; min dot_p=-8.7221998903562
len(x_pad)=39 ; len(y_pad)=39 ; si=12
;len(dct_product)=39; max=1.7538383078688689
[-2

In [443]:
sim

array([6.77962893, 6.75472032, 7.11859668, ..., 7.35655745, 7.26703575,
       2.47504726])

In [444]:
len(sim)

3399

In [445]:
sim[3400:]

array([], dtype=float64)

In [446]:
sum(np.isnan(sim))

0

In [447]:
np.argmax(sim)

984

In [452]:
sim[984]

8.840290063242287

In [449]:
aa = np.array([1,2,3, np.nan])

In [450]:
aa

array([ 1.,  2.,  3., nan])

In [451]:
sum(np.isnan(aa))

1