In [1]:
from pathlib import Path
import itertools
import numpy as np
import matplotlib.pylab as plt
# %matplotlib tk
# %matplotlib nbagg
# %matplotlib inline
import h5py
import json
import pickle

## Loading files

In [2]:
# Source recording for LEM1 (the LEM2 recording of the same run is currently not used).
filename_1 = Path("C:/LINHC/VersucheDBs/Trelleborg/2021-01-27-V24/2_Phase_A-D/20210127_Phase_A-D_LEM1.h5")
# filename_2 = Path("C:/LINHC/VersucheDBs/Trelleborg/2021-01-27-V24/2_Phase_A-D/20210127_Phase_A-D_LEM2.h5")

# Opened read-only and left open: the following cells slice datasets directly from this handle.
h51 = h5py.File(filename_1, mode="r")
# h52 = h5py.File(filename_2, mode="r")

# Show the file-level attributes first, then the top-level dataset names.
for heading, listing in (("ATTR:", h51.attrs.keys()), ("Keys:", h51.keys())):
    print(heading)
    print(listing)

ATTR:
<KeysViewHDF5 ['name_lookup', 'name_rev_lookup']>
Keys:
<KeysViewHDF5 ['S11_LEM1', 'S21_P1_LEM1', 'S21_P2_LEM1', 'S22_LEM1', 'hub1', 'hub2', 'ip1k1', 'ip1k2', 'ip2k1', 'ip2k2', 'k1at', 'k1f', 'k1p', 'k1t', 'k2at', 'k2f', 'k2p', 'k2t', 'phi', 'pos1', 'pos2', 'shub1', 'shub2', 'sk1at', 'sk1f', 'sk1p', 'sk1t', 'sk2at', 'sk2f', 'sk2p', 'sk2t', 'spd1', 'spd2', 'sphi', 'spos1', 'spos2', 'sspd1', 'sspd2', 'stdf', 't', 'tdf']>


In [3]:
# Decode the JSON-encoded 'name_lookup' file attribute; the displayed result maps the
# long signal descriptions to the short dataset keys used in this file.
json.loads(h51.attrs["name_lookup"])

{'Ist-Ablauf-Zeit, ca.': 't',
 'Ist-MG1: Teller-Drehfrequenz [Hz]': 'tdf',
 'Soll-MG1: Teller-Drehfrequenz [Hz]': 'stdf',
 'Ist-MG2: Kammer1-Temp[°C]': 'k1t',
 'Soll-MG2: Kammer1-Temp[°C]': 'sk1t',
 'Ist-MG3: Kammer1-Druck[bar]': 'k1p',
 'Soll-MG3: Kammer1-Druck[bar]': 'sk1p',
 'Ist-MG4: Kammer2-Temp.[°C]': 'k2t',
 'Soll-MG4: Kammer2-Temp.[°C]': 'sk2t',
 'Ist-MG5: Kammer2-Druck[bar]': 'k2p',
 'Soll-MG5: Kammer2-Druck[bar]': 'sk2p',
 'Ist-MG6: Kammer1-Kraft[kN]': 'k1f',
 'Soll-MG6: Kammer1-Kraft[kN]': 'sk1f',
 'Ist-MG7: Kammer2-Kraft[kN]': 'k2f',
 'Soll-MG7: Kammer2-Kraft[kN]': 'sk2f',
 'Ist-MG8: Kammer1-Außentemp.[°C]': 'k1at',
 'Soll-MG8: Kammer1-Außentemp.[°C]': 'sk1at',
 'Ist-MG9: Kammer2-Außentemp.[°C]': 'k2at',
 'Soll-MG9: Kammer2-Außentemp.[°C]': 'sk2at',
 'Ist-MG10: Kammer1-Istpos.[mm]': 'pos1',
 'Soll-MG10: Kammer1-Istpos.[mm]': 'spos1',
 'Ist-MG11: Kammer2-Istpos.[mm]': 'pos2',
 'Soll-MG11: Kammer2-Istpos.[mm]': 'spos2',
 'Ist-MG12: Hub1[mm]': 'hub1',
 'Soll-MG12: Hub1[mm]': '

In [4]:
# Table of sample shifts, calculated by grid search and stored as a pickle.
# NOTE(review): pickle.load can execute arbitrary code - only load this file from a trusted source.
with open("shift_opts_freqs.pkl", "rb") as pkl_file:  # context manager closes the handle again
    shift_opts_freqs = pickle.load(pkl_file)

In [5]:
# Shape of the shift table - presumably (number of frequencies, number of low speed intervals); TODO confirm.
shift_opts_freqs.shape

(121, 46)

In [6]:
# Shape of a single row (row 0) of the shift table.
shift_opts_freqs[0].shape

(46,)

## Find the low speed areas

all low speed areas (low)

In [7]:
# Boolean mask of all samples whose speed 'spd1' is above -50, and the positions 'pos1'
# at exactly those samples.
low = np.greater(h51['spd1'][:], -50)
pos_low = h51['pos1'][low]

all low speed areas as ONE sequence

In [8]:
# Integer sample indices of the low speed area (speed 'spd1' above -50).
low = np.where(h51['spd1'][:]>-50)[0]
# Read 'pos1' into memory first and index the NumPy array: handing a very long index list
# directly to an h5py dataset is documented to perform poorly. The selected values are the same.
pos_low = h51['pos1'][:][low]

all low speed areas as separate arrays

In [9]:
# A jump of more than 10000 between consecutive low speed indices marks the end of one
# interval; low_sep holds the position (within `low`) of the last sample before each jump.
index_jumps = np.diff(low)
low_sep = np.flatnonzero(index_jumps > 10000)
# Cut the position sequence right after each of those samples -> one array per interval.
pos_low_sep = np.split(pos_low, low_sep + 1)

Cut out some points to make sure that all remaining points lie in the low speed area
(otherwise some points from the high speed area will show up in s21, especially at later timestamps).

In [10]:
# Ls: index (in the full recording) of the last sample of every low speed interval.
Ls = np.concatenate((low[low_sep], [low[-1]]))

# Fs: index (in the full recording) of the first sample of every low speed interval.
Fs = np.concatenate(([low[0]], low[low_sep + 1]))

# Optional debug output:
# print("timestamps of Ls:")
# print (Ls)
# print("timestamps of Fs:")
# print (Fs)
# print("Durations of lsa:\n",Ls-Fs)

find the max values of positions in each low speed interval

In [11]:
# Build max_sep: one sorted index array per low speed interval, holding the indices
# (relative to the start of that interval) of its mutually separated position maxima.
max_sep = [] # (46,4)
for segment in pos_low_sep:
    # indices of the 30 largest position samples of this interval, largest first
    candidates = np.argsort(segment)[-30:][::-1]

    # greedy selection: a candidate is kept only if it lies more than 50 samples away
    # from every index kept so far, so each peak contributes a single index
    kept = [candidates[0]]
    for cand in candidates[1:]:
        if not any(abs(prev - cand) <= 50 for prev in kept):
            kept.append(cand)
    max_sep.append(np.sort(kept))

# There should be 4 max values in each interval
assert all(len(peaks) == 4 for peaks in max_sep)

## Data extraction

In [12]:
# Cut one fixed-length window per low speed interval out of every signal.
# Each window starts at the first position peak of its interval (so every window has the
# same starting point) and is 2000 samples long (to make sure that no points of the
# high speed area are included).

features_setup = ['k1t',
                  #'k1p',
                  #'k1f',
                  'k1at',
                  #'phi',
                  'ip1k1',
                  'ip2k1']

# possible useful features
# hub1 constant
# tdf same as spd1?
ground_truths = ['pos1','spd1']

s21=[[] for _ in range(121)] # s21 for all frequencies

pos1=[]
spd1=[]

k1t=[]
# k1p=[]
# k1f=[]
k1at=[]
# phi=[]
ip1k1=[]
ip2k1=[]

# Name -> list lookup for the lists above, so that no eval() is needed to fill them.
windows_by_name = {'k1t': k1t, 'k1at': k1at, 'ip1k1': ip1k1, 'ip2k1': ip2k1,
                   'pos1': pos1, 'spd1': spd1}

window_len = 2000

# Start index of every interval's window in the full recording:
# first sample of the interval + offset of its first position peak.
window_starts = [Fc + max_sep[i][0] for i, Fc in enumerate(Fs)]

for F in window_starts:
    L = F + window_len
    for name in features_setup + ground_truths:
        windows_by_name[name].append(h51[name][F:L])

# S21 windows: each interval's window is moved back by the shift stored for that
# frequency and that interval. The start index is taken per interval here; previously
# the start of the LAST interval was reused for all intervals, so the S21 windows did
# not line up with the pos1/spd1/setup windows.
for freq in range(121):
    shifts = shift_opts_freqs[freq]
    # exactly one shift per interval is needed to pair each shift with its own window
    assert len(shifts) == len(window_starts)
    for F, shift in zip(window_starts, shifts):
        FF = F - shift
        s21[freq].append(h51['S21_P1_LEM1'][FF:FF + window_len, freq])

In [13]:
# Shift table calculated by grid search; display its shape.
shift_opts_freqs.shape

(121, 46)

In [14]:
# Shape of a single row (row 0) of the shift table.
shift_opts_freqs[0].shape

(46,)

In [15]:
# Sanity check: stacked position windows, one row per window.
np.array(pos1).shape

(46, 2000)

In [16]:
# Sanity check: stacked S21 windows, first axis is the frequency index, then one row per window.
np.array(s21).shape

(121, 46, 2000)

In [17]:
# Show the shifts stored in row 20 of the shift table.
shift_opts_freqs[20]

array([ 31,  37,  44,  51,  58,  64,  72,  79,  86,  93, 100, 106, 114,
       120, 128, 135, 143, 149, 157, 164, 171, 178, 185, 192, 199, 205,
       212, 219, 225, 232, 239, 247, 253, 260, 268, 275, 282, 288, 295,
       302, 309, 316, 322, 329, 337, 344])

In [18]:
# Write the extracted windows to a new HDF5 file.

file = r'C:\LINHC\VersucheDBs\Trelleborg\test\training.h5'

with h5py.File(file, 'w') as f:
    # one group each for the setup signals, the S21 data and the reference signals
    setup = f.create_group("setup")
    liview = f.create_group("liview")
    ref = f.create_group("ref")

    # S21: one row per frequency, all windows laid out one after another in that row
    liview.create_dataset('s21',data=np.array(s21).reshape(121,-1))

    # every setup / reference signal is flattened to 1-D before it is stored
    for group, names in ((setup, features_setup), (ref, ground_truths)):
        for name in names:
            group.create_dataset(name, data=np.array(eval(name)).reshape(-1))

    # feature name -> index: real and imaginary part of each frequency interleaved ...
    feature_lookup = {}
    for k in range(121):
        feature_lookup['s21-' + str(k) + '-real'] = 2 * k
        feature_lookup['s21-' + str(k) + '-imag'] = 2 * k + 1

    # ... followed by the setup signals in the iteration order of the 'setup' group
    for offset, key in enumerate(setup.keys()):
        feature_lookup[key] = 242 + offset

    f.attrs["feature_lookup"] = json.dumps(feature_lookup)

    # carry the reverse name lookup of the source file over to the new file
    f.attrs["name_lookup_rev"] = h51.attrs["name_rev_lookup"]

In [19]:
## Read all the training data and labels from h5 files
f = h5py.File(file, 'r')  # left open on purpose: later cells still read from it before closing it
print("ATTR:")
print(f.attrs.keys())
print("Keys:")
print(f.keys())

# Assemble the feature rows: real and imaginary part of every S21 frequency row,
# followed by every dataset of the 'setup' group.
s21_ds = f['liview']['s21']
dataset = []
for i in range(s21_ds.shape[0]):  # row count taken from the file instead of a hard-coded 121
    row = s21_ds[i]  # read each row from disk once instead of once per component
    dataset.append(row.real)
    dataset.append(row.imag)
for value in f['setup'].values():
    dataset.append(np.asarray(value))

# X: one row per sample, one column per feature; y: reference position per sample
X = np.asarray(dataset).T
y = np.asarray(f['ref']['pos1'])

ATTR:
<KeysViewHDF5 ['feature_lookup', 'name_lookup_rev']>
Keys:
<KeysViewHDF5 ['liview', 'ref', 'setup']>


In [20]:
# Decode the JSON-encoded 'feature_lookup' attribute of the training file (feature name -> index).
json.loads(f.attrs["feature_lookup"])

{'s21-0-real': 0,
 's21-0-imag': 1,
 's21-1-real': 2,
 's21-1-imag': 3,
 's21-2-real': 4,
 's21-2-imag': 5,
 's21-3-real': 6,
 's21-3-imag': 7,
 's21-4-real': 8,
 's21-4-imag': 9,
 's21-5-real': 10,
 's21-5-imag': 11,
 's21-6-real': 12,
 's21-6-imag': 13,
 's21-7-real': 14,
 's21-7-imag': 15,
 's21-8-real': 16,
 's21-8-imag': 17,
 's21-9-real': 18,
 's21-9-imag': 19,
 's21-10-real': 20,
 's21-10-imag': 21,
 's21-11-real': 22,
 's21-11-imag': 23,
 's21-12-real': 24,
 's21-12-imag': 25,
 's21-13-real': 26,
 's21-13-imag': 27,
 's21-14-real': 28,
 's21-14-imag': 29,
 's21-15-real': 30,
 's21-15-imag': 31,
 's21-16-real': 32,
 's21-16-imag': 33,
 's21-17-real': 34,
 's21-17-imag': 35,
 's21-18-real': 36,
 's21-18-imag': 37,
 's21-19-real': 38,
 's21-19-imag': 39,
 's21-20-real': 40,
 's21-20-imag': 41,
 's21-21-real': 42,
 's21-21-imag': 43,
 's21-22-real': 44,
 's21-22-imag': 45,
 's21-23-real': 46,
 's21-23-imag': 47,
 's21-24-real': 48,
 's21-24-imag': 49,
 's21-25-real': 50,
 's21-25-i

In [21]:
# Close the handle on the training file that was opened for reading.
f.close()