# Proses Pengolahan Dataset (STEAD) 2

## Fungsi dan inisialisasi

Fungsi untuk membuat obspy stream dari dataset yang ada

In [1]:
import obspy
import h5py
from obspy import UTCDateTime
import numpy as np
from npy_append_array import NpyAppendArray
from obspy.clients.fdsn.client import Client
import matplotlib.pyplot as plt
import time

def make_stream(dataset):
    '''
    input: hdf5 dataset
    output: obspy stream

    '''
    
    data = np.array(dataset)

    tr_E = obspy.Trace(data=data[:, 0])
    tr_E.stats.starttime = UTCDateTime(dataset.attrs['trace_start_time'])
    tr_E.stats.delta = 0.01
    tr_E.stats.channel = dataset.attrs['receiver_type']+'E'
    tr_E.stats.station = dataset.attrs['receiver_code']
    tr_E.stats.network = dataset.attrs['network_code']

    tr_N = obspy.Trace(data=data[:, 1])
    tr_N.stats.starttime = UTCDateTime(dataset.attrs['trace_start_time'])
    tr_N.stats.delta = 0.01
    tr_N.stats.channel = dataset.attrs['receiver_type']+'N'
    tr_N.stats.station = dataset.attrs['receiver_code']
    tr_N.stats.network = dataset.attrs['network_code']

    tr_Z = obspy.Trace(data=data[:, 2])
    tr_Z.stats.starttime = UTCDateTime(dataset.attrs['trace_start_time'])
    tr_Z.stats.delta = 0.01
    tr_Z.stats.channel = dataset.attrs['receiver_type']+'Z'
    tr_Z.stats.station = dataset.attrs['receiver_code']
    tr_Z.stats.network = dataset.attrs['network_code']

    stream = obspy.Stream([tr_E, tr_N, tr_Z])

    return stream

Preprocessing (filter)

In [2]:
def FetchingAndFiltering(st):
    '''
    input: obspy stream
    output: filtered traces (E,N,Z) and unfiltered Z trace

    '''
    
    #Pick the traces from the stream object
    trE = st[0]
    trN = st[1]
    trZ = st[2]
        
    #Filter signal
    trE_filt = trE.copy()
    trN_filt = trN.copy()
    trZ_filt = trZ.copy()
    trE_filt.filter("bandpass", freqmin = 0.1, freqmax = 15)
    trN_filt.filter("bandpass", freqmin = 0.1, freqmax = 15)
    trZ_filt.filter("bandpass", freqmin = 0.1, freqmax = 15)
    #Access the attribute of each trace to get the numpy
    dataE = trE_filt.data
    dataN = trN_filt.data
    dataZ = trZ_filt.data

    return dataE,dataN,dataZ,trZ

Proses pelabelan data

In [3]:
def Labeling(train_samples,x,y,temp_label, temp_label_2):
    '''
    input: number of training samples, number of train_windows_samples, number of moving_windows_samples, array of initialization label,
        and temp_label_2 array (?)
    output: temp_label_2 array

    '''
    for i in range(train_samples):
        found = 0
        for j in range(i*y,i*y+x):
            if(temp_label[j]==1):
                #Just for the middle and forward for 1
                if(j >= (i*y + int(x/2))):
                    found = 1
                    break
        temp_label_2 = np.append(temp_label_2, found)
    
    return temp_label_2

Proses menambahkan data domain frekuensi

In [4]:
def Appending(train_samples, dataE, dataN, dataZ, x , y, temp_signal, pointer_dataset):
    '''
    input: number of training samples, data obspy trace ENZ, number of train_windows_samples, number of moving_windows_samples, 
        and temp_signal (?)
    output: temp_label_2 array

    '''
    pointer = pointer_dataset
    
    for i in range(train_samples):
        
        # Fuse the E,N, and Z channels with the frequency domain too
        temp = np.array([dataE[i*y:i*y+x], dataN[i*y:i*y+x], dataZ[i*y:i*y+x], (dct(dataE[i*y:i*y+x],norm = 'ortho')), (dct(dataN[i*y:i*y+x],norm = 'ortho')), (dct(dataZ[i*y:i*y+x],norm = 'ortho'))])
        temp2 = np.transpose(temp)
                
        # Insert it!
        temp_signal[pointer,:,:] = temp2
        
        # update pointer in temp_signal
        pointer = pointer + 1
            
    return temp_signal

Proses balancing dataset dengan SMOTE

In [5]:
def Balancing(temp_signal, temp_label_2, signal_dataset, label_dataset):
    '''
    input: unbalanced processed dataset (signal and label)
    output: balanced dataset (signal and label)

    '''
#     balance_time = time.time()
    
    #Balancing
    orig_shape = temp_signal.shape
#     print(f'before balancing label shape: {temp_label_2.shape}')
#     print(f'before balancing signal shape: {orig_shape}')

    #Reshape it to 2D matrix first
    temp_signal = np.reshape(temp_signal, (temp_signal.shape[0], -1))
#     print(temp_signal.shape)

    #Do the Balancing
    from imblearn.under_sampling import RandomUnderSampler
    from imblearn.over_sampling import SMOTE

    over = SMOTE(sampling_strategy=0.03) #3%
    under = RandomUnderSampler(sampling_strategy=0.8) #50% bigger than minority

    from imblearn.pipeline import Pipeline
    steps = [('o', over), ('u', under)]
    pipeline = Pipeline(steps=steps)

    # transform the dataset
    temp_signal, temp_label_2 = pipeline.fit_resample(temp_signal, temp_label_2)
        
    temp_signal = np.reshape(temp_signal, (-1, orig_shape[1], orig_shape[2]))
#     print(f'after balancing shape: {temp_signal.shape}')
        
    # Save to dataset and update pointer
    signal_dataset = np.append(signal_dataset, temp_signal, axis = 0)
    label_dataset = np.append(label_dataset, temp_label_2, axis = 0)
#     pointer_dataset_inc = len(temp_label2)
#     print(f'pointer_dataset_inc: {pointer_dataset_inc}')
    
#     print(f'Balancing time: {time.time()-balance_time}')

    return signal_dataset, label_dataset

In [6]:
def SaveToFile(signal_dataset, label_dataset, signal_file, label_file):
    '''
    Saving dataset to external file. Appending to whatever inside the external file
    input: processed dataset (signal and label)
    output: dataset file 

    '''
    with NpyAppendArray(signal_file) as npaa1:
        npaa1.append(signal_dataset)
    with NpyAppendArray(label_file) as npaa2:
        npaa2.append(label_dataset)
    

In [None]:
def ceildiv(a, b):
    '''
    Does ceiling division
    '''
    return -(a // -b)

## Program Utama

Proses load dataset dan memilih data sesuai yang diinginkan

In [7]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fftpack import dct

file_hdf5 = "D:/Download/merged/merge.hdf5"
header_file = "D:/Download/merged/merge.csv"

# reading the csv file into a dataframe:
df = pd.read_csv(header_file, engine='python')
print(f'total events in csv file: {len(df)}')

# filtering the dataframe with this condition 
df = df[(df.trace_category == 'earthquake_local') & (df.source_magnitude > 2.5)]
print(f'total events selected: {len(df)}')

# making a list of trace names for the selected data
list_event = df['trace_name'].to_list()
print(f'total events selected: {len(list_event)}')

# retrieving waveforms from the hdf5 file: 
dtfl = h5py.File(file_hdf5, 'r')    

total events in csv file: 1265657
total events selected: 150892
total events selected: 150892


Proses setting window2 yang digunakan

In [8]:
#Access the attribute of period and total Samples
Period = 0.01
df = 1/Period
#First Calculation
train_windows_seconds = 1 #Try training window at ... seconds
train_windows_samples = int(train_windows_seconds / Period)
moving_windows_seconds = 0.04 #Try overlapping at ... second
moving_windows_samples = int(moving_windows_seconds/Period)

print("The period of this data is : ", Period, " Seconds")
print("Try the Training Windows for each ", train_windows_seconds, " Seconds")
print("So, the Training Windows for this data is for each ", train_windows_samples, " Samples")
print("Also, the Moving Windows for this data is for each ", moving_windows_samples, " Samples")

The period of this data is :  0.01  Seconds
Try the Training Windows for each  1  Seconds
So, the Training Windows for this data is for each  100  Samples
Also, the Moving Windows for this data is for each  4  Samples


Proses memecah setiap sinyal menjadi beberapa window dan resampling (balancing)

In [9]:
# Calculate some important informations, mainly train_samples (number of training samples from this particular event)
Total_Samples = 6000 # 60s data with 100Hz freq
train_samples = int(1 + ((Total_Samples-train_windows_samples)/moving_windows_samples))
print(Total_Samples)
print(train_samples)
#counter for event
count = 0
#temp variable for
signal_dataset = np.zeros((train_samples*5, train_windows_samples,6))
print(signal_dataset.shape)
print(signal_dataset.nbytes)
#temp variable that is initiated after each balancing
label_dataset = np.zeros((train_samples*5), dtype = np.int32)
print(label_dataset.shape)
print(label_dataset.nbytes)

6000
1476
(7380, 100, 6)
35424000
(7380,)
29520


Catatan penting: seharusnya save_iter kelipatan balance_iter

In [12]:
# Balance dataset after balance_iter number of events
balance_iter = 17
# Calculate some important informations, mainly train_samples (number of training samples from this particular event)
Total_Samples = 6000 # 60s data with 100Hz freq
train_samples = int(1 + ((Total_Samples-train_windows_samples)/moving_windows_samples)) 
# counters
count = 0 # event
countBalance = 0 # Balance
countSave = 0 # save
# temp variable for saving balance_iter number of events
temp_signal = np.zeros((train_samples*balance_iter, train_windows_samples,6))
# temp variable that is initiated after each balancing
#temp_label_2 = np.zeros((train_samples*balance_iter), dtype = np.int32)
temp_label_2 = np.zeros((1), dtype = np.int32)
# pointers
pointer_dataset = 0 #for undersampled dataset
pointer_signal = 0 #for temporary 'sliced' signals
# Define x and y equal to train_window_samples and moving_windows_samples
x = train_windows_samples 
y = moving_windows_samples

# dataset variables
#signal_dataset = np.zeros((train_samples*save_iter, train_windows_samples, 6)) # 3 time domain + 3 freq domain
#label_dataset = np.zeros((train_samples*save_iter), dtype = np.int32)
signal_dataset = np.zeros((1, train_windows_samples, 6)) # 3 time domain + 3 freq domain
label_dataset = np.zeros((1), dtype = np.int32)
signal_file = ("D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_0.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_1.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_2.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_3.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_4.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_5.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_6.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_7.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_8.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_9.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_10.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_11.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_12.npy",
              "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_M25_13.npy")
label_file = ("D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_0.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_1.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_2.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_3.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_4.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_5.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_6.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_7.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_8.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_9.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_10.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_11.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_12.npy",
             "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_M25_13.npy")

file_num = len(signal_file)
data_per_file = (len(list_event)//file_num)
save_iter = 68

print("Init!")

# Loop for every event in list_event
for counter, event in enumerate(list_event):
    # update counters
    count = count + 1
#     print(count, ". There will be ", train_samples, " Training samples from this traces")
    countBalance = countBalance + 1
    countSave = countSave + 1
    
#     current_time = time.time()
    
    dataset = dtfl.get('data/'+str(event)) 
    # convering hdf5 dataset into obspy sream
    st = make_stream(dataset)
    
    # fetch the traces from stream and filter it
    [dataE,dataN,dataZ,trZ] = FetchingAndFiltering(st)
    
    # checking elapsed time
#     print(f'fetchAndFilter: {time.time()-current_time}')
#     current_time = time.time()
    
    # LABELING with zero for initiation 
    temp_label = np.zeros((int(Total_Samples)), dtype = np.int32)
    
    # Pick the Arrival of P based on IRIS (on 60th second)
    a = int(dataset.attrs["s_arrival_sample"])
    # Indicate it with 1
    temp_label[a] = 1

    # Labeling Process and ADDING FREQ. DOMAIN info
    temp_label_2 = Labeling(train_samples, x, y, temp_label, temp_label_2)
    
    # Checking elapsed time
#     print(f'Labeling: {time.time()-current_time}')
#     current_time = time.time()
    
    # Inserting data to temporary dataset
    temp_signal = Appending(train_samples, dataE, dataN, dataZ, x , y, temp_signal, pointer_signal)
    
    # Checking elapsed time
#     print(f'Appending: {time.time()-current_time}')
    
    # Balancing Process
    # Balancing dataset after balance_iter data fetched
    if((countBalance == balance_iter) or (count == len(list_event))):
        temp_label_2 = np.delete(temp_label_2, 0, 0) #erasing first row because it is only placeholder for temp_label_2 before appending
        [signal_dataset, label_dataset] = Balancing(temp_signal,temp_label_2, signal_dataset, label_dataset)
        
        temp_label_2 = np.zeros((1), dtype = np.int32)
                
        # update counter and pointer
        countBalance = 0
        pointer_signal = 0
        
    # Saving process
    if ((countSave == save_iter) or (count == len(list_event))):
        print(f'Saving to file... count: {count}')
        SaveToFile(signal_dataset, label_dataset, signal_file[count//data_per_file], label_file[count//data_per_file])
        
        # start again
        signal_dataset = np.zeros((1, train_windows_samples, 6)) # 3 time domain + 3 freq domain
        label_dataset = np.zeros((1), dtype = np.int32)
        
        countSave = 0
    
    # update pointers
    if (countBalance != 0):
        pointer_signal = pointer_signal + train_samples 
    #pointer_dataset = pointer_dataset + pointer_dataset_inc
    
print(signal_dataset.shape)
print(label_dataset.shape)

Init!
Saving to file... count: 68
Saving to file... count: 136
Saving to file... count: 204
Saving to file... count: 272
Saving to file... count: 340
Saving to file... count: 408
Saving to file... count: 476
Saving to file... count: 544
Saving to file... count: 612
Saving to file... count: 680
Saving to file... count: 748
Saving to file... count: 816
Saving to file... count: 884
Saving to file... count: 952
Saving to file... count: 1020
Saving to file... count: 1088
Saving to file... count: 1156
Saving to file... count: 1224
Saving to file... count: 1292
Saving to file... count: 1360
Saving to file... count: 1428
Saving to file... count: 1496
Saving to file... count: 1564
Saving to file... count: 1632
Saving to file... count: 1700
Saving to file... count: 1768
Saving to file... count: 1836
Saving to file... count: 1904
Saving to file... count: 1972
Saving to file... count: 2040
Saving to file... count: 2108
Saving to file... count: 2176
Saving to file... count: 2244
Saving to file... c

Saving to file... count: 18428
Saving to file... count: 18496
Saving to file... count: 18564
Saving to file... count: 18632
Saving to file... count: 18700
Saving to file... count: 18768
Saving to file... count: 18836
Saving to file... count: 18904
Saving to file... count: 18972
Saving to file... count: 19040
Saving to file... count: 19108
Saving to file... count: 19176
Saving to file... count: 19244
Saving to file... count: 19312
Saving to file... count: 19380
Saving to file... count: 19448
Saving to file... count: 19516
Saving to file... count: 19584
Saving to file... count: 19652
Saving to file... count: 19720
Saving to file... count: 19788
Saving to file... count: 19856
Saving to file... count: 19924
Saving to file... count: 19992
Saving to file... count: 20060
Saving to file... count: 20128
Saving to file... count: 20196
Saving to file... count: 20264
Saving to file... count: 20332
Saving to file... count: 20400
Saving to file... count: 20468
Saving to file... count: 20536
Saving t

Saving to file... count: 36448
Saving to file... count: 36516
Saving to file... count: 36584
Saving to file... count: 36652
Saving to file... count: 36720
Saving to file... count: 36788
Saving to file... count: 36856
Saving to file... count: 36924
Saving to file... count: 36992
Saving to file... count: 37060
Saving to file... count: 37128
Saving to file... count: 37196
Saving to file... count: 37264
Saving to file... count: 37332
Saving to file... count: 37400
Saving to file... count: 37468
Saving to file... count: 37536
Saving to file... count: 37604
Saving to file... count: 37672
Saving to file... count: 37740
Saving to file... count: 37808
Saving to file... count: 37876
Saving to file... count: 37944
Saving to file... count: 38012
Saving to file... count: 38080
Saving to file... count: 38148
Saving to file... count: 38216
Saving to file... count: 38284
Saving to file... count: 38352
Saving to file... count: 38420
Saving to file... count: 38488
Saving to file... count: 38556
Saving t

Saving to file... count: 54468
Saving to file... count: 54536
Saving to file... count: 54604
Saving to file... count: 54672
Saving to file... count: 54740
Saving to file... count: 54808
Saving to file... count: 54876
Saving to file... count: 54944
Saving to file... count: 55012
Saving to file... count: 55080
Saving to file... count: 55148
Saving to file... count: 55216
Saving to file... count: 55284
Saving to file... count: 55352
Saving to file... count: 55420
Saving to file... count: 55488
Saving to file... count: 55556
Saving to file... count: 55624
Saving to file... count: 55692
Saving to file... count: 55760
Saving to file... count: 55828
Saving to file... count: 55896
Saving to file... count: 55964
Saving to file... count: 56032
Saving to file... count: 56100
Saving to file... count: 56168
Saving to file... count: 56236
Saving to file... count: 56304
Saving to file... count: 56372
Saving to file... count: 56440
Saving to file... count: 56508
Saving to file... count: 56576
Saving t

Saving to file... count: 72488
Saving to file... count: 72556
Saving to file... count: 72624
Saving to file... count: 72692
Saving to file... count: 72760
Saving to file... count: 72828
Saving to file... count: 72896
Saving to file... count: 72964
Saving to file... count: 73032
Saving to file... count: 73100
Saving to file... count: 73168
Saving to file... count: 73236
Saving to file... count: 73304
Saving to file... count: 73372
Saving to file... count: 73440
Saving to file... count: 73508
Saving to file... count: 73576
Saving to file... count: 73644
Saving to file... count: 73712
Saving to file... count: 73780
Saving to file... count: 73848
Saving to file... count: 73916
Saving to file... count: 73984
Saving to file... count: 74052
Saving to file... count: 74120
Saving to file... count: 74188
Saving to file... count: 74256
Saving to file... count: 74324
Saving to file... count: 74392
Saving to file... count: 74460
Saving to file... count: 74528
Saving to file... count: 74596
Saving t

Saving to file... count: 90508
Saving to file... count: 90576
Saving to file... count: 90644
Saving to file... count: 90712
Saving to file... count: 90780
Saving to file... count: 90848
Saving to file... count: 90916
Saving to file... count: 90984
Saving to file... count: 91052
Saving to file... count: 91120
Saving to file... count: 91188
Saving to file... count: 91256
Saving to file... count: 91324
Saving to file... count: 91392
Saving to file... count: 91460
Saving to file... count: 91528
Saving to file... count: 91596
Saving to file... count: 91664
Saving to file... count: 91732
Saving to file... count: 91800
Saving to file... count: 91868
Saving to file... count: 91936
Saving to file... count: 92004
Saving to file... count: 92072
Saving to file... count: 92140
Saving to file... count: 92208
Saving to file... count: 92276
Saving to file... count: 92344
Saving to file... count: 92412
Saving to file... count: 92480
Saving to file... count: 92548
Saving to file... count: 92616
Saving t

Saving to file... count: 108256
Saving to file... count: 108324
Saving to file... count: 108392
Saving to file... count: 108460
Saving to file... count: 108528
Saving to file... count: 108596
Saving to file... count: 108664
Saving to file... count: 108732
Saving to file... count: 108800
Saving to file... count: 108868
Saving to file... count: 108936
Saving to file... count: 109004
Saving to file... count: 109072
Saving to file... count: 109140
Saving to file... count: 109208
Saving to file... count: 109276
Saving to file... count: 109344
Saving to file... count: 109412
Saving to file... count: 109480
Saving to file... count: 109548
Saving to file... count: 109616
Saving to file... count: 109684
Saving to file... count: 109752
Saving to file... count: 109820
Saving to file... count: 109888
Saving to file... count: 109956
Saving to file... count: 110024
Saving to file... count: 110092
Saving to file... count: 110160
Saving to file... count: 110228
Saving to file... count: 110296
Saving t

Saving to file... count: 125732
Saving to file... count: 125800
Saving to file... count: 125868
Saving to file... count: 125936
Saving to file... count: 126004
Saving to file... count: 126072
Saving to file... count: 126140
Saving to file... count: 126208
Saving to file... count: 126276
Saving to file... count: 126344
Saving to file... count: 126412
Saving to file... count: 126480
Saving to file... count: 126548
Saving to file... count: 126616
Saving to file... count: 126684
Saving to file... count: 126752
Saving to file... count: 126820
Saving to file... count: 126888
Saving to file... count: 126956
Saving to file... count: 127024
Saving to file... count: 127092
Saving to file... count: 127160
Saving to file... count: 127228
Saving to file... count: 127296
Saving to file... count: 127364
Saving to file... count: 127432
Saving to file... count: 127500
Saving to file... count: 127568
Saving to file... count: 127636
Saving to file... count: 127704
Saving to file... count: 127772
Saving t

Saving to file... count: 143208
Saving to file... count: 143276
Saving to file... count: 143344
Saving to file... count: 143412
Saving to file... count: 143480
Saving to file... count: 143548
Saving to file... count: 143616
Saving to file... count: 143684
Saving to file... count: 143752
Saving to file... count: 143820
Saving to file... count: 143888
Saving to file... count: 143956
Saving to file... count: 144024
Saving to file... count: 144092
Saving to file... count: 144160
Saving to file... count: 144228
Saving to file... count: 144296
Saving to file... count: 144364
Saving to file... count: 144432
Saving to file... count: 144500
Saving to file... count: 144568
Saving to file... count: 144636
Saving to file... count: 144704
Saving to file... count: 144772
Saving to file... count: 144840
Saving to file... count: 144908
Saving to file... count: 144976
Saving to file... count: 145044
Saving to file... count: 145112
Saving to file... count: 145180
Saving to file... count: 145248
Saving t

IndexError: tuple index out of range

In [11]:
print(temp_label_2.shape)
print(temp_signal.shape)

(7380,)
(7380, 100, 6)


In [21]:
print(temp_label_2[6020])
print(temp_signal[1000])

0
[[-4.84261137e+02 -1.08312566e+02  7.57494566e+01  1.55479436e+03
   1.24005039e+02 -3.71006383e+02]
 [ 9.02333729e+02 -3.86946356e+01 -5.11743879e+02  1.23937887e+03
   1.67250583e+02 -3.26801926e+01]
 [ 1.78953488e+03  1.64800035e+01 -1.02362276e+03  2.16006777e+03
   3.10133413e+02 -5.10122164e+02]
 [ 2.13672071e+03  2.23281161e+02 -1.20022708e+03  1.23423145e+03
   2.62975554e+02 -5.56820471e+01]
 [ 2.09024913e+03  5.48018002e+02 -9.04501674e+02  2.41585459e+03
   4.88683156e+02 -6.34563579e+02]
 [ 1.94138148e+03  8.06014392e+02 -2.69951945e+02  1.73543824e+03
   2.46333816e+02 -1.87665358e+02]
 [ 1.98189861e+03  8.60058638e+02  3.47075858e+02  4.54975062e+03
  -3.55157574e+02 -6.04690078e+02]
 [ 2.31786757e+03  6.87586848e+02  6.08381846e+02  1.98853481e+03
   2.33491746e+03  2.21163836e+02]
 [ 2.79514934e+03  3.61202279e+02  4.51621225e+02  7.17191025e+03
   2.16033595e+03 -1.40283692e+03]
 [ 3.07346621e+03  5.48486393e+01  1.08077418e+02 -6.97568184e+03
  -3.43226952e+02  7.56

## Proses penyimpanan dataset

In [30]:
a_file = "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_20F_100Hz_01.npy"
np.save(a_file, signal_dataset)

b_file = "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_20F_100Hz_01.npy"
np.save(b_file, label_dataset)

Penyimpanan setelah dilakukan scaling

In [31]:
#Scaling for the Signals
signal_dataset_time = np.delete(signal_dataset, [3,4,5], 2)
signal_dataset_freq = np.delete(signal_dataset, [0,1,2], 2)

orig_shape = signal_dataset.shape
signal_dataset = None

from sklearn.preprocessing import StandardScaler
scalers = {}
for i in range(orig_shape[0]):
    scalers[i] = StandardScaler()
    signal_dataset_time[i][:][:] = scalers[i].fit_transform(signal_dataset_time[i][:][:]) 

scalers = {}
for i in range(orig_shape[0]):
    scalers[i] = StandardScaler()
    signal_dataset_freq[i][:][:] = scalers[i].fit_transform(signal_dataset_freq[i][:][:]) 

signal_dataset_scaled = np.append(signal_dataset_time, signal_dataset_freq, axis = 2)

In [32]:
a_file = "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/signal_dataset_20F_100Hz_Scaled_01.npy"
np.save(a_file, signal_dataset_scaled)

b_file = "D:/S3_project/S Picker/Cooked_Dataset/Dataset100Hz/label_dataset_20F_100Hz_Scaled_01.npy"
np.save(b_file, label_dataset)