In [92]:
import numpy as np
import pandas as pd
import h5py
import dask.array as da
import dask.dataframe as dd
import dask

file = '../../data/pair3.mat'

f = h5py.File(file, 'r')

trace = f['s_traces']
trace_T = trace[:].T
s_traces = da.from_array(trace_T, chunks=(10000, 10000))

s_traces_real = s_traces['real']  
s_traces_imag = s_traces['imag']

In [93]:
s_traces_real.compute()

array([[ 2.66330160e-04,  3.39474429e-04,  3.97195029e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-1.94677905e-04, -1.03788556e-04, -8.09854021e-06, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.30216397e-04,  2.94542204e-04,  2.29296368e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 5.72209317e-04,  3.19251547e-04, -8.44281070e-06, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.32511855e-04,  1.84027406e-04, -1.78084588e-05, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.23385214e-04,  4.26125738e-05, -8.57108244e-05, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [94]:
from datetime import timedelta, datetime

with h5py.File(file, "r") as mat_file:
    s_dates = mat_file["s_dates"][:]  
s_dates = s_dates.flatten()

def datenum_to_datetime(datenum):
    """
    Convert Matlab datenum into Python datetime.
    :param datenum: Date in datenum format
    :return:        Datetime object corresponding to datenum.
    """
    days = datenum % 1
    python_date = datetime.fromordinal(int(datenum)) \
           + timedelta(days=days) \
           - timedelta(days=366)
    
    if python_date.second > 30:
        python_date = python_date + timedelta(minutes=1)  
        python_date = python_date.replace(second=0)  

    return python_date.strftime("%Y-%m-%d-%H-%M")

s_dates_to_python = np.array([datenum_to_datetime(d) for d in s_dates])
s_dates_df = pd.DataFrame(s_dates_to_python, columns= ['date'])

In [95]:
s_dates_df

Unnamed: 0,date
0,2012-01-04-09-28
1,2012-01-04-09-49
2,2012-01-04-10-27
3,2012-01-04-11-05
4,2012-01-04-11-13
...,...
76675,2021-06-08-10-06
76676,2021-06-08-10-08
76677,2021-06-08-10-10
76678,2021-06-08-10-11


In [96]:
def get_index(time, date_df):
    return date_df[date_df['date'].isin(time)].index.to_numpy()

In [97]:
defect_pos = int(get_index(['2021-06-08-09-15'], s_dates_df))
defect_pos

  defect_pos = int(get_index(['2021-06-08-09-15'], s_dates_df))


76650

In [111]:
len(s_traces_real) - 76650

30

In [105]:
def add_label(df, defect_id):
    
    m = len(df)
    y = np.zeros(m)
    y[defect_id:] = 1
    yint = y.astype(int)

    return yint

In [None]:
y = add_label(s_traces_real, defect_pos)
y

array([0, 0, 0, ..., 1, 1, 1])

In [109]:
s_traces_real.compute()

array([[ 2.66330160e-04,  3.39474429e-04,  3.97195029e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-1.94677905e-04, -1.03788556e-04, -8.09854021e-06, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.30216397e-04,  2.94542204e-04,  2.29296368e-04, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 5.72209317e-04,  3.19251547e-04, -8.44281070e-06, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.32511855e-04,  1.84027406e-04, -1.78084588e-05, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.23385214e-04,  4.26125738e-05, -8.57108244e-05, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [112]:
import random

# Convert Dask to NumPy (if not already done)
X = s_traces_real.compute()
y = add_label(X, defect_pos)

# Sample 50 random indices with label 0
zero_indices = np.where(y == 0)[0]
sampled_zero_indices = random.sample(list(zero_indices), 50)

# Get the positive example index
positive_index = defect_pos + 20

# Combine all indices
all_indices = sampled_zero_indices + [positive_index]

# Extract the corresponding samples and labels
X_sample = X[all_indices]
y_sample = y[all_indices]

In [None]:
X_sample = X_sample[:, :, np.newaxis]

In [113]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential([
    LSTM(64, input_shape=(X_sample.shape[1], 1), return_sequences=False),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X_sample, y_sample, epochs=10, batch_size=8, verbose=1)

Epoch 1/10


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 452ms/step - accuracy: 0.9894 - loss: 0.6924
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 466ms/step - accuracy: 0.9855 - loss: 0.6890
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 441ms/step - accuracy: 0.9894 - loss: 0.6856
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 430ms/step - accuracy: 0.9724 - loss: 0.6826
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 444ms/step - accuracy: 0.9724 - loss: 0.6794
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 440ms/step - accuracy: 0.9855 - loss: 0.6757
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 605ms/step - accuracy: 0.9894 - loss: 0.6722
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.9568 - loss: 0.6705
Epoch 9/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[

<keras.src.callbacks.history.History at 0x226ee9e9310>

In [114]:
X_full = s_traces_real.compute()

# Take the last 10 rows
X_test = X_full[-10:]  # Shape: (10, time_steps)

X_test = X_test[:, :, np.newaxis]
y_pred = model.predict(X_test)
y_class = (y_pred > 0.5).astype(int)
for i, (prob, label) in enumerate(zip(y_pred, y_class)):
    print(f"Row {-10+i}: Probability={prob[0]:.4f}, Predicted Label={label[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 220ms/step
Row -10: Probability=0.4827, Predicted Label=0
Row -9: Probability=0.4827, Predicted Label=0
Row -8: Probability=0.4827, Predicted Label=0
Row -7: Probability=0.4827, Predicted Label=0
Row -6: Probability=0.4827, Predicted Label=0
Row -5: Probability=0.4827, Predicted Label=0
Row -4: Probability=0.4827, Predicted Label=0
Row -3: Probability=0.4827, Predicted Label=0
Row -2: Probability=0.4827, Predicted Label=0
Row -1: Probability=0.4827, Predicted Label=0
