<a href="https://colab.research.google.com/github/aist2000/ML-public/blob/master/el2/TCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TCN method

In [172]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Dropout, Activation, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping

def create_dataset(dataset, window_size=1, stride=1):
    """
    Creates a dataset of sliding windows.

    Args:
      dataset: The input dataset as a NumPy array.
      window_size: The size of the sliding window.
      stride: The stride for the sliding window.

    Returns:
      A NumPy array of windows.
    """
    X = list()
    for i in range(window_size, len(dataset) - window_size + 1, stride):
        X.append(dataset[i-window_size:i, 1:])  # Exclude time column
    return np.array(X)

def train_model(X_train):
    """
    Trains a TCN autoencoder model for anomaly detection.

    Args:
      X_train: The training input data.

    Returns:
      A trained TCN autoencoder model.
    """
    num_features = X_train.shape[2]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(window_size, num_features), padding='same', dilation_rate=1))
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', padding='same', dilation_rate=2))
    model.add(Conv1D(filters=16, kernel_size=3, activation='relu', padding='same', dilation_rate=4))
    # Add more Conv1D layers with increasing dilation rates as needed

    # Decoder (upsampling and reconstruction)
    model.add(Conv1D(filters=16, kernel_size=3, activation='relu', padding='same', dilation_rate=4))
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', padding='same', dilation_rate=2))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same', dilation_rate=1))
    model.add(TimeDistributed(Dense(num_features)))  # Reconstruct the input

    model.compile(optimizer='adam', loss='mse')
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    model.fit(X_train, X_train, epochs=100, batch_size=32, verbose=1, validation_split=0.2, callbacks=[early_stopping])
    return model

def predict_anomalies(model, X_test, threshold=0.1, stride=1):
    """
    Predicts anomaly windows and observations based on the trained model and a threshold.

    Args:
      model: The trained TCN autoencoder model.
      X_test: The test input data.
      threshold: The threshold for anomaly detection.
      stride: The stride used for creating the sliding windows.

    Returns:
      A tuple containing:
        - anomaly_windows: A list of indices for windows containing anomalies.
        - anomaly_observations: A list of indices for observations considered anomalous.
    """
    predictions = model.predict(X_test)
    mse = np.mean(np.square(predictions - X_test), axis=2)  # Calculate MSE for each time step
    threshold = np.percentile(mse, 95)  # Example threshold (95th percentile)

    anomaly_windows = np.where(np.any(mse > threshold, axis=1))[0]  # Identify anomaly windows
    anomaly_observations = list()
    for window_index in anomaly_windows:
        window = mse[window_index]
        anomaly_indices = np.where(window > threshold)[0]  # Find anomalies within the window
        # Adjust indices to correspond to original dataset
        anomaly_observations.extend(window_index * stride + anomaly_indices)

    return predictions, anomaly_windows, anomaly_observations



In [223]:
 X_train.shape[2]

3

In [224]:
#import numpy as np
import pandas as pd
'''
data = np.array([
   [100,0, 0], [101,0, 0],[102,0, 1],[103,0, 1],[104,0, 1],[105,1, 1],[106,0, 1],[107,0, 1],[108,0, 1],[109,1, 1],[110,0, 1]
              ,[111,0, 0], [112,0, 0],[113,0, 1],[114,0, 1],[115,0, 1],[116,1, 1],[117,0, 1],[118,0, 1],[119,0, 1],[120,1, 1]
              ,[121,0, 0], [122,0, 0],[123,0, 1],[124,0, 1],[125,0, 1],[126,1, 1],[127,0, 1],[128,0, 1],[129,0, 1],[130,1, 1]
  ])
'''
url="https://raw.githubusercontent.com/aist2000/ML-public/master/el2/data.csv"
#url="dataset.csv"
df = pd.read_csv(url, skiprows=0, header=None)
display(df.head())
display(df.describe())
data = df.to_numpy()

Unnamed: 0,0,1,2,3
0,100,0,0,0
1,101,0,0,0
2,102,0,0,0
3,103,0,0,0
4,104,0,0,0


Unnamed: 0,0,1,2,3
count,700.0,700.0,700.0,700.0
mean,449.5,0.0,0.001429,0.0
std,202.21688,0.0,0.037796,0.0
min,100.0,0.0,0.0,0.0
25%,274.75,0.0,0.0,0.0
50%,449.5,0.0,0.0,0.0
75%,624.25,0.0,0.0,0.0
max,799.0,0.0,1.0,0.0


In [201]:

np.where( data[:,1:4] > 0)[0]


array([62])

In [170]:

 X_test[[ind]]

array([[[[[0, 1, 0]]],


        [[[0, 0, 0]]],


        [[[0, 0, 0]]]]])

In [129]:
X_test[ind][0]

1

In [114]:
X_test[100]

array([[0, 1, 0]])

In [110]:
data[50:70],X_test[0:10]

(array([[150,   0,   0,   0],
        [151,   0,   0,   0],
        [152,   0,   0,   0],
        [153,   0,   0,   0],
        [154,   0,   0,   0],
        [155,   0,   0,   0],
        [156,   0,   0,   0],
        [157,   0,   0,   0],
        [158,   0,   0,   0],
        [159,   0,   0,   0],
        [160,   0,   0,   0],
        [161,   0,   0,   0],
        [162,   0,  26,   0],
        [163,   0,   0,   0],
        [164,   0,   0,   0],
        [165,   0,   0,   0],
        [166,   0,   0,   0],
        [167,   0,   0,   0],
        [168,   0,   0,   0],
        [169,   0,   0,   0]]),
 array([[[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]],
 
        [[0, 0, 0]]]))

In [225]:
# Example usage:
# Assuming your data is in a NumPy array called 'data' with shape (num_samples, 31)
# where the first column is time and the rest are the 30 features


# 1. Data Preprocessing
scaler = StandardScaler()
trfdata=data
trfdata[:, 1:] = scaler.fit_transform(data[:, 1:])  # Standardize features
window_size = 10 #10000  # Set window size to 10,000
stride = 1     # 1000  # Adjust stride as needed
X = create_dataset(trfdata, window_size, stride)

# 2. Train-Test Split (adjust split ratio as needed)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]

# 3. Model Training
model = train_model(X_train)



Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 0.3244 - val_loss: 4.0720e-04
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.5388 - val_loss: 0.0010
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.2078 - val_loss: 1.2183e-04
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.2475 - val_loss: 4.6066e-04
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0470 - val_loss: 3.6208e-04
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0561 - val_loss: 2.3973e-04
Epoch 7/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0211 - val_loss: 3.4393e-04
Epoch 8/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0638 - val_loss: 1.0342e-04
Epoch 9/100
[1m14/14[0

In [207]:
np.where(X_test>0)

(array([], dtype=int64), array([], dtype=int64), array([], dtype=int64))

In [211]:
X_test[95:105][0]

array([[0, 0, 0],
       [2, 2, 2],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [226]:
# 4. Anomaly Prediction
threshold = 10  # Adjust threshold as needed

predictions, anomaly_windows, anomaly_observations = predict_anomalies(model, X_test, threshold, stride)

# Print the results
print("Anomaly Windows:", anomaly_windows)
print("Anomaly Observations:", anomaly_observations)
print("Predictions:", predictions)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step


ValueError: not enough values to unpack (expected 3, got 2)

In [214]:
X[[anomaly_observations]]

array([[[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],

        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0,

In [74]:
# 4. Anomaly Prediction
threshold = 5  # Adjust threshold as needed

anomaly_windows, anomaly_observations = predict_anomalies(model, X_train, threshold, stride)

# Print the results
print("Anomaly Windows:", anomaly_windows)
print("Anomaly Observations:", anomaly_observations)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Anomaly Windows: [27 28 29 30 31]
Anomaly Observations: [54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]


In [13]:
 predictions = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


In [75]:
predictions, X_test

(array([[[ 0.07872578, -0.00353326, -0.00368248],
         [ 0.11749697, -0.00121287, -0.00320818],
         [ 0.11228596, -0.0049434 , -0.00144949],
         [ 0.10001312, -0.01300826,  0.00160571]],
 
        [[ 0.07872578, -0.00353326, -0.00368248],
         [ 0.11749697, -0.00121287, -0.00320818],
         [ 0.11228596, -0.00494341, -0.00144949],
         [ 0.10001311, -0.01300826,  0.00160572]]], dtype=float32),
 array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 

In [16]:
    predictions = model.predict(X_test)
    mse = np.mean(np.square(predictions - X_test), axis=2)  # Calculate MSE for each time step
    threshold = np.percentile(mse, 95)  # Example threshold (95th percentile)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step


In [17]:
mse,threshold

(array([[0.0020746 , 0.00460577, 0.00421156, 0.00339147],
        [0.0020746 , 0.00460577, 0.00421156, 0.00339147]]),
 0.004605767520890757)

In [37]:
    anomaly_windows = np.where(np.any(mse > threshold, axis=1))[0]  # Identify anomaly windows
    anomaly_windows


array([0])

In [30]:
mse[0]

array([0.0020746 , 0.00460577, 0.00421156, 0.00339147])

In [29]:
np.where(mse[0] > threshold)

(array([1]),)

In [31]:
    anomaly_observations = list()
    for window_index in anomaly_windows:
        window = mse[window_index]
        anomaly_indices = np.where(window > threshold)[0]  # Find anomalies within the window
        # Adjust indices to correspond to original dataset
        anomaly_observations.extend(window_index * stride + anomaly_indices)

In [36]:
anomaly_windows[0], anomaly_observations

(0, [1])