In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset, DataLoader

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn import preprocessing

import matplotlib.pyplot as plt

def label_load(row):
    """Map one sample to an operating-regime label.

    Parameters
    ----------
    row : mapping-like
        Must be indexable by 'Active Power' and 'Governor speed actual'
        (a pandas row or a plain dict both work).

    Returns
    -------
    str
        One of 'Shutdown', 'Warming', 'No Load', 'Low Load', 'Rough Zone',
        'Part Load', 'Efficient Load', 'High Load' or 'Undefined'.

    Notes
    -----
    Apart from 'Shutdown' and 'Warming', every label requires the speed to be
    strictly greater than 250, so a speed of exactly 250 (or NaN) yields
    'Undefined'. Because 'No Load' is tested first, 'Low Load' effectively
    covers powers in [3, 20).
    """
    power = row['Active Power']
    speed = row['Governor speed actual']

    # The two regimes that are decided below the speed threshold.
    if power < 1 and speed < 1:
        return 'Shutdown'
    if power < 3 and speed < 250:
        return 'Warming'

    # Everything else needs speed > 250; "not >" also rejects NaN and == 250.
    if not speed > 250:
        return 'Undefined'

    if power < 3:
        return 'No Load'

    # Upper (exclusive) power bound of each band, checked in ascending order.
    for upper_bound, label in ((20, 'Low Load'), (40, 'Rough Zone'),
                               (50, 'Part Load'), (65, 'Efficient Load')):
        if power < upper_bound:
            return label

    # A NaN power fails every comparison above and this one, giving 'Undefined'.
    return 'High Load' if power >= 65 else 'Undefined'
      
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a 2-D array of time-ordered rows.

    Sample ``i`` is the pair ``(input, label)`` where

    * ``input`` is rows ``i .. i + input_window - 1`` with all columns, and
    * ``label`` is the same-length window shifted forward by ``output_window``
      rows, restricted to ``target_cols``.

    Parameters
    ----------
    input_data : 2-D array-like supporting ``data[a:b]`` and ``[:, cols]``
        (e.g. a numpy array); rows are time steps.
    input_window : int
        Number of rows in each input (and label) window.
    output_window : int
        Forward shift, in rows, between the input window and the label window.
    target_cols : slice or column indexer, optional
        Columns kept in the label. Defaults to ``slice(9, 16)``, the value
        that used to be hard-coded.
    """

    def __init__(self, input_data, input_window, output_window, target_cols=slice(9, 16)):
        self.input_data = input_data
        self.input_window = input_window
        self.output_window = output_window
        self.target_cols = target_cols
        self.block_len = input_window + output_window
        # Clamp at zero so inputs shorter than one block give an empty dataset
        # instead of a negative count.
        self.block_num = max(len(input_data) - self.block_len + 1, 0)
        self.inout_seq = self.create_input_sequences()

    def create_input_sequences(self):
        """Return the list of ``(input_window_rows, label_rows)`` pairs."""
        return [
            (
                self.input_data[i : i + self.input_window],
                self.input_data[i + self.output_window : i + self.input_window + self.output_window][:, self.target_cols],
            )
            for i in range(self.block_num)
        ]

    def __len__(self):
        return len(self.inout_seq)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a pair of float32 tensors."""
        train_seq, train_label = self.inout_seq[idx]
        return torch.FloatTensor(train_seq), torch.FloatTensor(train_label)

In [15]:
# Load the sensor table and keep rows from 2020-01-01 onwards.
# NOTE: pd.read_pickle can execute arbitrary code while loading; only use it on trusted files.
df_data_withtime = pd.read_pickle("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/my_data_5thn_olah.pickle")

# Parse timestamps first so the date filter is a datetime comparison even if
# the column was stored as strings.
df_data_withtime['TimeStamp'] = pd.to_datetime(df_data_withtime['TimeStamp'])
mask = (df_data_withtime['TimeStamp'] >= '2020-01-01 00:00:00')
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the loaded frame.
df_data_withtime = df_data_withtime.loc[mask].copy()

# Downcast every sensor column to the smallest float dtype that holds it.
for column_name in df_data_withtime.columns:
    if column_name != 'Load_Type' and column_name != 'TimeStamp':
        df_data_withtime[column_name] = pd.to_numeric(df_data_withtime[column_name], downcast='float')

# Disabled experiment: recompute Load_Type with label_load and drop 'Undefined' rows.
# load_type_type = df_data_withtime.apply(label_load, axis=1)
# df_data_withtime['Load_Type'] = load_type_type
# masknot = (df_data_withtime['Load_Type'] == 'Undefined')
# df_data_withtime = df_data_withtime.loc[~masknot]

# Forward-fill gaps; leading NaNs (with no earlier value) are left as-is.
df_data_withtime = df_data_withtime.ffill()
print(len(df_data_withtime))

2103840


In [3]:
# Inspect the loaded frame (rendered by the notebook as the cell's last expression).
df_data_withtime

Unnamed: 0,TimeStamp,Governor speed actual,UGB X displacement,UGB Y displacement,LGB X displacement,LGB Y displacement,TGB X displacement,TGB Y displacement,Stator winding temperature 13,Stator winding temperature 14,...,Governor Penstock Pressure,Penstock pressure,Active Power,Reactive Power,Power Factor,Electrical Frequency,Grid Selection,Opening Wicked Gate,UGB Oil Contaminant,Gen Thrust Bearing Oil Contaminant
1440,2020-01-01 00:00:00,276.959991,174.589996,172.720001,128.589996,108.400002,167.479996,129.119995,67.000000,66.089996,...,276.959991,13.85,42.880001,18.790001,0.92,50.139999,0.0,64.910004,15.420000,20.200001
1441,2020-01-01 00:01:00,274.690002,183.960007,170.679993,129.889999,110.900002,181.380005,134.479996,67.000000,66.050003,...,274.850006,13.32,43.650002,22.850000,0.92,50.139999,0.0,65.400002,15.420000,20.200001
1442,2020-01-01 00:02:00,275.079987,150.539993,162.710007,132.789993,104.589996,165.520004,119.209999,67.000000,66.000000,...,275.359985,13.34,44.419998,20.129999,0.92,50.139999,0.0,65.879997,15.420000,20.200001
1443,2020-01-01 00:03:00,275.869995,173.110001,177.080002,121.610001,112.690002,183.300003,136.990005,67.000000,65.949997,...,275.869995,13.81,45.189999,17.410000,0.92,50.139999,0.0,64.599998,15.420000,20.200001
1444,2020-01-01 00:04:00,274.260010,180.100006,164.110001,130.389999,98.800003,170.889999,132.100006,67.000000,65.910004,...,274.260010,13.83,45.959999,22.230000,0.92,50.139999,0.0,64.800003,15.420000,20.200001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2629435,2023-12-31 23:55:59,273.589996,199.500000,182.000000,122.500000,91.800003,74.900002,53.900002,66.760002,64.919998,...,274.179993,13.42,48.509998,19.610001,0.94,50.279999,1.0,67.980003,19.129999,23.110001
2629436,2023-12-31 23:56:59,273.119995,192.130005,184.199997,122.209999,88.029999,73.889999,58.849998,66.739998,64.900002,...,273.100006,13.69,48.610001,20.629999,0.94,50.360001,1.0,69.070000,19.129999,23.110001
2629437,2023-12-31 23:57:59,272.209991,188.119995,190.649994,123.169998,100.419998,74.059998,55.630001,66.709999,64.879997,...,272.010010,13.40,48.650002,21.600000,0.94,50.450001,1.0,68.919998,19.120001,23.110001
2629438,2023-12-31 23:58:59,275.089996,197.460007,191.770004,124.970001,96.339996,71.080002,55.610001,66.680000,64.860001,...,274.950012,13.68,48.689999,14.940000,0.94,50.540001,1.0,67.760002,19.120001,23.110001


In [2]:
# Load the shutdown/anomaly log and drop every sample that lies within a
# 3-day margin around any logged event.
# Requires df_data_withtime to exist already (run the data-loading cell first).
df_anomaly = pd.read_excel("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/shutdown_list.xlsx", 'Sheet2')
df_anomaly['Start Time'] = pd.to_datetime(df_anomaly['Start Time'])
df_anomaly['End Time'] = pd.to_datetime(df_anomaly['End Time'])

# mask = (df_anomaly['Interal/External'] == 'Internal') & (df_anomaly['Shutdown Type'] == 'Unplanned')
# df_anomaly = df_anomaly.loc[~mask]

exclusion_margin = pd.Timedelta(days=3)
timestamps = df_data_withtime['TimeStamp']

# Accumulate one positional boolean mask over all events and filter once,
# instead of re-slicing the whole frame for every event.
masknot = np.zeros(len(timestamps), dtype=bool)
for index, row in df_anomaly.iterrows():
    window_start = row['Start Time'] - exclusion_margin
    window_end = row['End Time'] + exclusion_margin
    # Interval is open at the start and closed at the end: (start - 3d, end + 3d].
    masknot |= ((timestamps > window_start) & (timestamps <= window_end)).to_numpy()

df_data_withtime = df_data_withtime.loc[~masknot]

print(len(df_data_withtime))

NameError: name 'df_data_withtime' is not defined

In [3]:
# Inspect the anomaly/shutdown log (rendered as the cell's last expression).
df_anomaly

Unnamed: 0,Start Time,End Time,Event,Activities,Related Component,Action/Cause,Interal/External,Shutdown Details,Startup Details,Supplied Grid after Action,Shutdown Type
0,2023-03-23 10:22:00,2023-03-23 12:34:00,LGS#1 shut down for Hydran PPM reset,Instrument,Hydran PPM MTX,Alarm Reset,External,Shutdown via 4/CS,Started via 4/CS,Auxiliary Grid,Planned
1,2023-04-21 09:18:00,2023-04-21 11:27:00,LGS#1 Shutdown for Replace Carbon Brush and cl...,Instrument,Carbon Brush,Replacement,Internal,Shutdown,Started permissives,FCE Grid,Planned
2,2023-05-29 11:34:00,2023-05-29 13:57:00,LGS#1 Shutdown for Check & Repair Sensor Level...,Instrument,TGB Oil Level Sensor,Installation Check,Internal,Shutdown,Started permissives,FCE Grid,Planned
3,2023-07-13 11:27:00,2023-07-13 14:56:00,LGS#1 Shutdown for Investigation TGB oil Level...,Instrument,TGB Oil Level Sensor,Alarm Check,Internal,Shutdown,Start from Local Panel,FCE Grid,Planned
4,2023-07-20 07:45:00,2023-07-20 15:06:00,LGS#1 Shutdown for HIRA,"Mechanic, Electric, Instrument",TGB Oil Level,Repairment,Internal,Shutdown,Start from Local Panel,FCE Grid,Planned
...,...,...,...,...,...,...,...,...,...,...,...
76,2019-05-23 09:44:00,2019-05-23 11:45:00,Total blackout,Electrical,A3L3 CB,Total Blackout,External,Trip,Flying Start,FCE Grid,Unplanned
77,2019-06-29 00:51:00,2019-06-29 12:06:00,LGS#1 Trip due to relay 186T transformer lock ...,Electrical,A1G1 CB,Thermal Plant Shorted,External,Trip by 186T Alarm,Started via 4/CS,FCE Grid,Unplanned
78,2019-10-16 05:57:00,2019-10-16 06:31:00,Total blackout,Electrical,A1L2 CB,Trip,External,Trip,Flying Start,FCE Grid,Unplanned
79,2019-12-14 19:14:00,2019-12-14 22:32:00,LGS#1 Trip to standstill due to PLC error.,Instrument,A1G1 CB,PLC Major Fault,External,Trip to Standstill,Started via HMI,FCE Grid,Unplanned


In [9]:
# Replace the index with a fresh 0..n-1 range in place; the old index labels are discarded.
df_data_withtime.reset_index(drop=True, inplace=True)

In [17]:
def filter_noise_es(df, alpha=0.4, reduction=False):
    """Smooth every column with an exponentially weighted moving average.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    alpha : float, default 0.4
        Smoothing factor passed to ``Series.ewm(alpha=..., adjust=False)``.
    reduction : bool, default False
        When True, return ``smoothed[::len(df)]`` instead of the full frame.
        NOTE(review): a step of ``len(df)`` keeps only the first row; confirm
        that this is the intended sparsity.

    Returns
    -------
    pandas.DataFrame
        Smoothed copy with the same columns (and, without reduction, the same
        index) as ``df``.
    """
    smoothed = df.copy()

    # Column-by-column so each Series is converted and smoothed on its own.
    for column in df:
        smoothed[column] = df[column].ewm(alpha=alpha, adjust=False).mean()

    if not reduction:
        return smoothed

    # An empty frame would give a slice step of 0, which raises ValueError.
    if len(df) == 0:
        return smoothed
    return smoothed[::len(df)]  # Adjust sparsity if needed

def wgn_pandas(df_withtime, snr, alpha=0.15, window_size=120, rng=None):
    """Add windowed white Gaussian noise to every sensor column, then smooth.

    The frame is processed in consecutive blocks of ``window_size`` rows. For
    each block and column the mean signal power ``Ps`` is measured, the noise
    power is set to ``Pn = Ps / 10**(snr / 10)``, and Gaussian noise with
    standard deviation ``sqrt(Pn)`` is drawn. The noise is then divided by 100
    before being added, so the injected noise power is 40 dB below what
    ``snr`` alone implies. The noisy result is finally passed through
    ``filter_noise_es`` with ``alpha``.

    Parameters
    ----------
    df_withtime : pandas.DataFrame
        Must contain a 'TimeStamp' column; all other columns must be numeric.
    snr : float
        Signal-to-noise ratio in dB used to derive ``Pn``.
    alpha : float, default 0.15
        Smoothing factor forwarded to ``filter_noise_es``.
    window_size : int, default 120
        Number of rows per power-estimation block.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, as before; pass a seeded generator for reproducible noise.

    Returns
    -------
    pandas.DataFrame
        'TimeStamp' followed by the noisy, smoothed columns, on a fresh
        0..n-1 index. ``df_withtime`` itself is not modified.
    """
    df_no_timestamp = df_withtime.drop(columns=['TimeStamp'])

    noisy_windows = []
    for start in range(0, len(df_no_timestamp), window_size):
        window = df_no_timestamp.iloc[start:start + window_size]

        # Per-column mean power of this block and the matching noise power.
        Ps = np.sum(np.power(window, 2), axis=0) / len(window)
        Pn = Ps / (np.power(10, snr / 10))

        if rng is None:
            unit_noise = np.random.randn(*window.shape)
        else:
            unit_noise = rng.standard_normal(window.shape)
        noise = unit_noise * np.sqrt(Pn.values)
        noisy_windows.append(window + (noise / 100))

    # Concatenate once: keeps a float dtype and avoids filling an object-dtype
    # frame block by block.
    if noisy_windows:
        noisy_df = pd.concat(noisy_windows, axis=0)
    else:
        noisy_df = df_no_timestamp.copy()
    noisy_df = noisy_df.reset_index(drop=True)
    noisy_df = filter_noise_es(noisy_df, alpha)

    # Non-inplace reset so the caller's frame is never touched.
    df_timestamp = df_withtime['TimeStamp'].reset_index(drop=True)

    return pd.concat([df_timestamp, noisy_df], axis=1)

# Build the noisy, exponentially smoothed copy of the data (snr=30, smoothing alpha=0.15).
df_noisy_wgn = wgn_pandas(df_data_withtime, 30, alpha=0.15)

In [18]:
# Sensor columns to keep, in the order they will follow 'TimeStamp'.
feature_set = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

# Rebinds df_data_withtime to the noisy frame, restricted to TimeStamp + feature_set.
df_data_withtime = df_noisy_wgn[['TimeStamp'] + feature_set]

In [None]:
# Inspect the noisy frame (rendered as the cell's last expression).
df_noisy_wgn

In [15]:
def filter_noise_ma(df, WS = 100,reduction = False):
    """Smooth every column with a trailing moving average of ``WS`` rows.

    The first ``WS`` rows (by index label) are dropped from the result; the
    original index labels of the remaining rows are kept. With
    ``reduction=True`` only every ``WS``-th remaining row is returned.
    ``df`` itself is left unchanged.
    """
    averaged = df.copy(deep=True)

    for name in df:
        averaged[name] = averaged[name].rolling(WS).mean()

    # Note: rolling(WS) leaves WS - 1 leading NaNs, but WS rows are removed here.
    trimmed = averaged.drop(df.index[:WS])
    return trimmed[::WS] if reduction else trimmed
        
# Alternative augmentation: add noise proportional to each value's magnitude,
# then smooth with a short moving average.
df_no_timestamp = df_data_withtime.drop(columns=['TimeStamp'])
# float32, not float16: float16 keeps only about 3 significant decimal digits,
# so the signal would be quantized before the (much smaller) noise is added.
df_no_timestamp = torch.tensor(df_no_timestamp.values, dtype=torch.float32)

window_size = 120
mv_ws = 5
noise_level = 0.002

noisy_data = []
for start in range(0, len(df_no_timestamp), window_size):
    window = df_no_timestamp[start:start + window_size]

    # Noise standard deviation is noise_level * |value|, element by element.
    feature_noise_scale = noise_level * window.abs()
    noise = feature_noise_scale * torch.randn(window.shape, device=window.device)
    noisy_window = window + noise
    noisy_data.append(noisy_window)

noisy_data = torch.cat(noisy_data, dim=0).numpy()
noisy_data_df = filter_noise_ma(pd.DataFrame(noisy_data, columns=df_data_withtime.columns.drop('TimeStamp')), mv_ws)
noisy_data_df = noisy_data_df.reset_index(drop=True)

# filter_noise_ma drops the first mv_ws rows, so drop the same number of
# timestamps (positionally) to keep both pieces aligned row for row.
df_newtimestamp = df_data_withtime['TimeStamp'].iloc[mv_ws:].reset_index(drop=True)

df_data_withtime = pd.concat([df_newtimestamp, noisy_data_df], axis=1)

# Sensor columns to keep, in the order they will follow 'TimeStamp'.
feature_set = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

df_data_withtime = df_data_withtime[['TimeStamp'] + feature_set]

In [None]:
# Set 1
# df_data_withtime = df_data_withtime[['TimeStamp', 'Active Power', 'Opening Wicked Gate', 'Penstock Flow', 'Turbine flow', 'Penstock pressure',
#        'Stator core temperature', 'Stator winding temperature 13', 'Stator winding temperature 14', 'Stator winding temperature 15',
#        'Surface Air Cooler Air Outlet Temperature', 'Surface Air Cooler Water Inlet Temperature', 'Surface Air Cooler Water Outlet Temperature',
#        'UGB cooling water flow']]

In [46]:
# Select and reorder the columns for export (TimeStamp + 29 sensor columns).
# 'Reactive Power' is not in this list, so it is dropped here if present.
df_data_withtime = df_data_withtime[['TimeStamp', 'Active Power', 'Governor speed actual', 'Opening Wicked Gate', 'Penstock Flow', 'Turbine flow', 'Penstock pressure', 'Governor Penstock Pressure',
       'UGB X displacement', 'UGB Y displacement', 'LGB X displacement', 'LGB Y displacement', 'TGB X displacement', 'TGB Y displacement',
       'Stator winding temperature 13', 'Stator winding temperature 14', 'Stator winding temperature 15',
       'Surface Air Cooler Air Outlet Temperature',
       'Surface Air Cooler Water Inlet Temperature',
       'Surface Air Cooler Water Outlet Temperature',
       'Stator core temperature', 'UGB metal temperature',
       'LGB metal temperature 1', 'LGB metal temperature 2',
       'LGB oil temperature', 'UGB cooling water flow', 'LGB cooling water flow',
       'Generator cooling water flow', 'UGB Oil Contaminant', 'Gen Thrust Bearing Oil Contaminant']]

In [17]:
# Forward-fill missing values; fillna(method='ffill') is deprecated in favour of .ffill().
df_data_withtime = df_data_withtime.ffill()

In [16]:
# df_data_withtime = df_data_withtime.rename(columns={'TimeStamp': 'date'})
# df_data_withtime.reset_index(drop=True, inplace=True)
# df_data_withtime.to_csv("data_pi/set1.csv", index=False)

In [18]:
# Check (rows, columns) after column selection and forward-fill.
df_data_withtime.shape

(1496052, 31)

In [19]:
# Build per-row anomaly labels and remove the shutdown periods themselves.
#   label_anomaly = 1 for rows in the 3 hours before an event: [start - 3h, start)
#   rows inside an event, [start, end), are removed from both frames.
# NOTE(review): if the earlier cell that drops everything within +/- 3 days of
# each event has been applied to df_data_withtime, no rows remain in these
# windows and every label will be 0. Confirm which of the two is intended.
timestamps = df_data_withtime['TimeStamp']

# Positional boolean masks accumulated over all events, so each frame is
# filtered once instead of being copied once per event.
label_mask = np.zeros(len(timestamps), dtype=bool)
mask_remove = np.zeros(len(timestamps), dtype=bool)

for _, row in df_anomaly.iterrows():
    start_time = row['Start Time']
    end_time = row['End Time']
    pre_start_time = start_time - pd.Timedelta(hours=3)

    label_mask |= ((timestamps >= pre_start_time) & (timestamps < start_time)).to_numpy()
    mask_remove |= ((timestamps >= start_time) & (timestamps < end_time)).to_numpy()

df_label = pd.DataFrame({
    'TimeStamp': timestamps,
    'label_anomaly': label_mask.astype('int64')
})

# Apply the same removal mask to both frames so they stay aligned row for row,
# then renumber the index.
df_data_withtime = df_data_withtime.loc[~mask_remove].reset_index(drop=True)
df_label = df_label.loc[~mask_remove].reset_index(drop=True)


In [28]:
# Add light noise proportional to each value's magnitude (std = 0.001 * |value|), with no smoothing.
# errors='ignore' tolerates a missing 'TimeStamp' here, but the column is still
# required further down when new_df is assembled.
df_no_timestamp = df_data_withtime.drop(columns=['TimeStamp'], errors='ignore')
# No dtype is given, so the tensor takes the dtype of the underlying numpy array.
df_no_timestamp = torch.tensor(df_no_timestamp.values)

window_size=360
noisy_data = []
# The noise is element-wise, so the block size only affects how the random draws are batched.
for start in range(0, len(df_no_timestamp), window_size):
    window = df_no_timestamp[start:start + window_size]

    feature_noise_scale = 0.001 * window.abs()
    noise = feature_noise_scale * torch.randn(window.shape, device=window.device)
    noisy_window = window + noise
    noisy_data.append(noisy_window)

noisy_data = torch.cat(noisy_data, dim=0).numpy()
# Re-attach the timestamps positionally (both sides are on a 0..n-1 index).
new_df = pd.concat( [df_data_withtime['TimeStamp'].reset_index(drop=True), pd.DataFrame(noisy_data, columns=df_data_withtime.columns.drop('TimeStamp'))],
            axis=1
        )

In [32]:
# Number of label rows; should equal the number of data rows they describe.
len(df_label)

1469549

In [20]:
# Use df_data_withtime as-is for the export; this rebinds new_df, replacing any frame a previous cell stored there.
new_df = df_data_withtime

In [21]:
# Chronological 80/20 split (no shuffling) and export as CSV.
# Data and labels are split by position, so they must have the same length
# and row order; otherwise test_label would silently describe the wrong rows.
assert len(df_label) == len(new_df), (
    f"df_label has {len(df_label)} rows but new_df has {len(new_df)}; labels and data are misaligned"
)

sampels = int(len(new_df) * 0.8)
# Non-inplace reset_index: the slices are replaced by fresh, independent frames.
train_data = new_df[:sampels].reset_index(drop=True)
test_data = new_df[sampels:].reset_index(drop=True)
test_label = df_label[sampels:].reset_index(drop=True)

output_dir = "/run/media/fourier/Data2/Pras/Vale/TN_Anom/DTAAD/data/CustomAWGN30ES15"
train_data.to_csv(f"{output_dir}/train.csv", index=False)
test_data.to_csv(f"{output_dir}/test.csv", index=False)
test_label.to_csv(f"{output_dir}/test_label.csv", index=False)

In [48]:
# (rows, columns) of the training split.
train_data.shape

(416100, 30)

In [46]:
# (rows, columns) of the test labels; the row count should match test_data.
test_label.shape

(104026, 2)

In [47]:
# (rows, columns) of the test split.
test_data.shape

(104026, 30)

In [3]:
# Keep only rows recorded while the unit is loaded: drop the 'No Load',
# 'Shutdown' and 'Warming' regimes. Requires a 'Load_Type' column.
df_data_withtime = df_data_withtime[~df_data_withtime['Load_Type'].isin(['No Load', 'Shutdown', 'Warming'])]

mask = (df_data_withtime['TimeStamp'] >= '2020-01-01 00:00:00')
df_data_withtime = df_data_withtime.loc[mask]
# fillna(method='ffill') is deprecated; .ffill() is the direct replacement.
df_data_withtime = df_data_withtime.ffill()

In [4]:
# Drop the two non-sensor columns so that only numeric features remain.
df_data_withtime = df_data_withtime.drop(['TimeStamp', 'Load_Type'], axis=1)

# Disabled: min-max scaling of all features.
#min_max_scaler = preprocessing.MinMaxScaler()
#df_data = pd.DataFrame(min_max_scaler.fit_transform(df_data_withtime.values), columns=df_data_withtime.columns)

# Disabled: explicit feature selection; all remaining columns are used instead.
# df_data = df_data_withtime[['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
#        'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
#        'TGB Y displacement', 'Stator winding temperature 13',
#        'Stator winding temperature 14', 'Stator winding temperature 15',
#        'Surface Air Cooler Air Outlet Temperature',
#        'Surface Air Cooler Water Inlet Temperature',
#        'Surface Air Cooler Water Outlet Temperature',
#        'Stator core temperature', 'UGB metal temperature',
#        'LGB metal temperature 1', 'LGB metal temperature 2',
#        'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
#        'UGB cooling water flow', 'LGB cooling water flow',
#        'Generator cooling water flow', 'Governor Penstock Pressure',
#        'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
#        'Gen Thrust Bearing Oil Contaminant']]

# df_data is another name for the same frame (no copy is made).
df_data = df_data_withtime

In [6]:
# Model and training hyper-parameters. Nothing in this cell is consumed by the
# cells visible in this notebook except train_size.
hidden_size = 256
hidden_layer_depth = 1
latent_length = 128
batch_size = 192
learning_rate = 8e-4
n_epochs = 80
dropout_rate = 0.1
optimizer = 'Adam' # options: Adam, SGD
cuda = True # options: True, False
print_every=150
clip = True # options: True, False
max_grad_norm=5
loss_type = 'MSELoss' # options: SmoothL1Loss, MSELoss
block = 'LSTM' # options: LSTM, GRU
dload = './model_dir' #download directory
train_size = 0.8  # fraction of rows (from the start) used for training

# Window lengths in rows: 12 * 60 = 720 and 0 * 60 = 0.
# NOTE(review): presumably minutes at a 1-minute sampling interval (12 h measured, 0 h predicted); confirm.
measured_horizon = 12 * 60
predicted_horizon = 0 * 60

In [7]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Chronological split: the first train_size fraction of rows is train, the rest is test.
sampels = int(len(df_data) * train_size) # use a parameter to control training size
train_data = df_data[:sampels]
test_data = df_data[sampels:]

train_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

# index=False is not passed, so the row index is written as an extra leading CSV column.
train_data.to_csv("data_pi/train2.csv")
test_data.to_csv("data_pi/test2.csv")

In [8]:
# Renumber both splits from 0 (repeats the reset already done in the split cell).
train_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

In [9]:
# Write both splits to CSV (repeats the export from the split cell); the row index is included as the first column.
train_data.to_csv("data_pi/train2.csv")
test_data.to_csv("data_pi/test2.csv")

In [10]:
# Read an exported CSV back to check it.
# NOTE(review): this loads train2.csv into a variable named test_data, and from an
# absolute Anomaly-Transformer path rather than the relative data_pi/ path used for writing; confirm both are intended.
test_data = pd.read_csv('/run/media/fourier/Data2/Pras/Vale/Anomaly-Transformer/data_pi/train2.csv')

In [11]:
# Shape of the data with the first CSV column skipped.
test_data.values[:, 1:].shape

(1683072, 33)

In [13]:
# Same check as the previous cell: shape with the first CSV column skipped.
test_data.values[:, 1:].shape

(1683072, 33)

In [74]:
def filter_noise_ma(df, WS = 100,reduction = False):
    """Apply a trailing ``WS``-row moving average to each column of ``df``.

    Returns a new frame without the first ``WS`` rows (selected by index
    label); surviving rows keep their original index labels. When
    ``reduction`` is true, the result is further thinned to every ``WS``-th
    row. The input frame is not modified.
    """
    result = df.copy(deep=True)

    for col_name in df:
        result[col_name] = result[col_name].rolling(WS).mean()

    # rolling(WS) yields WS - 1 leading NaNs; WS rows are discarded regardless.
    result = result.drop(df.index[:WS])
    if not reduction:
        return result
    return result[::WS]

# Build the 2023-only forecasting set: load, add proportional noise, smooth
# with a moving average, then select the export columns.
# NOTE: pd.read_pickle can execute arbitrary code while loading; only use it on trusted files.
df_data_withtime = pd.read_pickle("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/my_data_5thn_olah.pickle")
mask = (df_data_withtime['TimeStamp'] >= '2023-01-01 00:00:00')
# .copy() so the column assignments below act on an independent frame, not on
# a slice of the loaded one.
df_data_withtime = df_data_withtime.loc[mask].copy()

for column_name in df_data_withtime.columns:
    if column_name != 'Load_Type' and column_name != 'TimeStamp':
        df_data_withtime[column_name] = pd.to_numeric(df_data_withtime[column_name], downcast='float')

# fillna(method='ffill') is deprecated; .ffill() is the direct replacement.
df_data_withtime = df_data_withtime.ffill()
df_data_withtime = df_data_withtime.reset_index(drop=True)

df_no_timestamp = df_data_withtime.drop(columns=['TimeStamp'])
# float32, not float16: float16 keeps only about 3 significant decimal digits,
# so the signal would be quantized before the (much smaller) noise is added.
df_no_timestamp = torch.tensor(df_no_timestamp.values, dtype=torch.float32)

window_size = 120
mv_ws = 10
noise_level = 0.001

noisy_data = []
for start in range(0, len(df_no_timestamp), window_size):
    window = df_no_timestamp[start:start + window_size]
    # Noise standard deviation is noise_level * |value|, element by element.
    feature_noise_scale = noise_level * window.abs()
    noise = feature_noise_scale * torch.randn(window.shape, device=window.device)
    noisy_window = window + noise
    noisy_data.append(noisy_window)

noisy_data = torch.cat(noisy_data, dim=0).numpy()
noisy_data_df = filter_noise_ma(pd.DataFrame(noisy_data, columns=df_data_withtime.columns.drop('TimeStamp')), mv_ws)
noisy_data_df = noisy_data_df.reset_index(drop=True)

# filter_noise_ma drops the first mv_ws rows, so drop the same number of
# timestamps (positionally) to keep both pieces aligned row for row.
timestamp_toconcat = df_data_withtime['TimeStamp'].iloc[mv_ws:].reset_index(drop=True)

df_data_preprocess = pd.concat([timestamp_toconcat, noisy_data_df], axis=1)

# Any non-zero count here means the timestamp and sensor parts are misaligned
# or that NaNs survived the forward-fill.
print(df_data_preprocess.isna().sum())

# Export columns: TimeStamp + 28 sensors ('Reactive Power' and
# 'Governor Penstock Pressure' are left out).
df_data = df_data_preprocess[['TimeStamp', 'Active Power', 'Governor speed actual', 'Penstock Flow', 'Turbine flow', 'Opening Wicked Gate', 'Penstock pressure',
       'UGB X displacement', 'UGB Y displacement', 'LGB X displacement', 'LGB Y displacement', 'TGB X displacement', 'TGB Y displacement',
       'Stator winding temperature 13', 'Stator winding temperature 14', 'Stator winding temperature 15', 'Stator core temperature',
       'Surface Air Cooler Air Outlet Temperature', 'Surface Air Cooler Water Inlet Temperature', 'Surface Air Cooler Water Outlet Temperature',
       'UGB metal temperature', 'LGB metal temperature 1', 'LGB metal temperature 2',
       'UGB cooling water flow', 'LGB cooling water flow',
       'Generator cooling water flow', 'UGB Oil Contaminant',
       'LGB oil temperature', 'Gen Thrust Bearing Oil Contaminant']]

df_data = df_data.reset_index(drop=True)
df_data = df_data.rename(columns={'TimeStamp': 'date'})

TimeStamp                                      0
Governor speed actual                          0
UGB X displacement                             0
UGB Y displacement                             0
LGB X displacement                             0
LGB Y displacement                             0
TGB X displacement                             0
TGB Y displacement                             0
Stator winding temperature 13                  0
Stator winding temperature 14                  0
Stator winding temperature 15                  0
Surface Air Cooler Air Outlet Temperature      0
Surface Air Cooler Water Inlet Temperature     0
Surface Air Cooler Water Outlet Temperature    0
Stator core temperature                        0
UGB metal temperature                          0
LGB metal temperature 1                        0
LGB metal temperature 2                        0
LGB oil temperature                            0
Penstock Flow                                  0
Turbine flow        

In [75]:
# Renumber the rows from 0 and write the 2023 set to CSV without the index column.
df_data.reset_index(drop=True, inplace=True)
df_data.to_csv("data_pi/train_2023_fore.csv", index=False)

In [77]:
# (rows, columns) of the exported 2023 set.
df_data.shape

(525590, 29)

In [58]:
# Sanity check: number of entries in the full column list ('TimeStamp' plus the sensor columns).
len(['TimeStamp', 'Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant'])

31

In [70]:
# First of two column lists being compared (scratch check; the result is in `c` below).
a = ['Active Power', 'Reactive Power', 'Governor speed actual',
       'Penstock Flow', 'Turbine flow', 'Opening Wicked Gate',
       'Penstock pressure', 'UGB X displacement', 'UGB Y displacement',
       'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
       'TGB Y displacement', 'Stator winding temperature 13',
       'Stator winding temperature 14', 'Stator winding temperature 15',
       'Stator core temperature', 'Surface Air Cooler Air Outlet Temperature',
       'Surface Air Cooler Water Inlet Temperature',
       'Surface Air Cooler Water Outlet Temperature', 'UGB metal temperature',
       'LGB metal temperature 1', 'LGB metal temperature 2',
       'UGB cooling water flow', 'LGB cooling water flow',
       'Generator cooling water flow', 'UGB Oil Contaminant',
       'LGB oil temperature', 'Gen Thrust Bearing Oil Contaminant']

In [71]:
# Second column list for the comparison; it additionally contains 'Governor Penstock Pressure'.
b = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15', 'Stator core temperature',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'UGB metal temperature', 'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

In [72]:
# Names that are in `a` but missing from `b` (one direction only; names that are only in `b` are not reported).
c = [val for val in a if val not in b]

In [73]:
# An empty list means every name in `a` also appears in `b`.
c

[]