# Test the Secure Water Treatment (SWaT) dataset

In [1]:
import pandas as pd
import plotly.graph_objects as go

In [None]:
# Read the SWaT "Normal" spreadsheet: the sheet's second row (header=1) supplies
# the column names and the first column becomes the index.
df = pd.read_excel(
    io=r"../data/raw/SWaT/SWaT.A1 _ A2_Dec 2015/Physical/SWaT_Dataset_Normal_v1.xlsx",
    index_col=0,
    header=1,
)
df.head()


In [None]:
# List the distinct values present in the "Normal/Attack" column.
df["Normal/Attack"].unique()

In [6]:
# Remove the "Normal/Attack" column from df in place.
df.drop(columns="Normal/Attack", inplace=True)

In [None]:
# Preview the first rows of df.
df.head()

In [None]:
# Convert the index into a DatetimeIndex.
# The format starts with a blank, so the raw index strings are expected to
# carry a leading space; day comes first and the clock is 12-hour with AM/PM.
timestamp_format = " %d/%m/%Y %I:%M:%S %p"
df.index = pd.to_datetime(df.index, format=timestamp_format)
df.index

In [None]:
# Show all columns (lift pandas' display limit) before summarising the frame.
pd.options.display.max_columns = None
df.describe()

In [None]:
# Optional shortcut: uncomment the next line to load the processed parquet copy
# instead of running the Excel-loading cells above.
# df = pd.read_parquet("../data/processed/SWaT_Dataset_Normal_v1.parquet")
df.head()

In [None]:
# Interactive overview of the normal data: one line trace per column, drawn
# from every `downsample_factor`-th row only.
downsample_factor = 100
df_downsampled = df.iloc[::downsample_factor]

traces = [
    go.Scatter(
        x=df_downsampled.index,
        y=df_downsampled[column],
        mode="lines",
        name=column,
    )
    for column in df_downsampled.columns
]
fig = go.Figure(data=traces)

# Label the x-axis according to the kind of index the frame actually has.
if isinstance(df_downsampled.index, pd.DatetimeIndex):
    index_label = "Datetime"
else:
    index_label = "Index"

fig.update_layout(
    title=f"SWaT Normal | original shape: {df.shape} | plot downsample factor: {downsample_factor}",
    xaxis_title=index_label,
    yaxis_title="Value",
    # Vertical legend placed just outside the right edge of the plot area.
    legend={
        "title": "Columns",
        "orientation": "v",
        "x": 1.02,
        "xanchor": "left",
        "y": 1,
    },
)
fig.show()

In [None]:
# Write df to the processed-data folder as a parquet file.
df.to_parquet(r"../data/processed/SWaT_Dataset_Normal_v1.parquet")

In [None]:
# Read the SWaT "Attack" spreadsheet: the sheet's second row (header=1) supplies
# the column names and the first column becomes the index.
df_a = pd.read_excel(
    io=r"../data/raw/SWaT/SWaT.A1 _ A2_Dec 2015/Physical/SWaT_Dataset_Attack_v0.xlsx",
    index_col=0,
    header=1,
)

# Convert the index into a DatetimeIndex; the format starts with a blank, so
# the raw index strings are expected to carry a leading space.
attack_timestamp_format = " %d/%m/%Y %I:%M:%S %p"
df_a.index = pd.to_datetime(df_a.index, format=attack_timestamp_format)
df_a.head()

In [None]:
# Interactive overview of the attack data: one line trace per column of df_a,
# drawn from every `downsample_factor`-th row only.
downsample_factor = 100
df_downsampled = df_a.iloc[::downsample_factor]

fig = go.Figure()
for col in df_downsampled.columns:
    fig.add_trace(
        go.Scatter(
            x=df_downsampled.index,
            y=df_downsampled[col],
            mode='lines',
            name=col
        )
    )
# Label the x-axis according to the kind of index the frame actually has.
index_label = "Datetime" if isinstance(df_downsampled.index, pd.DatetimeIndex) else "Index"
fig.update_layout(
    # Report the shape of the attack frame (df_a); the title previously used
    # df.shape, i.e. the shape of the normal dataset.
    title=f"SWaT Attack | original shape: {df_a.shape} | plot downsample factor: {downsample_factor}",
    xaxis_title=index_label,
    yaxis_title="Value",
    # Vertical legend placed just outside the right edge of the plot area.
    legend=dict(
        title="Columns",
        orientation="v",
        x=1.02,
        xanchor="left",
        y=1
    )
)
fig.show()

In [None]:
# Write df_a to the processed-data folder as a parquet file.
df_a.to_parquet(r"../data/processed/SWaT_Dataset_Attack_v0.parquet")


In [3]:
# Load the normal dataset from the processed parquet file (the same path the
# normal frame is written to earlier in this notebook).
df = pd.read_parquet(r"../data/processed/SWaT_Dataset_Normal_v1.parquet")

In [None]:
# Show the (rows, columns) dimensions of df.
df.shape

In [None]:
# Split the columns of df by their number of distinct values: columns with more
# than `continuous_threshold` distinct values are treated as continuous, all
# others as discrete.
continuous_threshold = 3

# Count distinct values once for the whole frame instead of re-scanning every
# column separately for each of the two lists. The result is indexed by column
# name in the same order as df.columns.
unique_counts = df.nunique()
is_continuous = unique_counts > continuous_threshold
continuous_features = unique_counts.index[is_continuous].tolist()
discrete_features = unique_counts.index[~is_continuous].tolist()

print(f"Continuous features: {len(continuous_features)}")
print(f"Discrete features: {len(discrete_features)}")
        