In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import datetime
import numpy as np
from tornado.httputil import format_timestamp


#TODO check if there are no anomalies that pass midnight end date has differnt date to start date.
def extract_attack_timeline(excel_file_path):
    # Read the Excel file
    df = pd.read_excel(excel_file_path)
    df = df[:41]
    # Convert start time column to datetime format
    df.iloc[:, 1] = pd.to_datetime(df.iloc[:, 1])

    # Create a nested list of attack times [start_time, end_time]
    attack_timeline = [None] * len(df)  # Pre-allocate list with None values
    for idx, row in df.iterrows():
        start_time = row.iloc[1]  # Get the start time from the second column
        end_time = row.iloc[2]    # Get the end time from the third column

        # Check if end_time is valid
        if pd.notna(end_time):
            # If end_time is a datetime.time object, convert it to timestamp
            if isinstance(end_time, datetime.time):
                # Combine date from start_time with time from end_time
                end_time = pd.Timestamp.combine(start_time.date(), end_time)
            # If it's already a timestamp or datetime but needs date adjustment
            elif hasattr(end_time, 'time') and not isinstance(end_time, pd.Timestamp):
                # Convert to timestamp while preserving the time component
                end_time = pd.Timestamp(
                    year=start_time.year,
                    month=start_time.month,
                    day=start_time.day,
                    hour=end_time.hour,
                    minute=end_time.minute,
                    second=end_time.second
                )

            attack_timeline[idx] = [start_time, end_time]

    # Filter out None values for the final result while preserving indexes
    attack_timeline = [(i, attack) for i, attack in enumerate(attack_timeline) if attack is not None]

    return attack_timeline

# Example usage
if __name__ == "__main__":
    file_path = "/home/andreas/Thesis/datasets/SWAT/SWaT.A1 & A2_Dec 2015/List_of_attacks_Final_swat.xlsx"
    anomaly_times = extract_attack_timeline(file_path)

    # Print the attack timeline with original indexes for verification

In [6]:
#TODO check which graph which anomaly it goes wrong
# for i in range(1,10):
#     fp_timestamps = return_series(f'~/baselines/GDN/results/swat/fp_timesteps{i}.txt')
#     fn_timestamps = return_series(f'~/baselines/GDN/results/swat/fn_timesteps{i}.txt')
#     for index,[start,end] in anomaly_times:
#         wrong_fns=fn_timestamps[(fn_timestamps <= start) & (fn_timestamps >= end)]
#         wrong_fps=fp_timestamps[(fp_timestamps >= start) & (fp_timestamps <= end)]
#         print(wrong_fns)
#         print(f'fp {wrong_fps}')

In [17]:
wrong_fps

Series([], dtype: datetime64[ns])

In [None]:


def visualize_anomaly_timeline(full_timeline_start, full_timeline_end,
                              anomaly_start, anomaly_end,
                              anomaly_description,
                              fp_timestamps=None, fn_timestamps=None):
    """
    Create a timeline visualization highlighting a specific anomaly period.

    Parameters:
    -----------
    full_timeline_start : datetime
        Start time of the full visualization period
    full_timeline_end : datetime
        End time of the full visualization period
    anomaly_start : datetime
        Start time of the specific anomaly
    anomaly_end : datetime
        End time of the specific anomaly
    anomaly_description : str
        Description of the anomaly to use as title
    fp_timestamps : list or pandas.Series, optional
        Timestamps of false positives to plot
    fn_timestamps : list or pandas.Series, optional
        Timestamps of false negatives to plot

    Returns:
    --------
    fig : matplotlib.figure.Figure
        The figure object for the visualization
    """
    # Create figure and axis
    fig, ax = plt.subplots(figsize=(12, 5))

    # Calculate timeline durations
    full_duration = (full_timeline_end - full_timeline_start).total_seconds() / 60  # in minutes
    anomaly_duration = (anomaly_end - anomaly_start).total_seconds() / 60  # in minutes

    # Plot the full timeline as a gray bar
    ax.axhspan(0.4, 0.6, xmin=0, xmax=1, color='lightgray', alpha=0.5, label='Timeline')

    # Calculate the relative position of the anomaly in the full timeline
    anomaly_start_rel = (anomaly_start - full_timeline_start).total_seconds() / (full_timeline_end - full_timeline_start).total_seconds()
    anomaly_end_rel = (anomaly_end - full_timeline_start).total_seconds() / (full_timeline_end - full_timeline_start).total_seconds()

    # Plot the anomaly period as a red bar
    ax.axhspan(0.4, 0.6, xmin=anomaly_start_rel, xmax=anomaly_end_rel, color='red', alpha=0.7, label='Anomaly')

    # Add start and end markers for the anomaly
    ax.axvline(x=anomaly_start, color='red', linestyle='--', lw=1.5, label='Anomaly Start/End')
    ax.axvline(x=anomaly_end, color='red', linestyle='--', lw=1.5)

    # Plot false positive and false negative markers if provided
    if fp_timestamps is not None:
        for ts in pd.Series(fp_timestamps):
            if full_timeline_start <= ts <= full_timeline_end:
                ax.scatter(ts, 0.7, color='orange', s=50, marker='|', label='False Positive' if ts == fp_timestamps[0] else "")

    if fn_timestamps is not None:
        for ts in pd.Series(fn_timestamps):
            if full_timeline_start <= ts <= full_timeline_end:
                ax.scatter(ts, 0.3, color='blue', s=50, marker='|', label='False Negative' if ts == fn_timestamps[0] else "")

    # Add labels for anomaly start and end times
    ax.text(anomaly_start, 0.2, anomaly_start.strftime('%H:%M:%S'),
            ha='center', va='bottom', color='red', fontweight='bold')
    ax.text(anomaly_end, 0.2, anomaly_end.strftime('%H:%M:%S'),
            ha='center', va='bottom', color='red', fontweight='bold')

    # Add duration text
    duration_text = f"Duration: {anomaly_duration:.1f} min"
    ax.text((anomaly_start + (anomaly_end - anomaly_start)/2), 0.5,
            duration_text, ha='center', va='center',
            color='white', fontweight='bold')

    # Format the x-axis to show time properly
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=max(1, int(full_duration/10))))

    # Customize the plot
    ax.set_xlim(full_timeline_start, full_timeline_end)
    ax.set_ylim(0, 1)
    ax.set_yticks([])  # Hide y-axis ticks
    ax.set_title(f"Anomaly: {anomaly_description}", fontsize=14, pad=20)
    ax.set_xlabel('Time', fontsize=12)

    # Add legend
    handles, labels = [], []
    for handle, label in zip(*ax.get_legend_handles_labels()):
        if label not in labels:
            handles.append(handle)
            labels.append(label)
    ax.legend(handles, labels, loc='upper right')

    # Add attack details box
    attack_details = (
        f"Anomaly Details:\n"
        f"Start: {anomaly_start.strftime('%Y-%m-%d %H:%M:%S')}\n"
        f"End: {anomaly_end.strftime('%Y-%m-%d %H:%M:%S')}\n"
        f"Duration: {anomaly_duration:.1f} minutes"
    )
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.02, 0.98, attack_details, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=props)

    plt.tight_layout()
    return fig

In [6]:
def visualise_for_one_graph(anomaly_timestamps,graph_id):
    fp_times = return_series(f'~/baselines/GDN/results/swat/fp_timesteps{graph_id}.txt')
    fn_times = return_series(f'~/baselines/GDN/results/swat/fn_timesteps{graph_id}.txt')
    for attack in anomaly_timestamps:
        attack_id = attack[0]
        attack_start = attack[1][0]
        attack_end = attack[1][1]
        vis_start_time = attack_start - datetime.timedelta(minutes=10)
        vis_end_time = attack_end + datetime.timedelta(minutes=10)
        superfig = visualize_anomaly_timeline(vis_start_time,vis_end_time,attack_start,attack_end,f'attack {attack_id}',fp_times,fn_times)
        plt.show()
        plt.savefig(f"anomaly_timeline_graph{graph_id}_attack{attack_id}")

visualise_for_one_graph(anomaly_times,1)

NameError: name 'return_series' is not defined

In [7]:
#utils
def time_plus(time, timedelta):
    start = datetime.datetime(
        2000, 1, 1,
        hour=time.hour, minute=time.minute, second=time.second)
    end = start + timedelta
    return end.time()

def return_timestamps(file_path):
    # Read file directly with pandas, without using the first column as index
    df = pd.read_csv(file_path, header=None, names=['id', 'datetime'])

    # Convert to datetime
    df['datetime'] = pd.to_datetime(df['datetime'], format='%d/%m/%Y %H:%M:%S')

    # Create Series with default index (0, 1, 2, ...)
    return pd.Series(df['datetime'].values)


def return_ids(file_path):
    # Read file directly with pandas, without using the first column as index
    df = pd.read_csv(file_path, header=None, names=['id', 'datetime'])
    # Create Series with default index (0, 1, 2, ...)
    return pd.Series(df['id'].values)

In [8]:
fn_ids = return_ids('~/Thesis/baselines/GDN/results/swat/fn_timesteps1.txt')


In [9]:
fn_ids

0         170
1         171
2         172
3         173
4         174
        ...  
2140    44510
2141    44511
2142    44512
2143    44513
2144    44514
Length: 2145, dtype: int64

In [11]:
test_set = pd.read_csv('/home/andreas/Thesis/baselines/GDN/data/swat/test.csv')

In [19]:
for i in range(1,10):
    fp_ids = return_ids(f'~/Thesis/baselines/GDN/results/swat/fp_timesteps{i}.txt')
    fn_ids = return_ids(f'~/Thesis/baselines/GDN/results/swat/fn_timesteps{i}.txt')
    fn_ids_table = test_set.iloc[fn_ids]
    fp_ids_table = test_set.iloc[fp_ids]
    print(f'number of false negatives in non-attack time {len(fn_ids_table[fn_ids_table['attack']==0])}')
    print(f'number of false positives in attack time {len(fp_ids_table[fp_ids_table['attack']==1])}')

number of false negatives in non-attack time 162
number of false positives in attack time 14
number of false negatives in non-attack time 171
number of false positives in attack time 25
number of false negatives in non-attack time 157
number of false positives in attack time 30
number of false negatives in non-attack time 161
number of false positives in attack time 26
number of false negatives in non-attack time 154
number of false positives in attack time 19
number of false negatives in non-attack time 163
number of false positives in attack time 14
number of false negatives in non-attack time 158
number of false positives in attack time 23
number of false negatives in non-attack time 164
number of false positives in attack time 22
number of false negatives in non-attack time 115
number of false positives in attack time 35
