In [1]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import os
import pandas as pd

from TSB_UAD.utils.max_mean_discrepancy import calculate_mmd
from TSB_UAD.utils.shapelets import calculate_shapelet_similarity
from TSB_UAD.utils.dtw import compute_dtw_distance
from TSB_UAD.utils.slidingWindows import find_length

In [2]:
# Pairwise similarity sweep: every series in `main_directory` is compared
# against every series in `directory`, and one ';'-separated CSV per main
# series is written to the current working directory.
main_directory = 'data/TSB-UAD-Synthetic/MGAB_add_random_walk_trend_0.2/'
main_files = sorted(os.listdir(main_directory))

directory = 'data/TSB-UAD-Synthetic/YAHOO_add_random_walk_trend_0.1/'
files = sorted(os.listdir(directory))
print("Total number of time series:", len(files))

# Derive the name prefixes from the directories themselves so the labels
# written to the CSV can never drift from the data that was actually read.
main_prefix = os.path.basename(os.path.normpath(main_directory)) + '_'
comparison_prefix = os.path.basename(os.path.normpath(directory)) + '_'

# Window of the main series that is compared: rows
# [starting_point, starting_point + max_length). Comparison series use
# their first `max_length` rows.
max_length = 10000
starting_point = 40000

# Column order of the output CSV. 'dwt_value' (sic) holds the DTW distance;
# the key is kept as-is because the aggregation cell further down refers to
# the column by this exact name.
result_columns = [
    'name', 'time_series_length', 'window_length', 'num_of_anomalies',
    'ts_comparison_name', 'mmd_value', 'dwt_value', 'shapelet_similarity',
]

for main_file in main_files:
    series_name = main_prefix + main_file
    result_name = series_name + '.csv'
    print(result_name)

    # Column 0 is read as the signal and column 1 as the 0/1 label.
    df1 = pd.read_csv(os.path.join(main_directory, main_file), header=None).dropna().to_numpy()
    data1 = df1[starting_point:starting_point + max_length, 0].astype(float)
    label1 = df1[starting_point:starting_point + max_length, 1].astype(int)

    # Everything below depends only on the main series, so compute it once
    # per main file instead of once per comparison.
    slidingWindow1 = find_length(data1)
    num_of_anomalies = int((label1 == 1).sum())

    records = []
    for j, comparison_file in enumerate(files):
        df2 = pd.read_csv(os.path.join(directory, comparison_file), header=None).dropna().to_numpy()
        data2 = df2[:max_length, 0].astype(float)

        mmd_value = calculate_mmd(data1, data2, gamma=1.0)
        dtw_value = compute_dtw_distance(data1, data2)
        shapelet_value = calculate_shapelet_similarity(data1, data2)

        records.append({
            'name': series_name,
            'time_series_length': len(data1),
            'window_length': slidingWindow1,
            'num_of_anomalies': num_of_anomalies,
            'ts_comparison_name': comparison_prefix + comparison_file,
            'mmd_value': mmd_value,
            'dwt_value': dtw_value,
            'shapelet_similarity': shapelet_value,
        })

        print('Time series', j, 'has completed')

    # Passing `columns` keeps the header intact even when `files` is empty.
    results_df = pd.DataFrame(records, columns=result_columns)
    results_df.to_csv(result_name, sep=';')
    print('File saved')


Total number of time series: 367
MGAB_add_random_walk_trend_0.2_1.test.out.csv
Time series 0 has completed
Time series 1 has completed
Time series 2 has completed
Time series 3 has completed
Time series 4 has completed
Time series 5 has completed
Time series 6 has completed
Time series 7 has completed
Time series 8 has completed
Time series 9 has completed
Time series 10 has completed
Time series 11 has completed
Time series 12 has completed
Time series 13 has completed
Time series 14 has completed
Time series 15 has completed
Time series 16 has completed
Time series 17 has completed
Time series 18 has completed
Time series 19 has completed
Time series 20 has completed
Time series 21 has completed
Time series 22 has completed
Time series 23 has completed
Time series 24 has completed
Time series 25 has completed
Time series 26 has completed
Time series 27 has completed
Time series 28 has completed
Time series 29 has completed
Time series 30 has completed
Time series 31 has completed
Tim

In [2]:
def find_minimum_row(folder_path, value_column):
    """
    Finds the row with the minimum value in the specified column across multiple CSV files.

    Only rows whose 'num_of_anomalies' is non-zero are eligible. The filter is
    applied before the per-file minimum is taken, so a file is not discarded
    just because its overall minimum happens to sit on a zero-anomaly row.

    Files are read with ';' as the separator and visited in sorted name order,
    which makes the result deterministic when several files tie on the minimum
    (the first file in sorted order wins).

    Parameters:
        folder_path (str): Path to the folder containing the CSV files.
        value_column (str): Name of the column to search for the minimum value.

    Returns:
        pd.Series or None: The row with the minimum value, or None if no valid data is found.
    """
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))

    if not csv_files:
        print("No CSV files found in the folder.")
        return None

    min_row = None
    min_value = float('inf')

    for file in csv_files:
        file_path = os.path.join(folder_path, file)

        try:
            # Read the CSV file
            df = pd.read_csv(file_path, sep=';')

            if value_column not in df.columns:
                print(f"Skipping {file}: Column '{value_column}' not found.")
                continue

            # Convert the column to numeric, setting invalid values to NaN
            df[value_column] = pd.to_numeric(df[value_column], errors='coerce')

            # Drop rows with NaN in the value column
            valid_df = df.dropna(subset=[value_column])

            if valid_df.empty:
                print(f"Skipping {file}: No valid data in column '{value_column}'.")
                continue

            # Keep only rows that have anomalies *before* looking for the
            # minimum. A missing 'num_of_anomalies' column raises KeyError
            # here, which is reported by the handler below.
            candidates = valid_df[valid_df['num_of_anomalies'] != 0]

            if candidates.empty:
                continue

            # Find the eligible row with the minimum value in the column
            current_min_row = candidates.loc[candidates[value_column].idxmin()]

            if current_min_row[value_column] < min_value:
                min_value = current_min_row[value_column]
                min_row = current_min_row

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error processing {file}: {e}")

    if min_row is None:
        print("No valid data found across all files.")
        return None

    return min_row


In [3]:
# Folders holding the per-series result CSVs, one folder per MGAB variant.
folder_path = [
    "MGAB_add_point_outlier_0.02",
    "MGAB_add_point_outlier_0.05",
    "MGAB_add_random_walk_trend_0.1",
    "MGAB_add_random_walk_trend_0.2",
    "MGAB_add_white_noise_0.5",
    "MGAB_add_white_noise_1.0",
    "MGAB_filter_fft_11",
    "MGAB_filter_fft_21",
]

# Metric column to minimise; 'dwt_value' is the other column used here.
column = 'mmd_value'

# One best row (or None) per folder, in the same order as `folder_path`.
res = [find_minimum_row(folder, column) for folder in folder_path]

In [4]:
# Print the best-matching pair for each folder. `res` is parallel to
# `folder_path`; find_minimum_row returns None when a folder yields no usable
# row, so that case is reported instead of failing on None['name'].
for folder, best_row in zip(folder_path, res):
    if best_row is None:
        print(folder, '-', 'no matching row found')
    else:
        print(best_row['name'], '-', best_row['ts_comparison_name'])
    print('================================================================================')

MGAB_add_point_outlier_002_10.test.out - Yahoo_A1real_60_data.out
MGAB_add_point_outlier_005_4.test.out - YAHOO_add_point_outlier_005_Yahoo_A1real_60_data.out
MGAB_add_random_walk_trend_0.1_5.test.out - YAHOO_add_random_walk_trend_0.1_Yahoo_A1real_3_data.out
MGAB_add_random_walk_trend_0.2_5.test.out - YAHOO_add_random_walk_trend_0.2_Yahoo_A1real_54_data.out
MGAB_add_white_noise_0.5_9.test.out - YAHOO_add_white_noise_0.5_Yahoo_A1real_60_data.out
MGAB_add_white_noise_1_2.test.out - YAHOO_add_white_noise_1_Yahoo_A1real_60_data.out
MGAB_filter_fft_11_2.test.out - YAHOO_filter_fft_11_Yahoo_A1real_60_data.out
MGAB_filter_fft_21_2.test.out - YAHOO_filter_fft_11_Yahoo_A1real_60_data.out
