In [None]:
import copy
import json
import math
import os
import pickle
import re
from collections import Counter
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from category_encoders import one_hot
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
# import cudf
# from cuml.decomposition import PCA as cumlPCA

from lib.utilities import Repository



In [None]:
# Single handle on the recorded actions, the displays and the raw datasets;
# later cells read repo.actions, repo.displays and repo.data from it.
repo = Repository(
    './session_repositories/actions.tsv',
    './session_repositories/displays.tsv',
    './raw_datasets/',
)

In [None]:
# def mac_to_int(mac):
#     res = re.match('^((?:(?:[0-9a-f]{2}):){5}[0-9a-f]{2})$', mac.lower())
#     if res is None:
#         raise ValueError('invalid mac address')
#     # return int(res.group(0).replace(':', ''), 16)
#     return ','.join([str(int(x, 16)) for x in res.group(0).split(":")])

def mac_seperation(mac):
    """Split a colon-separated MAC address into its first and last three octets.

    The address is lower-cased and validated, then returned as one string of
    the form ``'xx:xx:xx,xx:xx:xx'`` (the two halves joined by a comma).

    Raises:
        ValueError: if ``mac`` is not six colon-separated hex byte pairs.
    """
    matched = re.match('^((?:(?:[0-9a-f]{2}):){5}[0-9a-f]{2})$', mac.lower())
    if matched is None:
        raise ValueError('invalid mac address')
    address = matched.group(0)
    # Characters 0-7 are the first three octets; index 8 is the colon between the halves.
    first_half, second_half = address[:8], address[9:]
    return f'{first_half},{second_half}'

def timestamp_decompose(timestamp):
    """Turn a ``'YYYY-MM-DD HH:MM:SS'`` string into ``'year,month,day,hour,minute,second'``.

    Each component is written as a plain integer (no zero padding).
    ``datetime.strptime`` raises ``ValueError`` for strings in another format.
    """
    parsed = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
    parts = (parsed.year, parsed.month, parsed.day,
             parsed.hour, parsed.minute, parsed.second)
    return ','.join(str(part) for part in parts)

def _fan_out_count(df, count_col, derived_cols):
    """Copy ``count_col`` into one ``<name>_count`` column per derived column, then drop it."""
    if count_col not in df.columns:
        return df
    df = df.assign(**{f'{name}_count': df[count_col] for name in derived_cols})
    return df.drop(columns=[count_col])


def alter_df_columns(df, drop_pid=True):
    """Expand compound packet columns into their component columns.

    Every step is applied only when its source column is present:

    * ``ip_dst`` / ``ip_src`` -> four float32 octet columns ``<field>_0..3``
      (a missing address yields NaN in all four).
    * ``eth_dst`` / ``eth_src`` -> ``<field>_oui`` and ``<field>_di`` strings,
      the two halves produced by ``mac_seperation``.
    * ``sniff_timestamp`` -> float32 ``year, month, day, hour, minutes, seconds``.
    * ``<field>_count`` -> copied to ``<component>_count`` for every component
      of that field, then dropped.
    * ``interface_captured`` is dropped; ``project_id`` is dropped when
      ``drop_pid`` is true.

    Args:
        df: input DataFrame. It is NOT modified; a transformed copy is returned.
        drop_pid: whether to drop the ``project_id`` column.

    Returns:
        A new DataFrame with the columns rewritten as described above.
    """
    # Work on a copy: the column assignments below would otherwise add columns
    # to the caller's frame (which may be a frame owned by the repository).
    df = df.copy()

    for field in ('ip_dst', 'ip_src'):
        octet_cols = [f'{field}_{i}' for i in range(4)]
        if field in df.columns:
            # 'x.x.x.x' is a placeholder so missing addresses still split into four parts.
            octets = (
                df[field]
                .fillna('x.x.x.x')
                .str.split('.', expand=True, n=4)
                .replace('x', np.nan)
                .astype('float32')
            )
            # Positional assignment; avoids any index alignment on duplicated indexes.
            df[octet_cols] = octets.to_numpy()
            df = df.drop(columns=[field])
        df = _fan_out_count(df, f'{field}_count', octet_cols)

    for field in ('eth_dst', 'eth_src'):
        half_cols = [f'{field}_oui', f'{field}_di']
        if field in df.columns:
            # Column-wise map instead of a row-wise DataFrame.apply(axis=1).
            tokens = df[field].map(mac_seperation)
            df[half_cols] = tokens.str.split(',', expand=True, n=2).to_numpy()
            df = df.drop(columns=[field])
        df = _fan_out_count(df, f'{field}_count', half_cols)

    time_cols = ['year', 'month', 'day', 'hour', 'minutes', 'seconds']
    if 'sniff_timestamp' in df.columns:
        tokens = df['sniff_timestamp'].map(timestamp_decompose)
        df[time_cols] = tokens.str.split(',', expand=True, n=6).astype('float32').to_numpy()
        df = df.drop(columns=['sniff_timestamp'])
    df = _fan_out_count(df, 'sniff_timestamp_count', time_cols)

    unwanted = ['interface_captured']
    if drop_pid:
        unwanted.append('project_id')
    return df.drop(columns=unwanted, errors='ignore')

def replace_numbers_and_uris(string):
    """Mask variable tokens in a string so that similar lines share one template.

    The substitutions run in the order listed below; each one works on the
    output of the previous one, so the order is significant (e.g. IP and MAC
    addresses are masked before the generic number rule can touch them).
    """
    substitutions = (
        # IPv4-looking dotted quads
        (r"\b(?:\d{1,3}\.){3}\d{1,3}\b", "<IP>"),
        # MAC addresses written with ':' or '-'
        (r"\b([0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\b", "<MAC>"),
        # Remaining decimal and 0x-prefixed hex numbers
        (r"\b(0x[a-fA-F0-9]+|\d+)\b", "<NUMBER>"),
        # The token following an HTTP request method (the method itself is kept)
        (r"\b(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH)\s+([^\s]+)", r"\1 <URI>"),
        # scheme://... URIs; note the pattern also swallows every following
        # whitespace-separated token up to the end of the string
        (r"\b(?:[a-zA-Z][a-zA-Z\d+.-]*):\/\/[^\s]+(?:\s+[^\s]+)*", "<URI>"),
        # Dotted domain names
        (r"\b(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}\b", "<DOMAIN>"),
    )
    masked = string
    for pattern, placeholder in substitutions:
        masked = re.sub(pattern, placeholder, masked)
    return masked

def extract_templates(strings):
    """Return the set of distinct templates obtained by masking each string in ``strings``."""
    return {replace_numbers_and_uris(text) for text in strings}

def identify_template(new_string, templates):
    """Return the masked form of ``new_string`` if it is one of ``templates``, else ``None``."""
    candidate = replace_numbers_and_uris(new_string)
    return candidate if candidate in templates else None

In [None]:
# for i in range(4):
#     print(repo.data[i].shape)
#     print()

#     for col in repo.data[i].columns:
#         print(col, repo.data[i][col].nunique())

In [None]:
# Stack the first four frames held by the repository into one frame.
df_list = [repo.data[i] for i in range(4)]
df = pd.concat(df_list)
print(df.shape)

# Expand the compound columns; project_id is kept because later cells filter on it.
df = alter_df_columns(df, drop_pid=False)
print(df.shape)

In [None]:
# df[['ip_dst_0', 'ip_dst_1', 'ip_dst_2', 'ip_dst_3']] = df['ip_dst'].str.split('.', expand=True, n=4).astype('float32')
# df[['ip_src_0', 'ip_src_1', 'ip_src_2', 'ip_src_3']] = df['ip_src'].str.split('.', expand=True, n=4).astype('float32')

# df['eth_dst_tokens'] = df.apply(lambda x: mac_seperation(x.eth_dst), axis=1)
# df[['eth_dst_oui', 'eth_dst_di']] = df['eth_dst_tokens'].str.split(',', expand=True, n=2)
# df['eth_src_tokens'] = df.apply(lambda x: mac_seperation(x.eth_src), axis=1)
# df[['eth_src_oui', 'eth_src_di']] = df['eth_src_tokens'].str.split(',', expand=True, n=2)
# df['timestamp_tokens'] = df.apply(lambda x: timestamp_decompose(x.sniff_timestamp), axis=1)
# df[['year', 'month', 'day', 'hour', 'minutes', 'seconds']] = df['timestamp_tokens'].str.split(',', expand=True, n=6).astype('float32')
# df = df.drop(columns=['ip_dst', 'ip_src', 'sniff_timestamp', 'timestamp_tokens', 'eth_dst', 'eth_src', 'eth_src_tokens', 'eth_dst_tokens', 'interface_captured'])

In [None]:
# Every info_line value of the combined frame, in row order.
strings = list(df['info_line'])

# Find common templates
# for template in templates:
#     print(template)
#     print('-----------------------------------------------------------------------------')

# Sorted so that each template gets a stable position.
templates = sorted(extract_templates(strings))

In [None]:
# categoric = ['highest_layer', 'eth_dst', 'eth_src', 'ip_dst', 'ip_src']
# # categoric = ['eth_dst', 'eth_src', 'highest_layer']
# numeric = ['captured_length', 'length', 'number', 
#             'tcp_dstport', 'tcp_srcport', 'tcp_stream', 'year', 'month', 'day', 'hour', 'minutes', 'seconds']
# # numeric = ['ip_dst_0', 'ip_dst_1', 'ip_dst_2', 'ip_dst_3', 'ip_src_0', 'ip_src_1', 'ip_src_2', 'ip_src_3', 'length', 'number', 'tcp_dstport', 'tcp_srcport', 'tcp_stream']
# id = ['project_id']
# text = ['info_line']
# # time = ['sniff_timestamp_epoch']

# # finally divide the dataset using 'project_id' = 1,2,3,4

In [None]:
# Columns handled as categorical features (same order as before:
# MAC halves, IP octets, then the remaining fields).
categoric = [
    *(f'eth_{side}_{half}' for side in ('dst', 'src') for half in ('oui', 'di')),
    *(f'ip_{side}_{octet}' for side in ('dst', 'src') for octet in range(4)),
    'highest_layer', 'info_line',
    'tcp_dstport', 'tcp_srcport',
    'year', 'month', 'day', 'hour', 'minutes', 'seconds',
]
# categoric = ['eth_dst', 'eth_src', 'highest_layer']

# Columns handled as continuous values.
numeric = [
    'captured_length',
    'length',
    'tcp_stream',
]
# numeric = ['ip_dst_0', 'ip_dst_1', 'ip_dst_2', 'ip_dst_3', 'ip_src_0', 'ip_src_1', 'ip_src_2', 'ip_src_3', 'length', 'number', 'tcp_dstport', 'tcp_srcport', 'tcp_stream']

# NOTE(review): this name shadows the builtin ``id`` for the rest of the notebook.
id = [
    'project_id',
]
number = [
    'number',
]
# time = ['sniff_timestamp_epoch']

# finally divide the dataset using 'project_id' = 1,2,3,4

In [None]:
# df_numeric = df[numeric].copy()
# df_categoric = df[categoric].copy().astype(dtype='object')
# df_id = df[id].copy()
# df_number = df[number].copy()

In [None]:
agg_types = ['count', 'min', 'max', 'sum', 'avg']

num_intervals = 100


def _bin_width(upper):
    """Width of one of ``num_intervals`` equal bins over [0, upper]; never below 1.

    The floor of 1 keeps later ``value / width`` divisions safe when ``upper`` is 0.
    """
    return max(1, math.ceil(upper / num_intervals))


# (low, high, bin width) for every numeric column and for each of its aggregates.
numeric_col_ranges = {}
for col in numeric:
    col_max = math.ceil(df[col].max())

    # Upper bounds for 'sum' and 'count' are the largest values seen within a
    # single project (project ids 1..4), not over the whole frame.
    per_project = [df.loc[df['project_id'] == pid, col] for pid in range(1, 5)]
    col_sum = max(0, *(math.ceil(float(values.sum())) for values in per_project))
    col_count = max(0, *(len(values) for values in per_project))

    numeric_col_ranges[col] = (0, col_max, _bin_width(col_max))

    upper_by_agg = {
        'count': col_count,
        'min': col_max,
        'max': col_max,
        'sum': col_sum,
        'avg': col_max,
    }
    for agg_type in agg_types:
        # The key must use agg_type: the previous f'{col}_{type}' interpolated the
        # builtin ``type`` and so never registered any '<col>_<agg>' range.
        upper = upper_by_agg[agg_type]
        numeric_col_ranges[f'{col}_{agg_type}'] = (0, upper, _bin_width(upper))

# Count columns of categoric / number fields can be at most the number of rows.
row_count = len(df)
count_range = (0, row_count, _bin_width(row_count))
categoric_col_ranges = {f'{col}_count': count_range for col in categoric}
number_col_ranges = {f'{col}_count': count_range for col in number}

# value -> bin index for every categoric column; the extra 'nan' key is the
# last index and collects missing / unmatched values.
categoric_col_uniques = {}
for col in categoric:
    if col == 'info_line':
        values = templates
    else:
        values = sorted(df[col].dropna().unique().tolist())
    mapping = {value: position for position, value in enumerate(values)}
    mapping['nan'] = len(values)
    categoric_col_uniques[col] = mapping
        

In [None]:
def get_distribution(qdf, col_bins):
    """Histogram every known column of ``qdf`` and normalise each histogram.

    Relies on the module-level lookup tables ``numeric_col_ranges``,
    ``categoric_col_uniques``, ``categoric_col_ranges`` and
    ``number_col_ranges``; columns of ``qdf`` found in none of them are ignored.

    Args:
        qdf: DataFrame whose columns are to be binned.
        col_bins: mapping column -> list of zero counts (the bin template).
            It is deep-copied, so the template itself is never modified.

    Returns:
        dict column -> float32 numpy array. Arrays with at least one counted
        value are divided by their total; the others stay all-zero.
    """
    col_bins = copy.deepcopy(col_bins)
    for col in qdf.columns:
        if col in numeric_col_ranges:
            # Continuous values: NaN goes to the last bin (index -1).
            width = numeric_col_ranges[col][2]
            for value in qdf[col]:
                index = -1 if pd.isna(value) else math.floor(value / width)
                col_bins[col][index] += 1
        elif col in categoric_col_uniques:
            lookup = categoric_col_uniques[col]
            if col == 'info_line':
                # Unmatched templates and NaNs share the 'nan' bin. NaN is
                # checked first because the masking regexes only accept strings.
                fallback = lookup['nan']
                for value in qdf[col]:
                    if pd.isna(value):
                        index = fallback
                    else:
                        index = lookup.get(replace_numbers_and_uris(value), fallback)
                    col_bins[col][index] += 1
            else:
                for value in qdf[col]:
                    index = -1 if pd.isna(value) else lookup[value]
                    col_bins[col][index] += 1
        elif col in categoric_col_ranges or col in number_col_ranges:
            # Count columns. A value equal to the range maximum would index one
            # past the end when the maximum is a multiple of the bin width, so
            # the index is clamped to the last bin.
            ranges = categoric_col_ranges if col in categoric_col_ranges else number_col_ranges
            width = ranges[col][2]
            last = len(col_bins[col]) - 1
            for value in qdf[col]:
                index = min(math.floor(value / width), last)
                col_bins[col][index] += 1

    for col in col_bins:
        total = sum(col_bins[col])
        col_bins[col] = np.array(col_bins[col], dtype=np.float32)
        if total > 0:
            col_bins[col] = col_bins[col] / total

    return col_bins

In [None]:
# Zero-count bin template, one list per feature column. It is filled group by
# group in the same order as before, so a later group would overwrite an
# earlier one on a shared key.
col_bins = {}
# numeric columns get one extra bin on top of num_intervals
col_bins.update({col: [0] * (num_intervals + 1) for col in numeric_col_ranges})
col_bins.update({col: [0] * num_intervals for col in categoric_col_ranges})
col_bins.update({col: [0] * num_intervals for col in number_col_ranges})
# categoric columns: one bin per known value, including the 'nan' entry
col_bins.update({col: [0] * len(uniques) for col, uniques in categoric_col_uniques.items()})

# Fixed (alphabetical) column order used when bins are concatenated into a vector.
feats_order = sorted(col_bins)

In [None]:
# Display ids that the cells below skip (presumably displays with known logic
# errors, going by the name).
logic_error_displays = [
    427, 428, 429, 430, 854, 855, 856, 868, 891, 977, 978, 979, 980,
    1304, 1908, 1909, 1983,
    2022, 2023, 2024, 2195,
    3244, 3446, 3447,
    4050, 4051, 4056, 4052, 4054, 4055, 4057, 4058, 4059, 4060,
    4061, 4062, 4063, 4064, 4065, 4066, 4067,
]


In [None]:
def _zero_feature_bins(bins_template):
    """All-zero float32 bins for every column; used for displays with no rows."""
    return {col: np.array(bins, dtype=np.float32) for col, bins in bins_template.items()}


def _grouped_display_frame(grp_df, aggregation):
    """Rename aggregated columns to '<field>_<type>' and flatten the group index.

    Returns a new frame; the frame handed out by the repository is not modified.
    """
    if aggregation:
        # Applied one by one, in order, exactly like successive renames.
        for col_and_type in aggregation['list']:
            agg_col = f"{col_and_type['field']}_{col_and_type['type']}"
            grp_df = grp_df.rename(columns={col_and_type['field']: agg_col})
    grp_df = grp_df.rename(columns={'number': 'number_count'})

    # Bring the grouping keys back as columns, then keep the first of any duplicated name.
    grp_df = grp_df.reset_index(allow_duplicates=True)
    return grp_df.loc[:, ~grp_df.columns.duplicated()].copy()


# display_id -> 1-D feature vector (all column bins concatenated in feats_order)
display_feats = {}
try:
    for tup in repo.displays.itertuples():
        if tup.display_id in logic_error_displays:
            continue

        grouping = json.loads(tup.grouping)
        aggregation = json.loads(tup.aggregations)

        try:
            raw_df, grp_df = repo.get_raw_display(tup.display_id)

            # Aggregated or grouped displays are described by the grouped
            # frame; purely filtered displays by the raw rows.
            if aggregation or len(grouping['list']) > 0:
                frame = None if grp_df.empty else _grouped_display_frame(grp_df, aggregation)
            else:
                frame = None if raw_df.empty else raw_df

            if frame is None:
                df_bins = _zero_feature_bins(col_bins)
            else:
                df_bins = get_distribution(alter_df_columns(frame), col_bins)

            if df_bins:
                display_feats[tup.display_id] = np.concatenate(
                    [df_bins[col] for col in feats_order]
                )
        except Exception as e:
            # Report the offending display and stop; everything collected so
            # far is still written by the ``finally`` below.
            print(tup.display_id, tup.filtering, grouping, aggregation)
            print(e)
            break
finally:
    # Written once at the end (also on errors or interruption) instead of
    # re-pickling the whole, growing dict after every single display.
    if display_feats:
        with open('./display_feats/display_feats_new.pickle', 'wb') as fout:
            pickle.dump(display_feats, fout, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the per-display feature vectors from disk.
# NOTE: pickle executes code on load; only open files produced by this notebook.
with open('./display_feats/display_feats_new.pickle', 'rb') as feats_file:
    display_feats = pickle.load(feats_file)

In [None]:
# One row per display, in the dict's iteration order.
feats_for_pca = np.array(list(display_feats.values()))

In [None]:
# Restrict the PCA input to the displays of a single project.
pid = 4
pid_rows = repo.displays[repo.displays['project_id'] == pid]
feats_for_pca = np.array([
    display_feats[row['display_id']]
    for _, row in pid_rows.iterrows()
    if row['display_id'] not in logic_error_displays
])

In [None]:
# Fit a PCA with as many components as the data allows, to inspect the
# explained-variance curve. The component count cannot exceed
# min(n_samples, n_features); using the sample count alone fails as soon as
# there are more displays than feature dimensions.
max_components = min(feats_for_pca.shape)
big_pca = PCA(n_components=max_components)
big_pca.fit(feats_for_pca)

In [None]:
# Smallest number of leading components whose cumulative explained-variance
# ratio reaches req_var (all of them if the threshold is never reached).
req_var = 0.9999
total_evr = 0.0
components = 0
for components, evr in enumerate(big_pca.explained_variance_ratio_, start=1):
    total_evr += evr
    if total_evr >= req_var:
        break

print(total_evr, components)

In [None]:
# Final projection with the selected number of components, plus a min-max
# scaler fitted on the projected training rows.
pca = PCA(n_components=components).fit(feats_for_pca)
pca_scaler = MinMaxScaler().fit(pca.transform(feats_for_pca))

In [None]:
# Project and scale the displays of project ``pid`` only, then store them.
display_pca_feats = {}
for _, row in repo.displays[repo.displays['project_id'] == pid].iterrows():
    did = row['display_id']
    if did in logic_error_displays:
        continue
    projected = pca.transform([display_feats[did]])
    display_pca_feats[did] = pca_scaler.transform(projected)[0]

# The file name carries the variance threshold (times 1e4) and the project id.
pid_out_path = f'./display_feats/display_pca_feats_{int(req_var * 1e4)}_{pid}.pickle'
with open(pid_out_path, 'wb') as fout:
    pickle.dump(display_pca_feats, fout, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Project and scale every display that has a feature vector, then store them.
display_pca_feats = {
    d: pca_scaler.transform(pca.transform([feats]))[0]
    for d, feats in display_feats.items()
}

all_out_path = f'./display_feats/display_pca_feats_{int(req_var * 1e4)}.pickle'
with open(all_out_path, 'wb') as fout:
    pickle.dump(display_pca_feats, fout, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Sanity check: list actions on an original column whose parent display has
# no feature vector and is not one of the skipped displays.
og_columns = [
    'captured_length', 'length', 'tcp_stream', 'number', 'eth_dst', 'eth_src',
    'highest_layer', 'info_line', 'interface_captured', 'ip_dst', 'ip_src',
    'sniff_timestamp', 'tcp_dstport', 'tcp_srcport',
]

logic_error_displays = [
    427, 428, 429, 430, 854, 855, 856, 868, 891, 977, 978, 979, 980,
    1304, 1908, 1909, 1983,
    2022, 2023, 2024, 2195,
    3244, 3446, 3447,
    4050, 4051, 4056, 4052, 4054, 4055, 4057, 4058, 4059, 4060,
    4061, 4062, 4063, 4064, 4065, 4066, 4067,
]

for _, row in repo.actions.iterrows():
    u = int(row['parent_display_id'])
    v = int(row['child_display_id'])
    aid = int(row['action_id'])
    action_type = row['action_type']
    action_params = row['action_params']

    # A sort without parameters is checked against the 'number' column.
    if action_type == 'sort' and not action_params:
        check_col = 'number'
    else:
        check_col = action_params['field']

    if check_col not in og_columns:
        continue

    # NOTE(review): u is the *parent* display id but the label printed is
    # 'child' - confirm which one is intended.
    if u not in display_feats and u not in logic_error_displays:
        print('child', u, aid)


    # if (not v in display_feats) and (not v in logic_error_displays):
    #     print('parent', v, aid)

In [None]:
from IGTD.Scripts.IGTD_Functions import min_max_transform, table_to_image, select_features_by_variation, multi_table_to_image

In [None]:
# NOTE(review): this reads display_feats.pickle, while the extraction cell
# writes display_feats_new.pickle - confirm that the older file is intended.
# NOTE: pickle executes code on load; only open files produced by this project.
with open('./display_feats/display_feats.pickle', 'rb') as feats_file:
    display_feats = pickle.load(feats_file)

In [None]:
# IGTD settings.
# Pixel rows / columns of the image representation.
num_row, num_col = 30, 30
# Number of features kept for the analysis = number of pixels in the image.
num = num_row * num_col
# Size (in inches) of the pictures saved while the IGTD algorithm runs.
save_image_size = 3
# Upper bound on IGTD iterations when the algorithm does not converge.
max_step = 30000
# Convergence window: the algorithm converges once the error reduction rate
# stays below a pre-set threshold for val_step iterations.
val_step = 300

In [None]:
# Build a (displays x features) table indexed by display id.
d_order = np.array(list(display_feats), dtype=np.int32)
feats_for_df = np.array(list(display_feats.values()))
new_columns = list(range(len(feats_for_df[0])))

qdf = pd.DataFrame(
    data=feats_for_df,
    index=pd.Index(d_order, name='display_id'),
    columns=new_columns,
)

# Keep the ``num`` features selected by variance. The result used to be stored
# in ``id``, which overwrote the builtin of that name.
selected_idx = select_features_by_variation(qdf, variation_measure='var', num=num)
qdf = qdf.iloc[:, selected_idx]

norm_data = min_max_transform(qdf.values)
norm_data = pd.DataFrame(norm_data, columns=qdf.columns, index=qdf.index)

fea_dist_method = 'Euclidean'
image_dist_method = 'Euclidean'
error = 'abs'
result_dir = './IGTD/results/display_img'
# Needs ``import os`` in the notebook's import cell.
os.makedirs(name=result_dir, exist_ok=True)
table_to_image(norm_data, [num_row, num_col], fea_dist_method, image_dist_method, save_image_size,
               max_step, val_step, result_dir, error)

In [None]:
# NOTE(review): ``display_feats_seperate`` is not defined in any visible cell;
# this cell only runs if it is created elsewhere - confirm where it comes from.
# Regroup the per-display parts: part position -> list of that part for every display.
feats_for_df = {}
d_order = []
for d in display_feats_seperate:
    for i, part in enumerate(display_feats_seperate[d]):
        feats_for_df.setdefault(i, []).append(part)
    d_order.append(d)
d_order = np.array(d_order, dtype=np.int32)

# Normalised table of every part. This list was previously left empty although
# the commented multi_table_to_image call below expects it to be filled.
d_list = []
for i in feats_for_df:
    feats_for_df[i] = np.array(feats_for_df[i])
    new_columns = list(range(len(feats_for_df[i][0])))

    qdf = pd.DataFrame(
        data=feats_for_df[i],
        index=pd.Index(d_order, name='display_id'),
        columns=new_columns,
    )

    # Stored in ``selected_idx`` rather than ``id`` to leave the builtin alone.
    selected_idx = select_features_by_variation(qdf, variation_measure='var', num=num)
    qdf = qdf.iloc[:, selected_idx]

    norm_data = min_max_transform(qdf.values)
    norm_data = pd.DataFrame(norm_data, columns=qdf.columns, index=qdf.index)
    d_list.append(norm_data)

    fea_dist_method = 'Euclidean'
    image_dist_method = 'Euclidean'
    error = 'abs'
    result_dir = f'./IGTD/results/display_img_{i}'
    # Needs ``import os`` in the notebook's import cell.
    os.makedirs(name=result_dir, exist_ok=True)
    table_to_image(norm_data, [num_row, num_col], fea_dist_method, image_dist_method, save_image_size,
                   max_step, val_step, result_dir, error)

In [None]:
# image_dist_method = 'Euclidean'
# error = 'abs'
# result_dir = './IGTD/results/display_images'
# os.makedirs(name=result_dir, exist_ok=True)

# method_list = ['Euclidean'] * len(d_list)
# weight_list = [1 / len(d_list)] * len(d_list)
# multi_table_to_image(norm_d_list=d_list, weight_list=weight_list,
#                      fea_dist_method_list=method_list, scale=[num_row, num_col],
#                      image_dist_method=image_dist_method, save_image_size=save_image_size,
#                      max_step=max_step, val_step=val_step, normDir=result_dir, error=error,
#                      switch_t=0, min_gain=0.000001)

In [None]:
# # Run the IGTD algorithm using (1) the Euclidean distance for calculating pairwise feature distances and pariwise pixel
# # distances and (2) the absolute function for evaluating the difference between the feature distance ranking matrix and
# # the pixel distance ranking matrix. Save the result in Test_1 folder.
# fea_dist_method = 'Euclidean'
# image_dist_method = 'Euclidean'
# error = 'abs'
# result_dir = './IGTD/results/display_images'
# os.makedirs(name=result_dir, exist_ok=True)
# table_to_image(norm_data, [num_row, num_col], fea_dist_method, image_dist_method, save_image_size,
#                max_step, val_step, result_dir, error)