In [5]:
import os 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [6]:
def plot_res(config_train, tresh, tresh_folder):

    dataset_name = config_train['dataset_name']
    
    config_test = config_train.copy()
    config_test['mode'] = 'test'
    config_test['step'] = config_test['win_size']
    config_test['test_model'] = f'test_{config_test["ID"]}/space_checkpoint.pth'
    config_test["quantile_treshold"] = tresh


    if not os.path.exists(f'test_{config_test["ID"]}/{tresh_folder}'):
        os.makedirs(f'test_{config_test["ID"]}/{tresh_folder}')

    #Plotting
    test = np.loadtxt(f'test_{config_test["ID"]}/train_energy_test.csv', delimiter=',')
    train = np.loadtxt(f'test_{config_test["ID"]}/train_energy.csv', delimiter=',')

    #HISTOGRAMS
    fig, ax = plt.subplots(2,1,figsize=(10, 6))

    ax[0].hist(test, bins=100)
    ax[0].set_title('Test set')
    ax[0].set_yscale('log')
    ax[1].hist(train, bins=100)
    ax[1].set_title('Train set')
    ax[1].set_yscale('log')

    fig.savefig(f'test_{config_test["ID"]}/{tresh_folder}/histograms.png')

    trh = np.quantile(train, config_test['quantile_treshold'])

    plt.close()

    #tresholded histograms
    fig, ax = plt.subplots(2,1,figsize=(10, 6))

    ax[0].hist(train[train>trh], bins=100)
    ax[0].set_title('Train set treshold: '+str(trh))
    ax[1].hist(test[test>trh], bins=100)
    ax[1].set_title('Test set')
    fig.savefig(f'test_{config_test["ID"]}/{tresh_folder}/histograms_trsh.png')
    plt.close()

    #load dataset
    test_path = os.path.join(config_test['data_path'], dataset_name[0])
    train_path = os.path.join(config_test['data_path'], dataset_name[1])

    test_set=pd.read_csv(test_path)
    train_set=pd.read_csv(train_path)
    test_set.set_index('time', inplace=True)
    train_set.set_index('time', inplace=True)

    comp_set=pd.concat([test_set, train_set])
    comp_set.sort_values(by='time', inplace=True)


    comp_set.index = pd.to_datetime(comp_set.index)

    fig, ax = plt.subplots(len(comp_set.columns)+1,1,figsize=(100, 50))

    points = np.where(train>trh)[0]

    points_test = np.where(test>trh)[0]
    points_test = points_test + len(train)

    for i in range(1, len(comp_set.columns)+1):
        ax[i].plot(range(len(comp_set[comp_set.keys()[i-1]].values)),comp_set[comp_set.keys()[i-1]].values)
        ax[i].set_title(comp_set.keys()[i-1])

    for l in range(0, len(comp_set.columns)+1):

        for x in points:
            ax[l].axvline(x=x, color='r',alpha=0.1)

        for x in points_test:
            ax[l].axvline(x=x, color='g',alpha=0.1)


    ax[0].plot(train)
    ax[0].set_title(f'Model output, {config_test["quantile_treshold"]} treshold, anomalies_found: {len(points)+len(points_test)}')
    ax[0].plot(range(len(train),len(train)+len(test)),test)


    fig.savefig(f'test_{config_test["ID"]}/{tresh_folder}/final_time_series.png')

    plt.close("all")




In [53]:
open_file = open("test_50/log.txt", "r")
lines = open_file.readlines()

list_param = []

list_param.append([])
list_param[-1].append("\"test_name\" : \""+str("test_5")+"\"")


for i in range(39,60):
    
    lines_txt = lines[i][:-1]
    
    index = lines_txt.find(':')-1
    
    output_line = lines_txt[:index] + '"' + lines_txt[index:]

    index = output_line.find(':')

    if "dataset" in output_line or "mode" in output_line or "data_path" in output_line or "model_save_path" in output_line:
        
        output_line = output_line[:index+2] + '"' + output_line[index+2:] + '"'

    list_param[-1].append(str("\"")+str(output_line))

dict_list = []

for i in list_param:
    dict_list.append(eval("{" + ", ".join(i) + "}"))

dict_list = dict_list[0]

dict_list["dataset_name"] = eval(dict_list["dataset_name"])

In [54]:
whole_ds = pd.concat([pd.read_csv(os.path.join(dict_list['data_path'], dict_list['dataset_name'][0])), pd.read_csv(os.path.join(dict_list['data_path'], dict_list['dataset_name'][1]))])

In [55]:
keyyy = []

for i in whole_ds.keys():
    keyyy.append(i)

keyyy = keyyy[1:]

In [56]:

whole_ds.drop(columns=keyyy, inplace=True)
whole_ds

Unnamed: 0,time
0,2021-10-01 00:30:00
1,2021-10-01 00:42:00
2,2021-10-01 00:43:00
3,2021-10-01 00:44:00
4,2021-10-01 00:45:00
...,...
116870,2021-09-30 23:25:00
116871,2021-09-30 23:26:00
116872,2021-09-30 23:27:00
116873,2021-09-30 23:28:00


In [57]:
training_df = pd.read_csv(f'test_{dict_list["ID"]}/train_energy.csv', names=["score"])

training_df.head()

Unnamed: 0,score
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [58]:
tests_df = pd.read_csv(f'test_{dict_list["ID"]}/train_energy_test.csv', names=["score"])
tests_df.head()

Unnamed: 0,score
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [59]:
whole_results = pd.concat([tests_df,training_df ], axis=0, ignore_index=True, )

whole_results

Unnamed: 0,score
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
293935,0.0
293936,0.0
293937,0.0
293938,0.0


In [61]:
whole_results.sort_values(by='score')

Unnamed: 0,score
0,0.000000
194637,0.000000
194638,0.000000
194639,0.000000
194640,0.000000
...,...
128990,1288.698853
12836,1321.618896
129181,2052.098877
240491,3923.886230


In [62]:
merged = pd.merge(whole_ds, whole_results, left_index=True, right_index=True)
merged

Unnamed: 0,time,score
0,2021-10-01 00:30:00,0.0
1,2021-10-01 00:42:00,0.0
2,2021-10-01 00:43:00,0.0
3,2021-10-01 00:44:00,0.0
4,2021-10-01 00:45:00,0.0
...,...,...
116870,2021-09-30 23:25:00,0.0
116871,2021-09-30 23:26:00,0.0
116872,2021-09-30 23:27:00,0.0
116873,2021-09-30 23:28:00,0.0


In [63]:
trsh = np.quantile(merged['score'].values, 0.997)
trsh 

2.418927425146028

In [29]:
#merged.to_csv("Aaaaaaaaaaaaaaaaaaaaaaaaaaa.csv")

In [64]:
merged_trsh = merged.where(merged['score'] > trsh).dropna()
merged_trsh.to_csv("anomalyes.csv", index=False)
merged_trsh

Unnamed: 0,time,score
1995,2021-10-05 15:02:00,6.986888
2393,2021-10-06 12:07:00,5.599429
3183,2021-10-08 06:46:00,2.445307
3606,2021-10-09 05:53:00,8.431059
4383,2021-10-10 21:56:00,5.373573
...,...,...
115448,2021-09-27 19:29:00,4.725725
115449,2021-09-27 19:30:00,4.296834
116009,2021-09-29 01:26:00,7.229868
116369,2021-09-29 21:39:00,7.772665
