In [1]:
import pandas as pd
import numpy as np
import json, csv, datetime
from vadetisweb.utils.anomaly_detection_utils import next_later_dt

In [2]:
# --- input: Yahoo labeled anomaly benchmark (one sub-directory per benchmark) ---
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.

yahoo_path = '/home/adrian/Dokumente/real_data/ydata-labeled-time-series-anomalies-v1_0'

a1_path = '/A1Benchmark/'
a2_path = '/A2Benchmark/'
a3_path = '/A3Benchmark/'
a4_path = '/A4Benchmark/'

# Series numbers: A1 has files 1..67, A2-A4 have files 1..100.
a1_elements = list(range(1, 68))
a234_elements = list(range(1, 101))

# One {'name', 'file_path'} record per series file of each benchmark.
a1files = [
    {'name': f'TS{x}', 'file_path': f'{yahoo_path}{a1_path}real_{x}.csv'}
    for x in a1_elements
]
a2files = [
    {'name': f'TS{x}', 'file_path': f'{yahoo_path}{a2_path}synthetic_{x}.csv'}
    for x in a234_elements
]
a3files = [
    {'name': f'TS{x}', 'file_path': f'{yahoo_path}{a3_path}A3Benchmark-TS{x}.csv'}
    for x in a234_elements
]
a4files = [
    {'name': f'TS{x}', 'file_path': f'{yahoo_path}{a4_path}A4Benchmark-TS{x}.csv'}
    for x in a234_elements
]

# --- output: root directory for the generated CSV files ---
output_path = '/home/adrian/Dokumente/real_data/yahoo_out2'

#test_file_name = output_path + a1_path + 'test.csv'
#train_file_name = output_path + a1_path + 'train.csv'

In [3]:
def load_data_frame(ts_name, file_path):
    """Read one benchmark CSV and reshape it to the export column layout.

    A constant ``ts_name`` column is placed first and a constant ``unit``
    column (always ``"Value"``) is placed at position 2; the source columns
    ``timestamp`` and ``is_anomaly`` are renamed to ``time`` and ``class``.

    Args:
        ts_name: Label stored in every row of the ``ts_name`` column.
        file_path: Path of the CSV file to read.

    Returns:
        The reshaped DataFrame.
    """
    raw = pd.read_csv(file_path)
    raw.insert(loc=0, column='ts_name', value=ts_name)
    raw.insert(loc=2, column='unit', value="Value")
    column_map = {'timestamp': 'time', 'is_anomaly': 'class'}
    return raw.rename(columns=column_map)

In [4]:
def time_index_to_dt(df):
    """Replace the ``time`` column with a synthetic sequence of datetimes.

    Starting from 2020-04-01 00:00:00, ``next_later_dt(dt, '1H')`` is applied
    once per row and each result becomes that row's ``time`` value, so the
    first row gets the first value returned by ``next_later_dt``. The original
    contents of ``time`` are discarded.

    The stamps are collected first and assigned as a whole column. Writing
    datetimes cell by cell into the originally numeric column relies on an
    implicit dtype change that recent pandas versions deprecate, and is much
    slower.

    Args:
        df: DataFrame whose ``time`` column is to be replaced; modified in place.

    Returns:
        The same DataFrame object, for call chaining.
    """
    dt = datetime.datetime.strptime('2020-04-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    stamps = []
    for _ in range(len(df)):
        # presumably advances dt by one hour - confirm against next_later_dt
        dt = next_later_dt(dt, '1H')
        stamps.append(dt)
    df['time'] = stamps
    return df
        

In [5]:
def init_file(outputfile):
    """Create (or truncate) ``outputfile`` and write the CSV header row.

    The header is ``ts_name;time;unit;value;class``, using ``;`` as delimiter.

    Args:
        outputfile: Path of the file to (re)initialise. Its directory must
            already exist.
    """
    # newline='' lets the csv module control line endings itself; without it
    # Windows would turn the writer's '\r\n' into '\r\r\n'.
    with open(outputfile, 'w', newline='') as file_output:
        writer = csv.writer(file_output, delimiter=';')
        writer.writerow(['ts_name', 'time', 'unit', 'value', 'class'])
        
def append_to_file(df, outputfile):
    """Append every row of ``df`` to ``outputfile`` as ``;``-separated CSV.

    The first five columns are written by position; the second one must hold
    objects with an ``isoformat()`` method (e.g. datetimes) and is written in
    ISO format. No header is written.

    Args:
        df: DataFrame with at least five columns.
        outputfile: Path of the CSV file to append to.
    """
    # newline='' lets the csv module control line endings itself.
    with open(outputfile, 'a', newline='') as file_output:
        writer = csv.writer(file_output, delimiter=';')
        for _, record in df.iterrows():
            # Explicit positional access: plain record[0] on a label-indexed
            # Series is deprecated in pandas.
            cells = record.iloc
            writer.writerow([cells[0], cells[1].isoformat(), cells[2], cells[3], cells[4]])

In [6]:
# Series names that make up the S1 set (labelled "0-1" by the author).
# TS32 is deliberately not part of it.
names_set_a1_s1 = ['TS' + str(number) for number in (3, 16, 30, 60)]


In [7]:
# S1: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s1 = f'{output_path}{a1_path}test_s1.csv'
init_file(test_file_name_s1)

train_file_name_s1 = f'{output_path}{a1_path}train_s1.csv'
init_file(train_file_name_s1)

In [8]:
# Export the S1 series: rows 0-999 of each selected series are appended to the
# test file, rows 1001-1400 to the train file.
# NOTE(review): row 1000 lies between the two slices and is written to neither
# file - confirm that this gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s1:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s1)
    append_to_file(df_train, train_file_name_s1)

TS3	 anomalies 15	 values 1461	 min 1.8619444444443998	 max 4.2797222222222
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS16	 anomalies 3	 values 1461	 min 0.82472222222222	 max 2.5113888888889
test	 anomalies 0	 values 1000
train	 anomalies 3	 values 400


TS30	 anomalies 9	 values 1461	 min 1.0383333333333	 max 2.9558333333333
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS60	 anomalies 16	 values 1461	 min 0.6105555555555601	 max 2.3672222222222
test	 anomalies 6	 values 1000
train	 anomalies 5	 values 400




In [9]:
# S1 V1: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s1v1 = f'{output_path}{a1_path}test_s1v1.csv'
init_file(test_file_name_s1v1)

train_file_name_s1v1 = f'{output_path}{a1_path}train_s1v1.csv'
init_file(train_file_name_s1v1)

In [10]:
# Export the S1 V1 series: rows 0-999 go to the test file, rows 1001-1400 to
# the train file.
# NOTE(review): `names_set_a1_s1v1` is not defined in any visible cell; the
# recorded output of this cell lists TS3, TS16, TS23, TS30 and TS60 - confirm
# and define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s1v1:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s1v1)
    append_to_file(df_train, train_file_name_s1v1)

TS3	 anomalies 15	 values 1461	 min 1.8619444444443998	 max 4.2797222222222
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS16	 anomalies 3	 values 1461	 min 0.82472222222222	 max 2.5113888888889
test	 anomalies 0	 values 1000
train	 anomalies 3	 values 400


TS23	 anomalies 19	 values 1420	 min 0.0	 max 0.97236626590723
test	 anomalies 15	 values 1000
train	 anomalies 0	 values 400


TS30	 anomalies 9	 values 1461	 min 1.0383333333333	 max 2.9558333333333
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS60	 anomalies 16	 values 1461	 min 0.6105555555555601	 max 2.3672222222222
test	 anomalies 6	 values 1000
train	 anomalies 5	 values 400




In [11]:
# S2: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s2 = f'{output_path}{a1_path}test_s2.csv'
init_file(test_file_name_s2)

train_file_name_s2 = f'{output_path}{a1_path}train_s2.csv'
init_file(train_file_name_s2)

In [12]:
# Export the S2 series: rows 0-999 go to the test file, rows 1001-1400 to the
# train file.
# NOTE(review): `names_set_a1_s2` is not defined in any visible cell; the
# recorded output of this cell lists TS12, TS19, TS33, TS45 and TS58 - confirm
# and define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s2:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s2)
    append_to_file(df_train, train_file_name_s2)

TS12	 anomalies 3	 values 1439	 min 1	 max 115
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS19	 anomalies 227	 values 1424	 min 0	 max 118
test	 anomalies 113	 values 1000
train	 anomalies 91	 values 400


TS33	 anomalies 2	 values 1439	 min 1	 max 179
test	 anomalies 0	 values 1000
train	 anomalies 0	 values 400


TS45	 anomalies 1	 values 1440	 min 0	 max 182
test	 anomalies 0	 values 1000
train	 anomalies 1	 values 400


TS58	 anomalies 43	 values 1435	 min 0	 max 35
test	 anomalies 0	 values 1000
train	 anomalies 9	 values 400




In [13]:
# S3: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s3 = f'{output_path}{a1_path}test_s3.csv'
init_file(test_file_name_s3)

train_file_name_s3 = f'{output_path}{a1_path}train_s3.csv'
init_file(train_file_name_s3)

In [14]:
# Export the S3 series: rows 0-999 go to the test file, rows 1001-1400 to the
# train file.
# NOTE(review): `names_set_a1_s3` is not defined in any visible cell; the
# recorded output of this cell lists TS4, TS8, TS14, TS17, TS27, TS31, TS51 and
# TS55 - confirm and define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s3:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s3)
    append_to_file(df_train, train_file_name_s3)

TS4	 anomalies 5	 values 1423	 min 5	 max 2575
test	 anomalies 0	 values 1000
train	 anomalies 2	 values 400


TS8	 anomalies 10	 values 1420	 min 0	 max 2926
test	 anomalies 3	 values 1000
train	 anomalies 7	 values 400


TS14	 anomalies 2	 values 1439	 min 8	 max 5002
test	 anomalies 2	 values 1000
train	 anomalies 0	 values 400


TS17	 anomalies 227	 values 1424	 min 0	 max 3287
test	 anomalies 114	 values 1000
train	 anomalies 90	 values 400


TS27	 anomalies 2	 values 1427	 min 0	 max 1352
test	 anomalies 0	 values 1000
train	 anomalies 2	 values 400


TS31	 anomalies 24	 values 1427	 min 0	 max 2259
test	 anomalies 0	 values 1000
train	 anomalies 24	 values 400


TS51	 anomalies 4	 values 1427	 min 0	 max 2910
test	 anomalies 1	 values 1000
train	 anomalies 3	 values 400


TS55	 anomalies 5	 values 1427	 min 0	 max 2672
test	 anomalies 1	 values 1000
train	 anomalies 4	 values 400




In [15]:
# S3 V1: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s3v1 = f'{output_path}{a1_path}test_s3v1.csv'
init_file(test_file_name_s3v1)

train_file_name_s3v1 = f'{output_path}{a1_path}train_s3v1.csv'
init_file(train_file_name_s3v1)

In [16]:
# Export the S3 V1 series: rows 0-999 go to the test file, rows 1001-1400 to
# the train file.
# NOTE(review): `names_set_a1_s3v1` is not defined in any visible cell; the
# recorded output of this cell lists TS14, TS17, TS27, TS51 and TS55 - confirm
# and define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s3v1:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s3v1)
    append_to_file(df_train, train_file_name_s3v1)

TS14	 anomalies 2	 values 1439	 min 8	 max 5002
test	 anomalies 2	 values 1000
train	 anomalies 0	 values 400


TS17	 anomalies 227	 values 1424	 min 0	 max 3287
test	 anomalies 114	 values 1000
train	 anomalies 90	 values 400


TS27	 anomalies 2	 values 1427	 min 0	 max 1352
test	 anomalies 0	 values 1000
train	 anomalies 2	 values 400


TS51	 anomalies 4	 values 1427	 min 0	 max 2910
test	 anomalies 1	 values 1000
train	 anomalies 3	 values 400


TS55	 anomalies 5	 values 1427	 min 0	 max 2672
test	 anomalies 1	 values 1000
train	 anomalies 4	 values 400




In [17]:
# S4: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s4 = f'{output_path}{a1_path}test_s4.csv'
init_file(test_file_name_s4)

train_file_name_s4 = f'{output_path}{a1_path}train_s4.csv'
init_file(train_file_name_s4)

In [18]:
# Export the S4 series: rows 0-999 go to the test file, rows 1001-1400 to the
# train file.
# NOTE(review): `names_set_a1_s4` is not defined in any visible cell, and the
# recorded output of this cell is truncated (it starts with TS5, TS11, TS13,
# TS15, TS22, TS28, TS34, TS47, TS48), so the full list cannot be recovered
# from the notebook - define it before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s4:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s4)
    append_to_file(df_train, train_file_name_s4)

TS5	 anomalies 2	 values 1439	 min 1170	 max 11000
test	 anomalies 2	 values 1000
train	 anomalies 0	 values 400


TS11	 anomalies 19	 values 1439	 min 1977	 max 19608
test	 anomalies 0	 values 1000
train	 anomalies 0	 values 400


TS13	 anomalies 12	 values 1439	 min 3838	 max 18137
test	 anomalies 6	 values 1000
train	 anomalies 0	 values 400


TS15	 anomalies 10	 values 1439	 min 3960	 max 18802
test	 anomalies 5	 values 1000
train	 anomalies 0	 values 400


TS22	 anomalies 63	 values 1420	 min 25	 max 19066
test	 anomalies 0	 values 1000
train	 anomalies 44	 values 400


TS28	 anomalies 82	 values 1441	 min 5	 max 36854
test	 anomalies 80	 values 1000
train	 anomalies 2	 values 400


TS34	 anomalies 7	 values 1427	 min 0	 max 41827
test	 anomalies 1	 values 1000
train	 anomalies 6	 values 400


TS47	 anomalies 10	 values 1427	 min 0	 max 24967
test	 anomalies 5	 values 1000
train	 anomalies 5	 values 400


TS48	 anomalies 24	 values 1439	 min 71	 max 24173
test	 anomalies 24	 value

In [19]:
# S4 V1: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s4v1 = f'{output_path}{a1_path}test_s4v1.csv'
init_file(test_file_name_s4v1)

train_file_name_s4v1 = f'{output_path}{a1_path}train_s4v1.csv'
init_file(train_file_name_s4v1)

In [20]:
# Export the S4 V1 series: rows 0-999 go to the test file, rows 1001-1400 to
# the train file.
# NOTE(review): `names_set_a1_s4v1` is not defined in any visible cell; the
# recorded output of this cell lists TS5, TS13, TS15 and TS34 - confirm and
# define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s4v1:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s4v1)
    append_to_file(df_train, train_file_name_s4v1)

TS5	 anomalies 2	 values 1439	 min 1170	 max 11000
test	 anomalies 2	 values 1000
train	 anomalies 0	 values 400


TS13	 anomalies 12	 values 1439	 min 3838	 max 18137
test	 anomalies 6	 values 1000
train	 anomalies 0	 values 400


TS15	 anomalies 10	 values 1439	 min 3960	 max 18802
test	 anomalies 5	 values 1000
train	 anomalies 0	 values 400


TS34	 anomalies 7	 values 1427	 min 0	 max 41827
test	 anomalies 1	 values 1000
train	 anomalies 6	 values 400




In [21]:
# S5: build the test/train output paths and reset each file to a header-only CSV.
test_file_name_s5 = f'{output_path}{a1_path}test_s5.csv'
init_file(test_file_name_s5)

train_file_name_s5 = f'{output_path}{a1_path}train_s5.csv'
init_file(train_file_name_s5)

In [22]:
# Export the S5 series: rows 0-999 go to the test file, rows 1001-1400 to the
# train file.
# NOTE(review): `names_set_a1_s5` is not defined in any visible cell; the
# recorded output of this cell lists TS29, TS38, TS39, TS44, TS52, TS56 and
# TS57 - confirm and define the list before this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name not in names_set_a1_s5:
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')

    append_to_file(df_test, test_file_name_s5)
    append_to_file(df_train, train_file_name_s5)

TS29	 anomalies 7	 values 1441	 min 11	 max 190127
test	 anomalies 3	 values 1000
train	 anomalies 4	 values 400


TS38	 anomalies 9	 values 1427	 min 14	 max 889703
test	 anomalies 1	 values 1000
train	 anomalies 8	 values 400


TS39	 anomalies 10	 values 1427	 min 5	 max 79600
test	 anomalies 8	 values 1000
train	 anomalies 2	 values 400


TS44	 anomalies 9	 values 1461	 min 28158.890680375	 max 42778.736431106
test	 anomalies 9	 values 1000
train	 anomalies 0	 values 400


TS52	 anomalies 11	 values 1432	 min 434	 max 7845760
test	 anomalies 4	 values 1000
train	 anomalies 7	 values 400


TS56	 anomalies 5	 values 1427	 min 48	 max 81870
test	 anomalies 1	 values 1000
train	 anomalies 4	 values 400


TS57	 anomalies 3	 values 1441	 min 0	 max 33693
test	 anomalies 1	 values 1000
train	 anomalies 2	 values 400




In [23]:
# Combined export: output paths for the test and train files.
test_file_all = output_path + a1_path + 'test_all.csv'
train_file_all = output_path + a1_path + 'train_all.csv'

# Reset BOTH files to a header-only CSV. The train file was previously never
# initialised (the test file was initialised twice), so it had no header row
# and kept rows appended by earlier runs.
init_file(test_file_all)
init_file(train_file_all)

In [28]:
# Export every A1 series except TS54, TS62 and TS64 into the combined files:
# rows 0-999 go to the test file, rows 1001-1400 to the train file.
# Per-series summary printing is switched off in this cell.
# NOTE(review): row 1000 is written to neither file - confirm the gap is intended.
for entry in a1files:
    ts_name = entry['name']
    if ts_name in ('TS54', 'TS62', 'TS64'):
        continue

    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    df_test = df.iloc[0:1000]
    df_train = df.iloc[1001:1401]

    append_to_file(df_test, test_file_all)
    append_to_file(df_train, train_file_all)

In [24]:
# `test_file_name` / `train_file_name` exist only as commented-out lines in the
# configuration cell, so on a fresh kernel this cell raised NameError. They are
# defined here with the values from those commented-out lines.
# NOTE(review): confirm these are the intended target files.
test_file_name = output_path + a1_path + 'test.csv'
train_file_name = output_path + a1_path + 'train.csv'

# Reset both files to a header-only CSV.
init_file(test_file_name)
init_file(train_file_name)

In [25]:
# Dry run over every A1 series: print the overall and per-split anomaly counts
# without writing any file (the append_to_file calls below are disabled).
# The splits are rows 0-999 ("test") and rows 1001-1400 ("train").
for entry in a1files:
    ts_name = entry['name']
    file_path = entry['file_path']
    df = time_index_to_dt(load_data_frame(ts_name, file_path))

    # min/max are reported over the non-anomalous rows (class == 0) only
    normal_values = df.loc[df['class']==0, 'value']
    print(''.join([
        ts_name, "\t anomalies ", str(len(df.loc[df['class']==1])),
        '\t values ', str(len(df)),
        '\t min ', str(normal_values.min()),
        '\t max ', str(normal_values.max()),
    ]))

    df_test = df.iloc[0:1000]
    print(''.join(['test', "\t anomalies ", str(len(df_test.loc[df_test['class']==1])),
                   '\t values ', str(len(df_test))]))

    df_train = df.iloc[1001:1401]
    print(''.join(['train', "\t anomalies ", str(len(df_train.loc[df_train['class']==1])),
                   '\t values ', str(len(df_train))]))
    print('\n')
    #append_to_file(df_test, test_file_name)
    #append_to_file(df_train, train_file_name)

TS1	 anomalies 2	 values 1420	 min 0.0	 max 0.4140256163498
test	 anomalies 0	 values 1000
train	 anomalies 2	 values 400


TS2	 anomalies 16	 values 1439	 min 3594	 max 14199
test	 anomalies 0	 values 1000
train	 anomalies 3	 values 400


TS3	 anomalies 15	 values 1461	 min 1.8619444444443998	 max 4.2797222222222
test	 anomalies 1	 values 1000
train	 anomalies 0	 values 400


TS4	 anomalies 5	 values 1423	 min 5	 max 2575
test	 anomalies 0	 values 1000
train	 anomalies 2	 values 400


TS5	 anomalies 2	 values 1439	 min 1170	 max 11000
test	 anomalies 2	 values 1000
train	 anomalies 0	 values 400


TS6	 anomalies 8	 values 1439	 min 34	 max 224
test	 anomalies 0	 values 1000
train	 anomalies 0	 values 400


TS7	 anomalies 61	 values 1423	 min 0	 max 2076
test	 anomalies 23	 values 1000
train	 anomalies 32	 values 400


TS8	 anomalies 10	 values 1420	 min 0	 max 2926
test	 anomalies 3	 values 1000
train	 anomalies 7	 values 400


TS9	 anomalies 8	 values 1461	 min 0.0	 max 0.14166666666

In [54]:
# Display rows 10001-15800 (by position) of `df`.
# NOTE(review): stale cell (execution count 54). Its recorded output shows a
# "UPS" series that no visible cell loads, so it was produced from kernel state
# that is not part of this notebook - confirm whether the cell can be removed.
df.iloc[10001:15801]

Unnamed: 0,ts_name,time,unit,value,class
10001,UPS,2015-04-02 15:07:53,Value,2,0
10002,UPS,2015-04-02 15:12:53,Value,1,0
10003,UPS,2015-04-02 15:17:53,Value,5,0
10004,UPS,2015-04-02 15:22:53,Value,1,0
10005,UPS,2015-04-02 15:27:53,Value,2,0
...,...,...,...,...,...
15796,UPS,2015-04-22 18:02:53,Value,1,0
15797,UPS,2015-04-22 18:07:53,Value,2,0
15798,UPS,2015-04-22 18:12:53,Value,1,0
15799,UPS,2015-04-22 18:17:53,Value,5,0


In [47]:
# Display `test_data_frame`.
# NOTE(review): `test_data_frame` is not defined in any visible cell; this cell
# fails with NameError on a fresh kernel unless it is defined elsewhere.
test_data_frame

Unnamed: 0,ts_name,time,unit,value,class
0,Test,2015-02-26 21:42:53,Value,2,0
1,Test,2015-02-26 21:47:53,Value,2,0
2,Test,2015-02-26 21:52:53,Value,4,0
3,Test,2015-02-26 21:57:53,Value,3,0
4,Test,2015-02-26 22:02:53,Value,5,0
...,...,...,...,...,...
15861,Test,2015-04-22 23:27:53,Value,3,0
15862,Test,2015-04-22 23:32:53,Value,2,0
15863,Test,2015-04-22 23:37:53,Value,2,0
15864,Test,2015-04-22 23:42:53,Value,3,0


In [23]:
# Display only the rows of `test_data_frame` labelled as anomalies (class == 1).
# NOTE(review): `test_data_frame` is not defined in any visible cell; this cell
# fails with NameError on a fresh kernel unless it is defined elsewhere.
test_data_frame.loc[test_data_frame['class']==1]

Unnamed: 0,ts_name,time,unit,value,class
1993,TEST,2015-03-05 19:47:53,Value,716,1
3735,TEST,2015-03-11 20:57:53,Value,1673,1
9795,TEST,2015-04-01 21:57:53,Value,282,1
11606,TEST,2015-04-08 04:52:53,Value,211,1
