In [169]:
%run ../Python_files/util.py
%run ../Python_files/util_ano_detec.py
%run ../Python_files/load_dicts.py

traffic_data_with_anomaly = zload('../temp_files/traffic_data_with_anomaly.pkz')
traffic_data_ref = zload('../temp_files/traffic_data_ref.pkz')

In [170]:
def quantize(x, n, inf, sup):
    """
    x: the input data (scalar)
    n: quantization level
    inf: the lower bound of the input data x
    sup: the upper bound of the input data x
    -------------------
    Example:
    >>> quantize(4, 3, 2, 8)
    >>> 1
    >>> quantize(6.9, 3, 2, 8)
    >>> 2
    """
    if (x < inf):
        return 0
    elif (x >= sup):
        return (n-1)
    else:
        lev_length = (sup - inf) / float(n)
        return int(np.floor((x - inf) / lev_length))

In [171]:
tmc = '129-04138'
month = 1
day_list = [2, 3, 4, 5, 6]

traffic_data_ref_list = []
for hour in range(24):
    for minute in range(60):
        for day in day_list:
            key = str(tmc) + '_' + str(month) + '_' + str(day) + '_' + str(hour) + '_' + str(minute)
            traffic_data_ref_list.append(traffic_data_ref[key])

In [172]:
N = 10
inf = min(traffic_data_ref_list)
sup = max(traffic_data_ref_list)

In [173]:
traffic_data_ref_list_quantized = [quantize(s, N, inf, sup) for s in traffic_data_ref_list]

In [174]:
traffic_data_ref_list_quantized[0:10]

[6, 8, 6, 6, 6, 6, 8, 6, 6, 6]

In [175]:
mapping_dict = {}

for i in range(N):
    for j in range(N):
        mapping_dict[(i, j)] = i * N + j

In [176]:
# mapping_dict

In [177]:
traffic_data_ref_list_quantized_trans = [mapping_dict[(traffic_data_ref_list_quantized[i], \
                                                       traffic_data_ref_list_quantized[i+1])] \
                                         for i in range(len(traffic_data_ref_list_quantized)-1)]

In [178]:
traffic_data_ref_list_quantized[0:10], traffic_data_ref_list_quantized_trans[0:10]

([6, 8, 6, 6, 6, 6, 8, 6, 6, 6], [68, 86, 66, 66, 66, 68, 86, 66, 66, 66])

In [179]:
tmc = '129-04138'
month = 1
day = 9

traffic_data_with_anomaly_list = []
for hour in range(24):
    for minute in range(60):
        key = str(tmc) + '_' + str(month) + '_' + str(day) + '_' + str(hour) + '_' + str(minute)
        traffic_data_with_anomaly_list.append(traffic_data_with_anomaly[key])

In [180]:
traffic_data_with_anomaly_list_quantized = [quantize(s, N, inf, sup) for s in traffic_data_with_anomaly_list]

In [181]:
traffic_data_with_anomaly_list_quantized_trans = \
[mapping_dict[(traffic_data_with_anomaly_list_quantized[i], \
               traffic_data_with_anomaly_list_quantized[i+1])] \
 for i in range(len(traffic_data_with_anomaly_list_quantized)-1)]

In [182]:
traffic_data_with_anomaly_list_quantized[0:10], traffic_data_with_anomaly_list_quantized_trans[0:10]

([9, 6, 6, 7, 7, 7, 6, 6, 6, 6], [96, 66, 67, 77, 77, 76, 66, 66, 66, 66])

In [183]:
len(traffic_data_with_anomaly_list_quantized_trans)

1439

In [184]:
mu_1 = mu_est(traffic_data_ref_list_quantized_trans, N)  # normal PL

In [185]:
# mu_1

In [186]:
N, _ = np.shape(mu_1)
assert(N == _)

mu_1 = mu_adjust(mu_1)  # normal PL

mu_01, mu1, mu_11, P1, G_11, H_11, U_11 = ChainGen_(mu_1)

zdump([mu1, mu_11, P1, G_11, H_11, U_11], '../temp_files/Traffic_ano_detec_PLs.pkz')



In [187]:
# mu_11

In [188]:
# n_range = [40, 70, 100]
# num_test_sample = 200
# beta_list = list(np.arange(0, 0.2, 0.02)[1:-1]) + list(np.arange(0.2, 1.01, 0.02)[:-1])

num_of_samp_per_win = 50
num_test_sample = 24 * 60 - num_of_samp_per_win
beta = 0.001

In [189]:
eta_wc = {}
eta_Sanov = {}

n = num_of_samp_per_win

# Get thresholds for Hoeffding's test corresponding to sample length n    
key = str(n) + '_' + str(beta)
G_list = [G_11]
H_list = [H_11]
U_list = [U_11]
eta_1 = HoeffdingRuleMarkovRobust_(beta, G_list, H_list, U_list, n)
eta_2 =  - log(beta) / n
eta_wc[key] = eta_1
eta_Sanov[key] = eta_2
zdump([eta_wc, eta_Sanov], '../temp_files/testSample_threshold_traffic_ano_detec_%d.pkz'%N)

In [190]:
time_range = range(num_test_sample)

eta_wc_list = []
eta_Sanov_list = []
for idx in time_range:
    eta_wc_list.append(np.array(eta_wc[key]).tolist())
    eta_Sanov_list.append(np.array(eta_Sanov[key]).tolist())

In [191]:
test_sample = []

n = num_of_samp_per_win

for idx in range(num_test_sample):
    test_sample.append(traffic_data_with_anomaly_list_quantized_trans[idx : (idx+60)])

In [192]:
KL = []
key = str(n) + '_' + str(beta)
for idx in range(num_test_sample):
    KL.append(KL_est(test_sample[idx], mu_11))
        
zdump(KL, '../temp_files/KL_%d_traffic_ano_detec.pkz'%N)

In [193]:
len(test_sample[1])

60

In [194]:
eta_wc, eta_Sanov

({'50_0.001': 1.3672671856311132}, {'50_0.001': 0.13815510557964272})

In [195]:
# KL[975:1125]

In [203]:
import matplotlib.pyplot as plt
import pylab
from pylab import *

time_range_ = [time_range[i] / 60 for i in range(len(time_range))]

KL_, = plt.plot(time_range, KL, "r--")
eta_wc_, = plt.plot(time_range, eta_wc_list, "b--")
eta_Sanov_, = plt.plot(time_range, eta_Sanov_list, "g--")

# plt.legend([KL_, eta_wc_, eta_Sanov_], ["KL divergence", \
#                                                 "estimated by WC result", \
#                                                 "estimated by Sanov's theorem"], loc=2)
plt.xlabel('time (min)')
plt.ylabel('divergence')
# plt.title('Threshold ($\eta$) versus Number of samples ($n$)')
# pylab.xlim(np.amin(n_range) - 1, np.amax(n_range) + 1)
# pylab.ylim(0, 1)
savefig('../temp_files/detec_results_N_%s.eps'%N)
plt.show()

In [202]:
time_range_ 

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
