@@ -146,13 +146,15 @@ def __init__(self, period, granu, shift):
#self.out_rows = set() # deleted rows
#self.out_cols = set()
self .in_value_rset = dict ()
self .in_value_cset = dict ()
self .in_value2rowset = dict ()
self .in_value2colset = dict ()
self .out_value2rowset = dict ()
self .out_value2colset = dict ()
self .in_row_ones = dict () # row number: quantity of 1s
self .in_col_ones = dict () # col number: quantity of 1s
self .out_row_ones = dict () # row number: quantity of 1s
self .out_col_ones = dict () # col number: quantity of 1s
# self.in_row_ones = dict() # row number: quantity of 1s
# self.in_col_ones = dict() # col number: quantity of 1s
# self.out_row_ones = dict() # row number: quantity of 1s
# self.out_col_ones = dict() # col number: quantity of 1s
# update these 4 attributes after each matrix manipulation
self .in_candi_row = - 1
@@ -600,44 +602,41 @@ def analyze_bmatrix(self, unix_dt):
logging .info ('found event!' )
def min_key_random_value (self , value_set_dict ):
min = - 999999999
for v in value_set_dict .keys ():
if v < min :
min = v
return min , value_set_dict [min ][0 ]
# initialize event row and column attributes
def init_attri (self ):
def init_attri (self , all_rows , all_cols ):
col2ones = dict ()
for c in all_cols :
col2ones [c ] = 0
total_sum = 0.0
for r in self .in_row_ones .keys ():
#self.row_weight[r] = 0
for r in all_rows :
sum = 0
for c in self . in_col_ones . keys () :
value = self .bmatrix [r ,c ]
for c in all_cols :
value = self .bmatrix [r , c ]
sum += value
self . in_col_ones [c ] += value
col2ones [c ] += value
total_sum += value
self .in_row_ones [r ] = sum
try :
self .in_value_rset [sum ].add (r )
self .in_value2rowset [sum ].add (r )
except :
self .in_value_rset [sum ] = ([r ])
self .in_value2rowset [sum ] = ([r ])
for c in self . in_col_ones . keys () :
value = self .in_col_ones [c ]
for c in all_cols :
value = self .col2ones [c ]
try :
self .in_value_cset [value ].add (c )
self .in_value2colset [value ].add (c )
except :
self .in_value_cset [value ] = ([c ])
self .in_value2colset [value ] = ([c ])
self .in_cr_ones , self .in_candi_row = self .min_key_random_value (self .in_value_rset )
self .in_cc_ones , self .in_candi_col = self .min_key_random_value (self .in_value_cset )
small = min (self .in_value2rowset .keys ())
self .in_candi_row = self .in_value2rowset [small ].pop ()
self .in_value2rowset [small ].add (self .in_candi_row )
self .in_cr_ones = small
self .in_cc_ones , self .in_candi_col = self .min_key_random_value (self .in_value2colset )
self .event_ones = total_sum
self .event_den = total_sum / self .event_size
@@ -755,30 +754,32 @@ def analyze_bmatrix_new(self, unix_dt):
def analyze_bmatrix_plusminus (self , unix_dt ):
#--------------------------
#initialize the event submatrix to the original matrix
#------------------------------
# record the rows and columns after pre-processing
all_rows = set ()
all_cols = set ()
for index in xrange (0 , len (self .bmatrix .tolist ())):
self . in_row_ones [ index ] = 0
all_rows . add ( index )
for index in xrange (0 , int (self .mo_number )):
self . in_col_ones [ index ] = 0
all_cols . add ( index )
#-------------------
# preprocess the matrix
self .event_height = self .bmatrix .shape [0 ]
self .event_width = self .bmatrix .shape [1 ]
# preprocessing the matrix
self .event_height = len (all_rows )
min_row_sum = 0.2 * self .thre_width
for i in xrange (0 , self .event_height ):
if self .bmatrix [i ].sum () <= min_row_sum :
del self . in_row_ones [ i ]
all_rows . remove ( i )
self .event_height -= 1
self .event_width = len (all_cols )
min_col_sum = 0.2 * (float (self .thre_size ) / float (self .thre_width ))
for i in xrange (0 , self .event_width ):
if self .bmatrix [:,i ].sum () <= min_col_sum :
self .in_cols .remove (i )
del self .in_col_ones [i ]
all_cols ,remove (i )
self .event_width -= 1
self .event_size = float (self .event_height * self .event_width )
@@ -787,7 +788,7 @@ def analyze_bmatrix_plusminus(self, unix_dt):
logging .info ('%d : too small after preprocessing' , unix_dt )
return - 1
self .init_attri () # initialize row and col attributes and event density
self .init_attri (all_rows , all_cols )
#--------------------
# process the matrix
@@ -831,7 +832,7 @@ def analyze_bmatrix_plusminus(self, unix_dt):
# consider the width threshold
# we ignore any height threshold because the size threshold will be adequate
if self .event_width - 1 < self .thre_width : # cannot delete any more columns
cols_du = - 1
cols_du = - 999
if rows_du >= cols_du :
self .event_del_row ()
@@ -842,7 +843,7 @@ def analyze_bmatrix_plusminus(self, unix_dt):
# addition in the end
# TODO code here
# TODO code here deal with no possible addition
while (self .event_den >= self .thre_den ):
try :
print self .out_cr_ones
@@ -893,152 +894,238 @@ def analyze_bmatrix_plusminus(self, unix_dt):
def event_add_row (self ):
index = self .out_candi_row
ones_value = self .out_cr_ones
self .event_size += self .event_width
self .event_height += 1
self .event_ones += self . row_ones [ index ]
self .event_ones += ones_value
self .event_den = self .event_ones / self .event_size
value = self .out_row_ones [index ]
del self .out_row_ones [index ]
self .in_row_ones [index ] = value
out_max = - 999999999
for j in self .out_col_ones .keys ():
new_value = self .out_col_ones [j ] + self .bmatrix [index , j ]
self .out_col_ones [j ] = new_value
if new_value > out_max :
out_max = new_value
self .out_candi_col = j
self .out_cc_ones = out_max
in_min = 999999999
self .in_value_cset = {}
for j in self .in_col_ones .keys ():
new_value = self .in_col_ones [j ] + self .bmatrix [index , j ]
self .in_col_ones [j ] = new_value
try :
self .in_value_cset [new_value ].add (j )
except :
self .in_value_cset [new_value ] = ([j ])
if new_value < in_min :
in_min = new_value
self .in_candi_col = j
self .in_cc_ones = in_min
# new in candidate remains
try :
self .in_value2rowset [ones_value ].add (index )
except :
self .in_value2rowset [ones_value ] = ([index ])
# get new out candidate
self .out_value2rowset [ones_value ].remove (index )
if len (self .out_value2rowset [ones_value ]) is 0 :
del self .out_value2rowset [ones_value ]
def event_del_row (self , index ):
max = max (self .out_value2rowset .keys ())
self .out_candi_row = self .out_value2rowset [max ].pop ()
self .out_value2rowset [max ].add (self .out_candi_row )
else :
self .out_candi_row = self .out_value2rowset [ones_value ].pop ()
self .out_value2rowset .add (self .out_candi_row ) # must
# get new out column candidate
tmpdict = dict ()
for v in self .out_value2colset :
for col in self .out_value2colset [v ]:
new_value = v + self .bmatrix [index , col ]
try :
tmpdict [new_value ].add (col )
except :
tmpdict [new_value ] = ([col ])
self .out_value2colset = tmpdict
max = max (self .out_value2colset .keys ())
self .out_candi_col = self .out_value2colset [max ].pop ()
self .out_value2colset .add (self .out_candi_col )
# get new in column candidate
tmpdict = dict ()
for v in self .in_value2colset :
for col in self .in_value2colset [v ]:
new_value = v + self .bmatrix [index , col ]
try :
tmpdict [new_value ].add (col )
except :
tmpdict [new_value ] = ([col ])
self .in_value2colset = tmpdict
small = min (self .in_value2colset .keys ())
self .in_candi_col = self .in_value2colset [small ].pop ()
self .in_value2colset .add (self .in_candi_col )
def event_del_row (self ):
index = self .in_candi_row
ones_value = self .in_cr_ones
self .event_size -= self .event_width
self .event_height -= 1
self .event_ones -= self . row_ones [ index ]
self .event_ones -= ones_value
self .event_den = self .event_ones / self .event_size
value = self .in_row_ones [index ]
del self .in_row_ones [index ]
self .out_row_ones [index ] = value
out_max = - 999999999
for j in self .out_col_ones .keys ():
new_value = self .out_col_ones [j ] + self .bmatrix [index , j ]
self .out_col_ones [j ] = new_value
if new_value > out_max :
out_max = new_value
self .out_candi_col = j
self .out_cc_ones = out_max
in_min = 999999999
self .in_value_cset = {}
for j in self .in_col_ones .keys ():
new_value = self .in_col_ones [j ] + self .bmatrix [index , j ]
self .in_col_ones [j ] = new_value
try :
self .in_value_cset [new_value ].add (j )
except :
self .in_value_cset [new_value ] = ([j ])
if new_value < in_min :
in_min = new_value
self .in_candi_col = j
self .in_cc_ones = in_min
# get new in row candidate
self . in_value2rowset [ ones_value ]. remove ( index )
if len ( self .in_value2rowset [ ones_value ]) is 0 :
del self .in_value2rowset [ ones_value ]
small = min (self .in_value2rowset .keys ())
self .in_candi_row = self .in_value2rowset [small ].pop ()
self .in_value2rowset [min ].add (self .in_candi_row )
else : # this condition holds most of the time, which is efficient
self .in_candi_row = self .in_value2rowset [ones_value ].pop ()
self .in_value2rowset .add (self .in_candi_row ) # must
def event_add_col (self , index ):
# out row candidate does not change
try :
self .out_value2rowset [ones_value ].add (index )
except :
self .out_value2rowset [ones_value ] = ([index ])
# get new out column candidate
tmpdict = dict ()
for v in self .out_value2colset :
for col in self .out_value2colset [v ]:
new_value = v - self .bmatrix [index , col ]
try :
tmpdict [new_value ].add (col )
except :
tmpdict [new_value ] = ([col ])
self .out_value2colset = tmpdict
max = max (self .out_value2colset .keys ())
self .out_candi_col = self .out_value2colset [max ].pop ()
self .out_value2colset .add (self .out_candi_col )
# get new in column candidate
tmpdict = dict ()
for v in self .in_value2colset :
for col in self .in_value2colset [v ]:
new_value = v - self .bmatrix [index , col ]
try :
tmpdict [new_value ].add (col )
except :
tmpdict [new_value ] = ([col ])
self .in_value2colset = tmpdict
small = min (self .in_value2colset .keys ())
self .in_candi_col = self .in_value2colset [small ].pop ()
self .in_value2colset .add (self .in_candi_col )
def event_add_col (self ):
index = self .out_candi_col
ones_value = self .out_cc_ones
self .event_size += self .event_height
self .event_width += 1
self .event_ones += self . col_ones [ index ]
self .event_ones += ones_value
self .event_den = self .event_ones / self .event_size
self .in_cols .add (index )
self .out_cols .remove (index )
value = self .out_col_ones [index ]
del self .out_col_ones [index ]
self .in_col_ones [index ] = value
out_max = - 999999999
for i in self .out_row_ones .keys ():
new_value = self .out_row_ones [i ] + self .bmatrix [i , index ]
self .out_row_ones [i ] = new_value
if new_value > out_max :
out_max = new_value
self .out_candi_row = i
self .out_cr_ones = out_max
in_min = 999999999
self .in_value_rset = {}
for i in self .in_row_ones .keys ():
new_value = self .in_row_ones [i ] + self .bmatrix [i , index ]
self .in_row_ones [i ] = new_value
try :
self .in_value_rset [new_value ].add (i )
except :
self .in_value_rset [new_value ] = ([i ])
if new_value < in_min :
in_min = new_value
self .in_candi_row = i
self .in_cr_ones = in_min
# new in candidate remains
try :
self .in_value2colset [ones_value ].add (index )
except :
self .in_value2colset [ones_value ] = ([index ])
def event_del_col (self , index ):
# get new out candidate
self .out_value2colset [ones_value ].remove (index )
if len (self .out_value2colset [ones_value ]) is 0 :
del self .out_value2colset [ones_value ]
max = max (self .out_value2colset .keys ())
self .out_candi_col = self .out_value2colset [max ].pop ()
self .out_value2colset [max ].add (self .out_candi_col )
else :
self .out_candi_col = self .out_value2colset [ones_value ].pop ()
self .out_value2colset .add (self .out_candi_col ) # must
# get new out row candidate
tmpdict = dict ()
for v in self .out_value2rowset :
for row in self .out_value2rowset [v ]:
new_value = v + self .bmatrix [row , index ]
try :
tmpdict [new_value ].add (row )
except :
tmpdict [new_value ] = ([row ])
self .out_value2rowset = tmpdict
max = max (self .out_value2rowset .keys ())
self .out_candi_row = self .out_value2rowset [max ].pop ()
self .out_value2rowset .add (self .out_candi_row )
# get new in row candidate
tmpdict = dict ()
for v in self .in_value2rowset :
for row in self .in_value2rowset [v ]:
new_value = v + self .bmatrix [row , index ]
try :
tmpdict [new_value ].add (row )
except :
tmpdict [new_value ] = ([row ])
self .in_value2rowset = tmpdict
small = min (self .in_value2rowset .keys ())
self .in_candi_row = self .in_value2rowset [small ].pop ()
self .in_value2rowset .add (self .in_candi_row )
def event_del_col (self ):
index = self .in_candi_col
ones_value = self .in_cc_ones
self .event_size -= self .event_height
self .event_width -= 1
self .event_ones -= self . col_ones [ index ]
self .event_ones -= ones_value
self .event_den = self .event_ones / self .event_size
value = self .in_col_ones [index ]
del self .in_col_ones [index ]
self .out_col_ones [index ] = value
out_max = - 999999999
for i in self .out_row_ones .keys ():
new_value = self .out_row_ones [i ] + self .bmatrix [i , index ]
self .out_row_ones [i ] = new_value
if new_value > out_max :
out_max = new_value
self .out_candi_row = i
self .out_cr_ones = out_max
in_min = 999999999
self .in_value_rset = {}
for i in self .in_row_ones .keys ():
new_value = self .in_row_ones [i ] + self .bmatrix [i , index ]
self .in_row_ones [i ] = new_value
try :
self .in_value_rset [new_value ].add (i )
except :
self .in_value_rset [new_value ] = ([i ])
if new_value < in_min :
in_min = new_value
self .in_candi_row = i
self .in_cr_ones = in_min
# get new in col candidate
self .in_value2colset [ones_value ].remove (index )
if len (self .in_value2colset [ones_value ]) is 0 :
del self .in_value2colset [ones_value ]
small = min (self .in_value2colset .keys ())
self .in_candi_col = self .in_value2colset [small ].pop ()
self .in_value2colset [small ].add (self .in_candi_col )
else :
self .in_candi_col = self .in_value2colset [ones_value ].pop ()
self .in_value2colset .add (self .in_candi_col ) # must
# out col candidate does not change
try :
self .out_value2colset [ones_value ].add (index )
except :
self .out_value2colset [ones_value ] = ([index ])
# get new out row candidate
tmpdict = dict ()
for v in self .out_value2rowset :
for row in self .out_value2rowset [v ]:
new_value = v - self .bmatrix [row , index ]
try :
tmpdict [new_value ].add (row )
except :
tmpdict [new_value ] = ([row ])
self .out_value2rowset = tmpdict
max = max (self .out_value2rowset .keys ())
self .out_candi_row = self .out_value2rowset [max ].pop ()
self .out_value2rowset .add (self .out_candi_row )
# get new in row candidate
tmpdict = dict ()
for v in self .in_value2rowset :
for row in self .in_value2rowset [v ]:
new_value = v - self .bmatrix [row , index ]
try :
tmpdict [new_value ].add (row )
except :
tmpdict [new_value ] = ([row ])
self .in_value2rowset = tmpdict
small = min (self .in_value2rowset .keys ())
self .in_candi_row = self .in_value2rowset [small ].pop ()
self .in_value2rowset .add (self .in_candi_row )
def event_rm_line_ronly (self , index ): # do not remove column any more
@@ -1107,14 +1194,14 @@ def get_dict_min_list(self, mydict, keylist):
keylist = mydict .keys ()
mylist = list ()
min = 9999999999
small = 9999999999
for k in keylist :
value = mydict [k ]
if value == min :
if value == small :
mylist .append (k )
elif value < min :
min = value
elif value < small :
small = value
mylist = [k ]
return mylist