In [8]:
import ipaddress
import random

import numpy as np

from modules import Rule


def load_ruleset(fname, except_zero = True, random_priority = 0):
	"""Load a ruleset from a ClassBench filter file.

	Every non-blank line is expected to be tab-separated in this layout:

		@sip_network  dip_network  sp_low : sp_high  dp_low : dp_high  protocol/protocol_mask  xxx/xxx

	Args:
		fname: path of the filter file to read.
		except_zero: when True, skip every rule whose source or destination
			network starts at address 0 (0.0.0.0/n). Such rules are
			expensive to process downstream, so they can be excluded here.
		random_priority: when non-zero (n), each rule gets a random integer
			priority drawn from 0..n inclusive. When 0, the rule's line index
			in the file is used as its priority.

	Returns:
		A list of Rule objects, in file order, with sip/dip/sp/dp low and
		high bounds, protocol value/mask and priority filled in.
	"""
	ruleset = []
	with open(fname, 'r') as f:
		for n, line in enumerate(f):
			line = line.strip()
			if not line:
				# Tolerate blank lines (e.g. a trailing newline at the end of
				# the file) instead of failing while parsing them. `n` still
				# counts them, so priorities of real rules are unchanged.
				continue

			tok = line.split('\t')
			rule = Rule()
			# tok[0] carries a leading '@' that is not part of the network.
			sip = ipaddress.ip_network(tok[0][1:])
			dip = ipaddress.ip_network(tok[1])
			sp = tok[2].split(':')
			dp = tok[3].split(':')
			protocol = tok[4].split('/')

			if except_zero and (int(sip[0]) == 0 or int(dip[0]) == 0):
				continue

			# network[0]  : first (lowest) address of the IP/mask range
			# network[-1] : last (highest) address of the IP/mask range
			rule.sip_low = int(sip[0])
			rule.sip_high = int(sip[-1])
			rule.dip_low = int(dip[0])
			rule.dip_high = int(dip[-1])
			# int() ignores the spaces surrounding the ':' separator.
			rule.sp_low, rule.sp_high = int(sp[0]), int(sp[1])
			rule.dp_low, rule.dp_high = int(dp[0]), int(dp[1])
			# Protocol value and mask are hexadecimal in the filter file.
			rule.protocol_val, rule.protocol_mask = int(protocol[0], 16), int(protocol[1], 16)

			if random_priority:
				rule.priority = random.randint(0, random_priority)
			else:
				rule.priority = n
			ruleset.append(rule)
	return ruleset

def cut2points(ruleset):
    """Collect the distinct range end points of every rule, per dimension.

    For each of the four dimensions (source IP, destination IP, source
    port, destination port) the `*_low` and `*_high` values of all rules
    are gathered, de-duplicated and sorted in ascending order.

    Returns:
        A 4-tuple of sorted lists: (sip, dip, sp, dp).
    """
    dimensions = ("sip", "dip", "sp", "dp")
    end_points = {dim: set() for dim in dimensions}
    for rule in ruleset:
        for dim in dimensions:
            end_points[dim].add(getattr(rule, dim + "_low"))
            end_points[dim].add(getattr(rule, dim + "_high"))
    return tuple(sorted(end_points[dim]) for dim in dimensions)

def list2mapping(a):
    """Return a dict mapping each element of `a` to its position in `a`.

    If a value occurs more than once, the index of its last occurrence wins.
    """
    return {value: position for position, value in enumerate(a)}

def get_point_index(rule,sip_map, dip_map, sp_map, dp_map):
    """Translate a rule's eight range bounds into indices via the given maps.

    Each `*_map` is looked up with the rule's matching `*_low` / `*_high`
    value; a bound missing from its map raises KeyError.

    Returns:
        (sip_low, sip_high, dip_low, dip_high, sp_low, sp_high, dp_low, dp_high)
        where every entry is the value stored in the map for that bound.
    """
    return (
        sip_map[rule.sip_low], sip_map[rule.sip_high],
        dip_map[rule.dip_low], dip_map[rule.dip_high],
        sp_map[rule.sp_low], sp_map[rule.sp_high],
        dp_map[rule.dp_low], dp_map[rule.dp_high],
    )

In [10]:
"""This cell reads the ruleset and cuts the full space at the end points of the rules' ranges."""
ruleset = load_ruleset("../data/fw filters/MyFilters10k_{}.txt".format(3), except_zero=False)
# cut2points returns four sorted lists of unique end points, one per dimension.
sip, dip, sp, dp = cut2points(ruleset)
# Number of cut points found along each dimension.
sip_len, dip_len, sp_len, dp_len = len(sip), len(dip), len(sp), len(dp)
print("current division of 4 dimension is: sip: {} , dip: {} , sp: {} ,dp: {}".format(sip_len, dip_len, sp_len, dp_len))

current division of 4 dimension is: sip: 5484 , dip: 10321 , sp: 15 ,dp: 45


In [11]:
"""Build one value -> index map per dimension.

A dict gives O(1) lookups, which speeds up the step where each rule's
hyper-rectangle is placed into the divided space and its position has to be
found quickly.
"""
sip_map, dip_map, sp_map, dp_map = (
    list2mapping(points) for points in (sip, dip, sp, dp)
)

In [12]:
"""Allocate the (extra-huge) 4-D numpy array whose shape follows from the previous cutting.

Each axis has one entry fewer than the number of cut points of its dimension.
"""
temp_1 = np.zeros(
    tuple(n_points - 1 for n_points in (sip_len, dip_len, sp_len, dp_len)),
    dtype=np.uint8,
)
print("the space has shape of: {}".format(temp_1.shape))

the space has shape of: (5483, 10320, 14, 44)


In [13]:
%%time
# Add 1 to every grid cell of temp_1 that a rule's index ranges select, for the
# first 1000 rules only. The indices of each rule are printed as they are used.
# NOTE: the update is an in-place `+=`, so re-running this cell without
# re-creating temp_1 counts the same rules again.
# NOTE(review): temp_1 is allocated as uint8 in the cell above, so a grid cell
# covered by more than 255 rules would wrap around -- confirm this cannot happen.
"""this cell implements putting squares into the devided space. 
It uses 1000 cycles here(about 1s) and for 10K, it is about 10s"""
for i in ruleset[0:1000]: # test 1000 rules
    # Map the rule's eight range bounds to positions in the sorted cut-point lists.
    sip_low, sip_high, dip_low, dip_high, sp_low, sp_high, dp_low, dp_high = get_point_index(i, sip_map, dip_map, sp_map, dp_map)
    print("{} {} {} {} {} {} {} {}".format(sip_low, sip_high, dip_low, dip_high, sp_low, sp_high, dp_low, dp_high))
    temp_1[sip_low:sip_high, dip_low:dip_high, sp_low:sp_high+1, dp_low:dp_high+1] += 1 
    # Main open problem: for an exact-match port both indices are equal (e.g. 4:4),
    # and the slice 4:4 would select nothing. As a workaround the port slices use
    # sp_low:sp_high+1 / dp_low:dp_high+1, so a 4:4 rule is recorded in slice 4:5
    # and a 14:14 rule is meant to be recorded in slice 14:15.
    # NOTE(review): the port axes have one entry fewer than the number of cut
    # points (printed shape: 14 and 44), and NumPy clips out-of-range slices, so
    # a slice starting at the last point index (e.g. 14:15 on an axis of length
    # 14) selects nothing and such a rule would not be counted -- confirm.
    # NOTE(review): the IP slices are low:high without +1, so a rule whose low
    # and high IP indices are equal would also select nothing -- confirm.

2335 2336 4412 4413 4 4 14 14
5465 5466 6084 6085 1 1 5 5
2072 2073 2207 2208 1 1 24 24
4847 4848 6110 6111 1 1 3 3
656 657 1244 1245 8 8 3 3
1048 1049 1904 1905 5 5 15 15
1353 1354 2924 2925 8 8 6 6
1361 1362 2426 2427 1 1 12 12
2133 2134 4289 4290 5 5 5 5
2193 2194 2262 2263 1 1 38 38
2595 2596 4537 4538 1 1 2 2
4787 4788 9033 9034 3 3 25 25
5096 5097 9360 9361 8 8 36 36
4303 4304 8199 8200 10 10 10 10
3540 3541 6964 6965 4 4 24 24
220 221 3007 3008 3 3 7 7
331 332 2886 2887 2 2 15 15
2094 2095 3957 3958 4 4 15 15
2655 2656 5054 5055 8 8 21 21
1561 1562 2841 2842 10 10 3 3
5109 5110 9358 9359 11 11 10 10
4257 4258 5967 5968 1 1 7 7
3232 3233 8103 8104 2 2 3 3
585 586 1085 1086 8 8 12 12
844 845 1554 1555 1 1 12 12
4115 4116 5926 5927 10 10 10 10
4041 4042 7739 7740 4 4 5 5
2747 2748 5271 5272 5 5 7 7
686 687 1289 1290 8 8 3 3
1177 1178 2132 2133 5 5 10 10
1323 1324 2354 2355 5 5 2 2
806 807 1642 1643 1 1 7 7
2142 2143 4071 4072 8 8 7 7
5437 5438 10211 10212 1 1 4 4
4920 4921 9314 931

0 5483 4392 4393 5 5 23 23
0 5483 4197 4198 1 1 35 35
0 5483 4174 4175 5 5 10 10
0 5483 4258 4259 5 5 3 3
0 5483 4296 4297 1 1 33 33
0 5483 3801 3802 2 2 10 10
0 5483 4105 4106 5 5 15 15
0 5483 3135 3136 2 2 7 7
0 5483 2539 2540 6 6 10 10
0 5483 2609 2610 5 5 7 7
0 5483 245 246 1 1 7 7
0 5483 205 206 2 2 7 7
0 5483 824 825 10 10 7 7
0 5483 7525 7526 5 5 9 9
0 5483 7691 7692 6 6 9 9
0 5483 7693 7694 6 6 15 15
0 5483 7768 7769 10 10 40 40
0 5483 7435 7436 5 5 2 2
0 5483 6856 6857 6 6 23 23
0 5483 6646 6647 6 6 7 7
0 5483 6682 6683 1 1 22 22
0 5483 5322 5323 5 5 31 31
0 5483 9425 9426 5 5 2 2
647 648 2970 2971 0 14 0 44
189 190 3011 3012 0 14 0 44
226 227 3004 3005 0 14 0 44
82 83 173 174 0 14 0 44
1307 1308 2349 2350 0 14 0 44
1245 1246 2878 2879 0 14 0 44
5342 5343 10082 10083 0 14 0 44
4926 4927 9599 9600 0 14 0 44
4943 4944 9243 9244 0 14 0 44
4953 4954 9369 9370 0 14 0 44
4959 4960 9521 9522 0 14 0 44
5038 5039 9543 9544 0 14 0 44
4244 4245 5970 5971 0 14 0 44
4285 4286 8036 8037 0 1