-
Notifications
You must be signed in to change notification settings - Fork 9
/
algo.py
449 lines (368 loc) · 15.7 KB
/
algo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
"""
This module implements Community ID network flow hashing.
"""
import abc
import base64
import collections
import hashlib
import socket
import string
import struct
from communityid import error
from communityid import compat
from communityid import icmp
from communityid import icmp6
# IP protocol numbers. These are plain module-level integers rather than
# an enum type: the package aims to support Python 2.7+, and the available
# pre-3.0 enum workarounds seem like overkill. --cpk
PROTO_ICMP = 1
PROTO_TCP = 6
PROTO_UDP = 17
PROTO_ICMP6 = 58
PROTO_SCTP = 132

# Protocols that are explicitly supported as port-enabled. For these,
# the Community ID computation should be based on a full five-tuple.
PORT_PROTOS = {
    PROTO_ICMP,
    PROTO_TCP,
    PROTO_UDP,
    PROTO_ICMP6,
    PROTO_SCTP,
}
class FlowTuple:
    """
    Tuples of network flow endpoints, used as input for the Community
    ID computation. These tuple objects are flexible regarding the
    input data types -- for the addresses you can use NBO byte-strings
    or ASCII, for example. They usually are 5-tuples of address & port
    pairs, plus IP protocol number, but port-less tuples are supported
    for less common IP payloads.
    """
    Data = collections.namedtuple(
        'Data', ['proto', 'saddr', 'daddr', 'sport', 'dport'])

    def __init__(self, proto, saddr, daddr, sport=None, dport=None,
                 is_one_way=False):
        """Tuple initializer.

        The proto argument is a non-negative integer and represents an
        IP protocol number, e.g. 6 for TCP. You can use the PROTO_*
        constants if convenient, and communityid.get_proto() to help
        convert to integer.

        The saddr and daddr arguments are source & destination IP
        addresses, either IPv4 or IPv6. Multiple data types are
        supported, including bytes (as str in older Pythons, or the
        explicit bytes type), IPv4Address, IPv6Address, and string
        representations.

        The sport and dport arguments are numeric port numbers, either
        provided as ints or in packed 16-bit network byte order. When
        the protocol number is one of PORT_PROTOS (TCP, UDP, etc),
        they are required. For other IP protocols they are optional.

        The optional Boolean is_one_way argument indicates whether the
        tuple captures a bidirectional flow (the default) or
        not. Setting this to true means that the computation will
        consider the tuple directional and not try to pair up with
        flipped-endpoint tuples. Normally you don't need to pass this.

        This can raise FlowTupleErrors when the input is inconsistent.
        """
        self.proto = proto
        self.saddr, self.daddr = saddr, daddr
        self.sport, self.dport = sport, dport

        # bool is a subclass of int but never a meaningful protocol
        # number, so it stays rejected (this also covers None).
        if not isinstance(proto, int) or isinstance(proto, bool):
            raise error.FlowTupleError('Need numeric protocol number')

        if saddr is None or daddr is None:
            raise error.FlowTupleError('Need source and destination address')

        if not self.is_ipaddr(saddr):
            raise error.FlowTupleError('Unsupported format for source IP address "%s"' % saddr)

        if not self.is_ipaddr(daddr):
            raise error.FlowTupleError('Unsupported format for destination IP address "%s"' % daddr)

        if ((sport is None and dport is not None) or
                (dport is None and sport is not None)):
            raise error.FlowTupleError('Need either both or no port numbers')

        if sport is not None and not self.is_port(sport):
            raise error.FlowTupleError('Source port "%s" invalid' % sport)

        if dport is not None and not self.is_port(dport):
            raise error.FlowTupleError('Destination port "%s" invalid' % dport)

        if proto in PORT_PROTOS and sport is None:
            raise error.FlowTupleError('Need port numbers for port-enabled protocol %s' % proto)

        # Our ICMP handling directly mirrors that of Zeek, since it
        # tries hardest to map ICMP into traditional 5-tuples. For
        # this, it evaluates the message type & code to identify
        # whether the notion of two-way communication applies. If not,
        # tuple-flipping isn't an option either. The following flag
        # stores this result, assuming by default we're bidirectional.
        self.is_one_way = is_one_way

        # The rest of the constructor requires ports.
        if sport is None or dport is None:
            return

        # If we're explicitly told this is a one-way flow-tuple, we
        # don't need to consider directionality further. And, testing
        # directionality only makes sense when the ports are integers,
        # not lower-level NBO representations. Throughout we need to
        # keep track of the types of the ports, since the ICMP logic
        # works only with regular ints.
        if self.is_one_way is False:
            if self.proto == PROTO_ICMP:
                get_equivalents = icmp.get_port_equivalents
            elif self.proto == PROTO_ICMP6:
                get_equivalents = icmp6.get_port_equivalents
            else:
                get_equivalents = None

            if get_equivalents is not None:
                sport, dport, self.is_one_way = get_equivalents(
                    self._port_to_int(sport), self._port_to_int(dport))
                # Store the mapped values in whatever representation
                # (int or NBO) the caller originally handed us.
                self.sport = self._port_to_same(sport, self.sport)
                self.dport = self._port_to_same(dport, self.dport)

    def __repr__(self):
        data = self.get_data()

        if data.sport is None or data.dport is None:
            return '[%s] %s -> %s' % (data.proto, data.saddr, data.daddr)

        return '[%s] %s/%s -> %s/%s' % (data.proto, data.saddr, data.sport,
                                        data.daddr, data.dport)

    def get_data(self):
        """
        Returns a FlowTuple.Data namedtuple with this flow tuple's
        data. The protocol is an integer number (e.g. 6 for TCP),
        saddr and daddr are ASCII-rendered/unpacked, and the ports
        are integers or None, if absent.
        """
        sport, dport = self.sport, self.dport

        if sport is not None:
            sport = self._port_to_int(sport)
        if dport is not None:
            dport = self._port_to_int(dport)

        return FlowTuple.Data(self.proto,
                              self._addr_to_printable(self.saddr),
                              self._addr_to_printable(self.daddr),
                              sport, dport)

    def is_ordered(self):
        """
        Returns True when this tuple already is in canonical order:
        it is one-way, or its source address sorts before the
        destination address, or the addresses are equal and the source
        port sorts before the destination port.
        """
        return (self.is_one_way or self.saddr < self.daddr or
                (self.saddr == self.daddr and
                 self.sport is not None and self.dport is not None and
                 self.sport < self.dport))

    def has_ports(self):
        """Returns True when both source and destination port are set."""
        return self.sport is not None and self.dport is not None

    def in_order(self):
        """
        Returns a copy of this tuple that is ordered canonically. Ie,
        regardless of the direction of src/dest, the returned tuple
        will be sorted the same way.
        """
        if self.is_ordered():
            return FlowTuple(self.proto, self.saddr, self.daddr,
                             self.sport, self.dport, self.is_one_way)

        return FlowTuple(self.proto, self.daddr, self.saddr,
                         self.dport, self.sport, self.is_one_way)

    def in_nbo(self):
        """
        Returns a copy of this tuple where the addresses and port are
        rendered into NBO byte strings.
        """
        # _port_to_nbo() only converts ints, so absent (None) ports
        # pass through unchanged.
        return FlowTuple(self.proto,
                         self._addr_to_nbo(self.saddr),
                         self._addr_to_nbo(self.daddr),
                         self._port_to_nbo(self.sport),
                         self._port_to_nbo(self.dport),
                         self.is_one_way)

    @staticmethod
    def is_ipaddr(val):
        """
        Returns True when the value is an IP address in any supported
        form: text, packed bytes, or an ipaddress-module object.
        """
        return (FlowTuple.addr_is_text(val) or
                FlowTuple.addr_is_packed(val) or
                FlowTuple.addr_is_ipaddress_type(val))

    @staticmethod
    def addr_is_text(addr):
        """
        Returns True when socket.inet_pton() can parse the value as an
        IPv4 or IPv6 address string.
        """
        for family in (socket.AF_INET, socket.AF_INET6):
            try:
                socket.inet_pton(family, addr)
                return True
            except (socket.error, ValueError, TypeError):
                # ValueError covers text that cannot be handed to the
                # C layer at all, e.g. an embedded NUL character.
                pass
        return False

    @staticmethod
    def addr_is_packed(addr):
        """
        Returns True when socket.inet_ntop() can render the value as a
        packed IPv4 or IPv6 address.
        """
        for family in (socket.AF_INET, socket.AF_INET6):
            try:
                socket.inet_ntop(family, addr)
                return True
            except (socket.error, ValueError, TypeError):
                pass
        return False

    @staticmethod
    def addr_is_ipaddress_type(addr):
        """Returns compat.is_ipaddress_type()'s verdict for the value."""
        return compat.is_ipaddress_type(addr)

    @staticmethod
    def is_port(val):
        """
        Returns True when the value is a valid port: either something
        int() converts to a number in 0..65535, or a packed 16-bit
        value in network byte order. Returns False for anything else,
        including None and other unconvertible types.
        """
        try:
            port = int(val)
            return 0 <= port <= 65535
        except (ValueError, TypeError, OverflowError):
            # Not int-convertible (TypeError for e.g. None or lists,
            # OverflowError for infinite floats): try packed form next.
            pass

        try:
            port = struct.unpack('!H', val)[0]
            return 0 <= port <= 65535
        except (struct.error, IndexError, TypeError):
            pass

        return False

    @staticmethod
    def _port_to_int(port):
        """Convert a port number to regular integer."""
        if isinstance(port, int):
            return port
        # Assume it's two bytes in NBO:
        return struct.unpack('!H', port)[0]

    @staticmethod
    def _port_to_nbo(port):
        """Convert a port number to 2-byte NBO."""
        if isinstance(port, int):
            return struct.pack('!H', port)
        # Assume it's two bytes in NBO
        return port

    @staticmethod
    def _port_to_same(port, sample):
        """Convert a port number to the same type as that of another instance."""
        if isinstance(sample, int):
            return FlowTuple._port_to_int(port)
        return FlowTuple._port_to_nbo(port)

    @staticmethod
    def _addr_to_printable(addr):
        """
        Best-effort conversion of an address to a renderable string.

        Absent good types, this is heuristic: real bytes objects and
        strings containing non-printable characters are treated as
        packed addresses, ipaddress objects are exploded, and anything
        else is returned as-is on the assumption that it already is
        text and not NBO.
        """
        if compat.have_real_bytes_type() and isinstance(addr, bytes):
            return FlowTuple._addr_to_ascii(addr)
        if compat.is_ipaddress_type(addr):
            return addr.exploded
        if not all(c in string.printable for c in addr):
            return FlowTuple._addr_to_ascii(addr)
        return addr

    @staticmethod
    def _addr_to_ascii(addr):
        """
        Render a packed address or ipaddress object as text. Input that
        cannot be converted is returned unchanged.
        """
        if compat.is_ipaddress_type(addr):
            return addr.exploded

        for family in (socket.AF_INET, socket.AF_INET6):
            try:
                return socket.inet_ntop(family, addr)
            except (socket.error, ValueError, TypeError):
                pass

        return addr

    @staticmethod
    def _addr_to_nbo(addr):
        """
        Render a text address or ipaddress object as packed NBO bytes.
        Input that cannot be converted (e.g. because it already is
        packed) is returned unchanged.
        """
        if compat.is_ipaddress_type(addr):
            return addr.packed

        for family in (socket.AF_INET, socket.AF_INET6):
            try:
                return socket.inet_pton(family, addr)
            except (socket.error, ValueError, TypeError):
                pass

        return addr

    # Convenience wrappers for making protocol-specific tuple instances.

    @classmethod
    def make_tcp(cls, saddr, daddr, sport, dport):
        """Returns a TCP flow tuple; ports are converted via int()."""
        return cls(PROTO_TCP, saddr, daddr, int(sport), int(dport))

    @classmethod
    def make_udp(cls, saddr, daddr, sport, dport):
        """Returns a UDP flow tuple; ports are converted via int()."""
        return cls(PROTO_UDP, saddr, daddr, int(sport), int(dport))

    @classmethod
    def make_sctp(cls, saddr, daddr, sport, dport):
        """Returns an SCTP flow tuple; ports are converted via int()."""
        return cls(PROTO_SCTP, saddr, daddr, int(sport), int(dport))

    @classmethod
    def make_icmp(cls, saddr, daddr, mtype, mcode):
        """
        Returns an ICMP flow tuple, with message type and code taking
        the places of source and destination port.
        """
        return cls(PROTO_ICMP, saddr, daddr, int(mtype), int(mcode))

    @classmethod
    def make_icmp6(cls, saddr, daddr, mtype, mcode):
        """
        Returns an ICMPv6 flow tuple, with message type and code taking
        the places of source and destination port.
        """
        return cls(PROTO_ICMP6, saddr, daddr, int(mtype), int(mcode))

    @classmethod
    def make_ip(cls, saddr, daddr, proto):
        """Returns a port-less flow tuple for the given IP protocol."""
        return cls(proto, saddr, daddr)
class CommunityIDBase(abc.ABCMeta('_CommunityIDABC', (object,), {})):
    """
    Abstract interface of Community ID algorithm objects.

    The intermediate base class is created by calling abc.ABCMeta
    directly, which applies the metaclass on both Python 2 and
    Python 3. (A "__metaclass__" attribute in the class body is
    ignored by Python 3, which would leave the abstract methods below
    unenforced there.) Subclasses need to implement all four methods
    before they can be instantiated.
    """

    @abc.abstractmethod
    def get_error(self):
        """
        Error handler. After something fails during the ID computation,
        this method should return an explanation why.
        """
        return None

    @abc.abstractmethod
    def calc(self, tpl):
        """
        Entrypoint to the ID computation, given a FlowTuple instance.
        Returns a string containing the Community ID value, or None on
        error.
        """
        return None

    @abc.abstractmethod
    def hash(self, tpl):
        """
        The tuple-hashing part of the computation. Returns hashlib
        algorithm instance ready for digesting, or None on error.
        """
        return None

    @abc.abstractmethod
    def render(self, hashstate):
        """
        The rendering part of the computation. Receives a hashlib
        algorithm instance and returns a string containing the
        community ID value according to this instance's configuration,
        or None on error.
        """
        return None
class CommunityID(CommunityIDBase):
    """
    An algorithm object that computes Community IDs on FlowTuple instances.
    """
    def __init__(self, seed=0, use_base64=True):
        """
        Algorithm initializer.

        The seed is hashed as an unsigned 16-bit integer ahead of the
        flow tuple. The use_base64 flag selects base64 rendering of the
        digest (the default) over a hex digest.
        """
        self._version = 1
        self._seed = seed
        self._use_base64 = use_base64
        self._err = None

    def __repr__(self):
        return 'CommunityID(v=%s,seed=%s,base64=%s)' \
            % (self._version, self._seed, self._use_base64)

    def get_error(self):
        """
        Returns an error string when problems came up during the
        computation. This is only valid directly after calc() returned
        None, i.e., something went wrong during the calculation.
        """
        return self._err

    def calc(self, tpl):
        """
        The biggie: given a FlowTuple instance, returns a string
        containing the Community ID. In case of problems, returns
        None. In that case consider get_error() to learn more about
        what happened.
        """
        tpl = tpl.in_nbo().in_order()
        return self.render(self.hash(tpl))

    def hash(self, tpl):
        """
        Feeds seed, addresses, protocol, a padding byte and (when
        present) the ports of the given tuple into a SHA1 hash. The
        tuple's addresses and ports need to be byte strings already,
        as calc() ensures via in_nbo().

        Returns the hashlib instance, or None on error, in which case
        get_error() explains what went wrong.
        """
        # Start from a clean slate so get_error() never reports a
        # failure left over from an earlier computation.
        self._err = None
        hashstate = hashlib.sha1()
        dlen = 0

        try:
            fields = [
                struct.pack('!H', self._seed),  # 2-byte seed
                tpl.saddr,  # 4 bytes (v4 addr) or 16 bytes (v6 addr)
                tpl.daddr,  # 4 bytes (v4 addr) or 16 bytes (v6 addr)
                struct.pack('B', tpl.proto),  # 1 byte for transport proto
                struct.pack('B', 0),  # 1 byte padding
            ]

            if tpl.has_ports():
                fields.append(tpl.sport)  # 2 bytes
                fields.append(tpl.dport)  # 2 bytes

            for field in fields:
                hashstate.update(field)
                dlen += len(field)
        except (struct.error, TypeError) as err:
            # struct.error: seed or protocol do not fit their field.
            # TypeError: a field is not a byte string and thus cannot
            # be hashed.
            self._err = 'Could not pack flow tuple: %s' % err
            return None

        # The data structure we hash should always align on 32-bit
        # boundaries.
        if dlen % 4 != 0:
            self._err = 'Unexpected hash input length: %s' % dlen
            return None

        return hashstate

    def render(self, hashstate):
        """
        Renders the given hashlib instance into the Community ID
        string: the version number, a colon, and the digest. Returns
        None when handed None, i.e. when hashing failed.
        """
        if hashstate is None:
            return None

        # Unless the user disabled the feature, base64-encode the
        # (binary) hash digest. Otherwise, print the ASCII digest.
        if self._use_base64:
            return str(self._version) + ':' + base64.b64encode(hashstate.digest()).decode('ascii')

        return str(self._version) + ':' + hashstate.hexdigest()