25
25
import collections
26
26
import json
27
27
import time
28
+ import multiprocessing
28
29
29
30
# Matches "a.b.c.d:port". Groups: (1) the whole dotted address, (2)-(5) the
# four octets (1-3 digits each, not range-checked by the regex), (6) the port.
PATTERN_IPV4 = re .compile (r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$" )
30
31
# Matches "[addr]:port". Groups: (1) the bracketed address, restricted to the
# characters 0-9, a-z and ':' (so uppercase hex does not match), (2) the port.
PATTERN_IPV6 = re .compile (r"^\[([0-9a-z:]+)\]:(\d+)$" )
@@ -90,6 +91,10 @@ def filtermultiport(ips):
90
91
hist [ip ['sortkey' ]].append (ip )
91
92
return [value [0 ] for (key ,value ) in list (hist .items ()) if len (value )== 1 ]
92
93
94
def resolveasn(resolver, ip):
    """Look up the origin ASN of an IPv4 address via origin.asn.cymru.com.

    Args:
        resolver: Object exposing ``query(name, rdtype)`` whose result has
            ``.response.answer``, a sequence of items with ``to_text()``
            (the caller passes a ``dns.resolver.Resolver``).
        ip: Dotted-quad IPv4 address string, e.g. ``'1.2.3.4'``.

    Returns:
        The ASN as an ``int``.

    Raises:
        IndexError: If the answer section is empty or the record text has
            no quoted payload.
        ValueError: If the first token of the payload is not an integer.
        Any exception raised by ``resolver.query`` propagates unchanged.
    """
    # The lookup name is the address with its octets reversed, followed by
    # the service zone (same layout as in-addr.arpa).
    reversed_octets = '.'.join(reversed(ip.split('.')))
    answer = resolver.query(reversed_octets + '.origin.asn.cymru.com', 'TXT')

    # Only the first answer entry is used.
    record_text = answer.response.answer[0].to_text()

    # The payload is the first double-quoted string in the record text.
    # Split on the quote alone: the closing quote is not followed by a
    # space, so splitting on quote+space never isolates the payload.
    payload = record_text.split('"')[1]

    # The ASN is the first whitespace-separated token of the payload
    # (presumably "ASN | prefix | CC | registry | date" -- confirm against
    # the service documentation).
    return int(payload.split()[0])
97
+
93
98
# Based on Greg Maxwell's seed_filter.py
94
99
def filterbyasn (ips , max_per_asn , max_total ):
95
100
# Sift out ips by type
@@ -99,17 +104,23 @@ def filterbyasn(ips, max_per_asn, max_total):
99
104
100
105
my_resolver = dns .resolver .Resolver ()
101
106
107
+ pool = multiprocessing .Pool (processes = 16 )
108
+
102
109
# OpenDNS servers
103
110
my_resolver .nameservers = ['208.67.222.222' , '208.67.220.220' ]
104
111
112
+ # Resolve ASNs in parallel
113
+ asns = [pool .apply_async (resolveasn , args = (my_resolver , ip ['ip' ])) for ip in ips_ipv4 ]
114
+
105
115
# Filter IPv4 by ASN
106
116
result = []
107
117
asn_count = {}
108
- for ip in ips_ipv4 :
118
+ for i in range (len (ips_ipv4 )):
119
+ ip = ips_ipv4 [i ]
109
120
if len (result ) == max_total :
110
121
break
111
122
try :
112
- asn = int ([ x . to_text () for x in my_resolver . query ( '.' . join ( reversed ( ip [ 'ip' ]. split ( '.' ))) + '.origin.asn.cymru.com' , 'TXT' ). response . answer ][ 0 ]. split ( ' \" ' )[ 1 ]. split ( ' ' )[ 0 ] )
123
+ asn = asns [ i ]. get ( )
113
124
if asn not in asn_count :
114
125
asn_count [asn ] = 0
115
126
if asn_count [asn ] == max_per_asn :
@@ -127,7 +138,11 @@ def filterbyasn(ips, max_per_asn, max_total):
127
138
return result
128
139
129
140
def main ():
130
- js = json .load (sys .stdin )
141
+ if len (sys .argv ) > 1 :
142
+ with open (sys .argv [1 ], 'r' ) as f :
143
+ js = json .load (f )
144
+ else :
145
+ js = json .load (sys .stdin )
131
146
ips = [parseline (line ) for collateral , line in js .items ()]
132
147
133
148
cur_time = int (time .time ())
0 commit comments