Skip to content

Commit 5fc4072

Browse files
authored
Resolve ASNs in parallel and allow passing an input file name to makeseeds.py (#2432)
* Resolve ASNs in parallel
* Allow passing of a filename to makeseeds.py
1 parent c27e629 commit 5fc4072

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

contrib/seeds/makeseeds.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import collections
2626
import json
2727
import time
28+
import multiprocessing
2829

2930
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
3031
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
@@ -90,6 +91,10 @@ def filtermultiport(ips):
9091
hist[ip['sortkey']].append(ip)
9192
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
9293

94+
def resolveasn(resolver, ip):
    """Look up the origin ASN of an IPv4 address through origin.asn.cymru.com.

    Args:
        resolver: Object exposing ``query(name, rdtype)`` whose result has a
            ``response.answer`` sequence of items with ``to_text()``
            (the caller passes a ``dns.resolver.Resolver``).
        ip: Dotted-quad IPv4 address string, e.g. ``'1.2.3.4'``.

    Returns:
        The ASN as an ``int``.

    Raises:
        IndexError: The reply has no answer section, or the first answer
            contains no double-quoted payload.
        ValueError: The first token of the payload is not an integer.
        Any exception raised by ``resolver.query`` propagates unchanged.
    """
    # The lookup name is the address with its octets reversed:
    # 1.2.3.4 -> 4.3.2.1.origin.asn.cymru.com
    reversed_octets = '.'.join(reversed(ip.split('.')))
    reply = resolver.query(reversed_octets + '.origin.asn.cymru.com', 'TXT')

    # Only the first answer is used, so render just that one to text
    # instead of converting every answer and discarding the rest.
    record = reply.response.answer[0].to_text()

    # The TXT payload is the first double-quoted string in the record;
    # presumably of the form "ASN | prefix | ..." -- only the leading
    # space-delimited token is parsed.
    payload = record.split('"')[1]
    return int(payload.split(' ')[0])
97+
9398
# Based on Greg Maxwell's seed_filter.py
9499
def filterbyasn(ips, max_per_asn, max_total):
95100
# Sift out ips by type
@@ -99,17 +104,23 @@ def filterbyasn(ips, max_per_asn, max_total):
99104

100105
my_resolver = dns.resolver.Resolver()
101106

107+
pool = multiprocessing.Pool(processes=16)
108+
102109
# OpenDNS servers
103110
my_resolver.nameservers = ['208.67.222.222', '208.67.220.220']
104111

112+
# Resolve ASNs in parallel
113+
asns = [pool.apply_async(resolveasn, args=(my_resolver, ip['ip'])) for ip in ips_ipv4]
114+
105115
# Filter IPv4 by ASN
106116
result = []
107117
asn_count = {}
108-
for ip in ips_ipv4:
118+
for i in range(len(ips_ipv4)):
119+
ip = ips_ipv4[i]
109120
if len(result) == max_total:
110121
break
111122
try:
112-
asn = int([x.to_text() for x in my_resolver.query('.'.join(reversed(ip['ip'].split('.'))) + '.origin.asn.cymru.com', 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
123+
asn = asns[i].get()
113124
if asn not in asn_count:
114125
asn_count[asn] = 0
115126
if asn_count[asn] == max_per_asn:
@@ -127,7 +138,11 @@ def filterbyasn(ips, max_per_asn, max_total):
127138
return result
128139

129140
def main():
130-
js = json.load(sys.stdin)
141+
if len(sys.argv) > 1:
142+
with open(sys.argv[1], 'r') as f:
143+
js = json.load(f)
144+
else:
145+
js = json.load(sys.stdin)
131146
ips = [parseline(line) for collateral, line in js.items()]
132147

133148
cur_time = int(time.time())

0 commit comments

Comments
 (0)