In [1]:
%load_ext autoreload
%autoreload 2

In [9]:
%load_ext cython

In [1]:
from traceutils.as2org.as2org import AS2Org
from glob import glob
import bdrmapit_parser.pyparser as p
import bdrmapit_parser.parser.cyparser as c
import bdrmapit_parser.graph.construct as gc
import ujson as json
import pickle
from traceutils.radix.ip2as import IP2AS, create_table
from traceutils.scamper.warts import WartsReader
from time import time

In [2]:
# Build the lookup table from the CSV with traceutils' create_table.
# `ip2as` is passed to the parsing and graph-building calls in later cells.
ip2as = create_table('../ip2as/test.csv')

In [3]:
# Load the AS2Org data from the as-org2info file stamped 20181001.
# `as2org` is consumed by gc.construct_graph in later cells.
as2org = AS2Org('../retrieve-external/orgs/20181001.as-org2info.txt.gz')

In [4]:
# Wrap every *.warts.gz file under ../retrieve-external/team/ (in sorted path
# order) in a TraceFile tagged with OutputType.WARTS, then display the count.
files = [c.TraceFile(file, c.OutputType.WARTS) for file in sorted(glob('../retrieve-external/team/*.warts.gz'))]
len(files)

236

In [5]:
# Smoke run: parse only the first trace file sequentially.
# NOTE: the name `results` is rebound by later cells (graph-input dict, pickle load).
results = c.parse_sequential(files[:1], ip2as)

[KParsing traceroute files 100.00% (1 / 1). Addrs 47,078 Adjs 59,311 DPs 118,008 MPLS 4,570


In [6]:
# Wall-clock timing of the parallel parse over all files; the cell's value is
# the elapsed time in minutes. A %%timeit variant is kept commented out:
# %%timeit -r1 -n1
s = time()
# NOTE(review): `parseres` is only bound when this guard is true; later cells
# that read `parseres` rely on it.
if __name__ == '__main__':
    # NOTE(review): 12 is presumably the worker/process count — TODO confirm
    # against c.parse_parallel's signature.
    parseres = c.parse_parallel(files, ip2as, 12)
e = time()
(e - s) / 60

[KParsing traceroute files 100.00% (236 / 236). Addrs 1,409,820 Adjs 4,289,920 DPs 16,377,831 MPLS 69,374


10.449943395455678

In [19]:
%%cython
from bdrmapit_parser.parser.cyparser cimport ParseResults
from traceutils.radix.ip2as cimport IP2AS
from collections import defaultdict

cdef dict listify(d):
    # Produce a new dict that has the same keys as ``d`` with every value
    # copied into a list (used below to turn the set-valued mappings into
    # list-valued ones).
    cdef str key
    cdef set members
    cdef dict converted = {key: list(members) for key, members in d.items()}
    return converted

cpdef dict build_graph_json(ParseResults parseres, IP2AS ip2as):
    """Turn parse results into a dict whose collections are all lists.

    Returned keys:
      'addrs'   -- list(parseres.addrs)
      'mpls'    -- list(parseres.mpls)
      'nexthop' -- x -> list of y for adjacencies (x, y, distance) where
                   distance == 1 or ip2as[x] == ip2as[y]
      'multi'   -- x -> list of y for the other adjacencies with distance > 0,
                   kept only when x has no 'nexthop' entry
      'dps'     -- addr -> list of the values paired with it in parseres.dps
    """
    cdef dict out = {'addrs': list(parseres.addrs), 'mpls': list(parseres.mpls)}
    cdef set deferred = set()
    cdef str src, dst
    cdef int hops

    direct = defaultdict(set)
    indirect = defaultdict(set)

    # First pass: direct links are recorded right away; the rest are set aside
    # because whether they count depends on the complete `direct` mapping.
    for src, dst, hops in parseres.adjs:
        if hops == 1 or ip2as[src] == ip2as[dst]:
            direct[src].add(dst)
            continue
        if hops > 0:
            deferred.add((src, dst))

    # Second pass: a deferred link is kept only for sources with no direct link.
    for src, dst in deferred:
        if src in direct:
            continue
        indirect[src].add(dst)

    out['nexthop'] = listify(direct)
    out['multi'] = listify(indirect)

    by_addr = defaultdict(set)
    for addr, asn in parseres.dps:
        by_addr[addr].add(asn)
    out['dps'] = listify(by_addr)
    return out

def build_graph(parseres, ip2as: IP2AS):
    """Group parse results into the mappings used for graph construction.

    Returned keys:
      'addrs'   -- parseres.addrs, passed through unchanged
      'mpls'    -- parseres.mpls, passed through unchanged
      'nexthop' -- x -> set of y for adjacencies (x, y, distance) where
                   distance == 1 or ip2as[x] == ip2as[y]
      'multi'   -- x -> set of y for the other adjacencies with distance > 0,
                   kept only when x has no 'nexthop' entry
      'dps'     -- addr -> set of the values paired with it in parseres.dps
    """
    addrs, mpls = parseres.addrs, parseres.mpls

    direct = defaultdict(set)
    deferred = set()
    # First pass: direct links are recorded right away; the rest are set aside
    # because whether they count depends on the complete `direct` mapping.
    for src, dst, hops in parseres.adjs:
        if hops == 1 or ip2as[src] == ip2as[dst]:
            direct[src].add(dst)
            continue
        if hops > 0:
            deferred.add((src, dst))

    # Second pass: a deferred link is kept only for sources with no direct link.
    indirect = defaultdict(set)
    for src, dst in deferred:
        if src in direct:
            continue
        indirect[src].add(dst)

    by_addr = defaultdict(set)
    for addr, asn in parseres.dps:
        by_addr[addr].add(asn)

    return {
        'addrs': addrs,
        'mpls': mpls,
        'nexthop': dict(direct),
        'multi': dict(indirect),
        'dps': dict(by_addr),
    }

In [7]:
# Time (in seconds) the conversion of the parse results into the graph-input dict.
# NOTE(review): this calls `c.build_graph_json` from the cyparser module, not the
# function of the same name defined in the %%cython cell above — confirm which
# one is intended.
s = time()
results = c.build_graph_json(parseres, ip2as)
e = time()
e - s

49.89645791053772

In [8]:
# Time (in seconds) pickling `results` to results.pickle in the working directory.
s = time()
with open('results.pickle', 'wb') as f:
    pickle.dump(results, f)
e = time()
e - s

6.298480033874512

In [10]:
# Time (in seconds) reading results.pickle back into `results2`.
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickles that this notebook itself produced.
s = time()
with open('results.pickle', 'rb') as f:
    results2 = pickle.load(f)
e = time()
e - s

8.602235078811646

In [5]:
# Time (in seconds) graph construction from the five collections in `results`
# together with the ip2as and as2org lookups.
s = time()
graph = gc.construct_graph(results['addrs'], results['nexthop'], results['multi'], results['dps'], results['mpls'], ip2as, as2org)
e = time()
e - s

23.512253999710083

In [3]:
# Reload `results` from test.pickle and display its number of top-level keys.
# NOTE(review): test.pickle is written by a cell further down the notebook
# (addrs/adjs/dps dict), so this cell depends on a file from an earlier run.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open('test.pickle', 'rb') as f:
    results = pickle.load(f)
len(results)

3

In [4]:
# Unpack the three collections from the `results` dict.
# NOTE(review): the 'adjs' key exists in the dict saved to test.pickle, not in
# the dict built by build_graph_json in the %%cython cell (which has
# 'nexthop'/'multi' instead) — this cell must run after loading test.pickle.
addrs = results['addrs']
adjs = results['adjs']
dps = results['dps']

In [25]:
# Time (in seconds) pickling `results` to results.pickle.
# NOTE: this writes the same file name as the earlier results.pickle dump cell,
# so whichever cell ran last determines the file's contents.
s = time()
with open('results.pickle', 'wb') as f:
    pickle.dump(results, f)
e = time()
e - s

26.92295002937317

In [7]:
# Time (in seconds) pickling `results2` to results2.pickle.
s = time()
with open('results2.pickle', 'wb') as f:
    pickle.dump(results2, f)
e = time()
e - s

5.392574787139893

In [10]:
# Time (in seconds) writing `results2` as JSON for comparison with pickle.
# `json` here is ujson (see the import cell), not the standard-library module.
s = time()
with open('results2.json', 'w') as f:
    json.dump(results2, f)
e = time()
e - s

6.725600004196167

In [5]:
# Time (in seconds) building `results2` from the separately loaded collections.
# NOTE(review): this passes four arguments (addrs, adjs, dps, ip2as), while the
# other call in this notebook is `c.build_graph_json(parseres, ip2as)` — this
# cell may target an older signature; confirm before re-running.
s = time()
results2 = c.build_graph_json(addrs, adjs, dps, ip2as)
e = time()
e - s

37.104645013809204

In [4]:
# Time (in seconds) reading results2.pickle back into `results2`.
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickles that this notebook itself produced.
s = time()
with open('results2.pickle', 'rb') as f:
    results2 = pickle.load(f)
e = time()
e - s

5.855299234390259

In [5]:
# Time (in seconds) graph construction from `results2`.
# NOTE(review): unlike the other construct_graph call in this notebook, no
# `mpls` collection is passed here (six arguments instead of seven) — confirm
# which form matches the current gc.construct_graph signature.
s = time()
graph = gc.construct_graph(results2['addrs'], results2['nexthop'], results2['multi'], results2['dps'], ip2as, as2org)
e = time()
e - s

23.512253999710083

In [10]:
# Display a single traceroute (hop number: address).
# NOTE(review): `trace` is not assigned in any cell visible in this notebook;
# this cell relies on leftover kernel state and will fail on a fresh run.
trace

01: 84.88.81.121
02: 84.88.19.149
03: 130.206.211.69
04: 130.206.245.121
05: 149.11.68.49
06: 154.25.1.109
07: 130.117.50.201
08: 130.117.14.202
09: 193.251.131.133
12: 193.251.255.102
13: 87.237.20.76
14: 87.237.20.157

In [4]:
# Single-shot timing (one run, one loop) of sequentially parsing the first two files.
%timeit -r1 -n1 c.parse_sequential(files[:2], ip2as)

[KParsing traceroute files 50.00% (1 / 2). Addrs 54,500 Adjs 68,365 DPs 156,770

7.84 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


[KParsing traceroute files 100.00% (2 / 2). Addrs 86,562 Adjs 119,203 DPs 258,607[KParsing traceroute files 100.00% (2 / 2). Addrs 86,562 Adjs 119,203 DPs 258,607


In [9]:
import pickle

In [8]:
# Bundle the three parsed collections into one dict so they can be pickled together.
results = {'addrs': addrs, 'adjs': adjs, 'dps': dps}

In [10]:
# Persist the bundled collections to test.pickle (read back by the
# test.pickle loading cell elsewhere in this notebook).
with open('test.pickle', 'wb') as f:
    pickle.dump(results, f)

In [3]:
# Benchmark the `pyparser` module's parse (imported as `p`) on a single warts file.
%timeit p.parse('../retrieve-external/team/daily.l7.t1.c006867.20180801.bed-us.warts.gz', p.OutputType.WARTS, ip2as)

3.51 s ± 30.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
# Fresh, empty sets handed to c.parse via its addrs/adjs/dps keyword arguments
# in the next cell.
addrs = set()
adjs = set()
dps = set()

In [5]:
# Benchmark the `cyparser` module's parse (imported as `c`) on the same warts file.
# NOTE(review): the sets are presumably filled in place; %timeit calls parse
# repeatedly with the same sets, so they are not empty after the first run.
%timeit c.parse('../retrieve-external/team/daily.l7.t1.c006867.20180801.bed-us.warts.gz', c.OutputType.WARTS, ip2as, addrs=addrs, adjs=adjs, dps=dps)

3.13 s ± 68.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# Parse one warts file with the `pyparser` module, which returns the three
# collections, then display their sizes. This rebinds `addrs`, `adjs` and `dps`.
addrs, adjs, dps = p.parse('../retrieve-external/team/daily.l7.t1.c006867.20180801.bed-us.warts.gz', p.OutputType.WARTS, ip2as)
len(addrs), len(adjs), len(dps)

(56503, 70127, 121340)

In [13]:
# Inspect the parsed `dps` set of (address, number) pairs.
# NOTE(review): this displays the entire set (121,340 items per the previous
# cell's output); consider showing only a small sample before sharing.
dps

{('50.242.149.2', 26223),
 ('87.226.229.250', 35154),
 ('68.85.106.165', 12066),
 ('4.68.71.217', 12767),
 ('129.250.3.150', 2715),
 ('208.83.127.237', 54508),
 ('84.116.137.193', 41635),
 ('1.215.78.126', 3786),
 ('103.200.13.106', 45671),
 ('4.68.71.217', 41750),
 ('186.3.125.249', 264668),
 ('112.174.85.30', 17866),
 ('213.228.22.90', 12322),
 ('50.242.151.70', 19271),
 ('91.23.197.161', 3320),
 ('212.103.130.1', 204500),
 ('199.192.70.17', 32281),
 ('172.19.128.6', 23908),
 ('68.86.85.22', 2635),
 ('217.141.104.193', 3269),
 ('149.6.155.74', 36992),
 ('142.165.60.129', 803),
 ('187.120.9.78', 263855),
 ('96.120.69.61', 20928),
 ('68.86.86.242', 9811),
 ('66.253.205.253', 23473),
 ('188.111.217.211', 3209),
 ('68.87.158.157', 395871),
 ('221.5.239.74', 4837),
 ('96.120.69.61', 22363),
 ('68.86.90.173', 133799),
 ('62.115.44.26', 31967),
 ('27.85.131.186', 2516),
 ('68.85.106.165', 50719),
 ('68.87.158.197', 13611),
 ('68.87.158.193', 45454),
 ('94.142.98.199', 23201),
 ('89.149.183.