In [None]:
%pip install pandas

In [1]:
from datetime import datetime
from collections import defaultdict

import pandas as pd

from ip_as_org import IPASnPrefix, ASOrg

In [None]:
def get_ip_asn_as_org_obj(timestamps):
    dates = [ts.strftime("%Y%m%d") for ts in timestamps]
    dataset_dir = "../dataset/"
    ip_asn = IPASnPrefix(dates, dataset_dir)
    as_org = ASOrg(dates, dataset_dir)
    return ip_asn, as_org


def get_nr_prefixes_from_asn_list(asn_list, ip_asn):
    prefixes_dict = defaultdict(int)
    for asn in asn_list:
        if asn is None:
            continue
        prefixes = ip_asn.get_prefixes_from_asn(asn, when)
        if prefixes is None:
            continue
        for prefix in prefixes:
            if prefix is None:
                continue
            prefix = prefix.split("/")[1]
            prefixes_dict[prefix] += 1
    return prefixes_dict

def get_distr_prefix_len(prefixes):
    prefixes_dict = defaultdict(int)
    for prefix in prefixes:
        if prefix is None:
            continue
        prefix = prefix.split("/")[1]
        prefixes_dict[prefix] += 1
    return prefixes_dict

def get_nr_ips_per_prefix(prefixes_dict):
    nr_ips = 0
    for prefix_len in prefixes_dict.keys():
        e = 32 - int(prefix_len, 10)
        nr_ips += 2 ** e  # if finding the nr of available ips in the subnet, then we need to - 2: exclude network address and default gw
    return nr_ips

ip_asn, as_org = get_ip_asn_as_org_obj([
    datetime(2024, 10, 29),
    datetime(2024, 10, 30),
    datetime(2024, 10, 31),
    datetime(2024, 11, 1),
])

when = "20241029"
org_list = ["Google", "Hetzner", "OVH", "NhanHoa", "Xneelo", "Contabo", "DigitalOcean"]
amazon = ["AWS", "Amazon"]
microsoft = ["Azure", "Microsoft"]
ali = ["Aliyun", "Alibaba"]
# regex to match a ip prefix in sublime: \d+\.\d+\.\d+../\d+

Extract ASes of each IP

In [3]:
ips_pdf = pd.read_csv("output/202411_goscanner_ipv4_netstats_ip2location.csv")

In [None]:
def get_asn_from_ip(ip):
    return ip_asn.get_asn_from_ip(ip, when)

def get_org_from_asn(asn):
    return as_org.get_org_name_from_asn(asn, when)

ips_pdf["asn"] = ips_pdf["ip"].apply(get_asn_from_ip)
ips_aggr_pdf = ips_pdf.drop_duplicates(subset=["ip"]).groupby("asn").size().reset_index(name="count").sort_values(by="count", ascending=False)
ips_aggr_pdf["percent"] = ips_aggr_pdf["count"] / ips_aggr_pdf["count"].sum() * 100

ips_aggr_pdf["org_name"] = ips_aggr_pdf["asn"].apply(get_org_from_asn)

# how many make 80%?
ips_aggr_pdf["cumsum"] = ips_aggr_pdf["percent"].cumsum()
top_80 = len(ips_aggr_pdf[ips_aggr_pdf["cumsum"] <= 80])
ips_aggr_pdf.head(top_80)

Unnamed: 0,asn,count,percent,org_name,cumsum
1625,16276.0,4392,4.789218,OVH SAS,4.789218
2417,24940.0,3418,3.727128,Hetzner Online GmbH,8.516346
1650,16509.0,3162,3.447975,"Amazon.com, Inc.",11.964321
263,3462.0,2241,2.443679,"Chunghwa Telecom Co., Ltd.",14.407999
648,8075.0,2140,2.333544,Microsoft Corporation,16.741544
...,...,...,...,...,...
129,2018.0,10,0.010904,TENET (The UNINET Project),79.955510
2872,29145.0,10,0.010904,Centaur GmbH,79.966414
7864,204887.0,10,0.010904,SCT PARTNER SAS,79.977319
4060,41079.0,10,0.010904,Cyber_Folks S.A.,79.988223


In [5]:
print(top_80, len(ips_aggr_pdf))

1110 9436


Used for ranking ASes

In [46]:
ips_aggr_pdf[["asn", "org_name"]].to_csv("output/202411_asn_org.csv", index=False)

Add ranks

In [47]:
as_rank_pdf = pd.read_csv("output/202411_asn_org_ranked.csv")

In [55]:
ips_pdf.join(
    as_rank_pdf.drop(columns=["date"], axis=1).set_index("asn"),
    on="asn",
    how="left"
).to_csv("output/202411_goscanner_ipv4_netstats_ip2location_asn_rank.csv", index=False)

In [None]:
def get_nr_ips_per_org(org_name):
    asns = as_org.get_asn_from_org_name(org_name, when)
    prefixes_dict = get_nr_prefixes_from_asn_list(asns, ip_asn)
    nr_ips = get_nr_ips_per_prefix(prefixes_dict.keys())
    return nr_ips

In [7]:
ips_aggr_pdf["nr_ips"] = ips_aggr_pdf["org_name"].apply(get_nr_ips_per_org)

In [10]:
ips_aggr_pdf[ips_aggr_pdf["nr_ips"] > 0].sort_values(by="nr_ips", ascending=False)

Unnamed: 0,asn,count,percent,org_name,cumsum,nr_ips
713,8452.0,162,0.176651,TE-AS,42.534840,4194020
3609,36925.0,26,0.028351,MEDITELECOM,69.069636,2095338
1878,19108.0,36,0.039256,Optimum,65.016466,1572584
309,4152.0,1,0.001090,USDA,95.951192,1263346
893,9316.0,1,0.001090,DACOM-PUBNETPLUS,95.422328,1040624
...,...,...,...,...,...,...
1908,19447.0,3,0.003271,ALFANUMERIC,91.407323,254
6810,137584.0,1,0.001090,Prefixnet,99.052407,254
1702,17124.0,3,0.003271,SouthArk,91.387695,254
6620,135330.0,9,0.009814,Adcdata.com,80.693739,254


In [11]:
ips_aggr_pdf

Unnamed: 0,asn,count,percent,org_name,cumsum,nr_ips
1625,16276.0,4392,4.789218,OVH SAS,4.789218,0
2417,24940.0,3418,3.727128,Hetzner Online GmbH,8.516346,0
1650,16509.0,3162,3.447975,"Amazon.com, Inc.",11.964321,0
263,3462.0,2241,2.443679,"Chunghwa Telecom Co., Ltd.",14.407999,0
648,8075.0,2140,2.333544,Microsoft Corporation,16.741544,0
...,...,...,...,...,...,...
5638,56301.0,1,0.001090,National Data Center,99.995638,0
1728,17456.0,1,0.001090,Pacific Data Systems,99.996729,0
1729,17465.0,1,0.001090,Asianet Satellite Communications Pvt Ltd,99.997819,0
5635,56255.0,1,0.001090,PT Transtech Communication Media,99.998910,0


Extract nr of IPs each provider has

In [3]:
org_pdf = pd.read_csv("output/cloud_providers.csv")

In [None]:
org_pdf["nr_ips"] = org_pdf["cloud_provider"].apply(get_nr_ips_per_org)

In [11]:
print("% of not found/no IP address", round(len(org_pdf[org_pdf["nr_ips"] == 0]) / len(org_pdf) * 100, 1))

org_pdf[org_pdf["nr_ips"] > 0].sort_values("nr_ips", ascending=False)

% of not found/no IP address 97.2


Unnamed: 0,cloud_provider,nr_ips
3905,Level,12582850
1756,Internet,4194145
4324,Taiwan,4194082
1002,Beltelecom,1048296
2484,GTT,524228
...,...,...
1295,DataXion,254
1441,Nodisto,254
3509,EboundHost.com,254
1832,Withsystems,254


In [73]:
org_pdf.to_csv("output/cloud_providers_size.csv", index=False)

In [9]:
org_name = "ROU"
asns = as_org.get_asn_from_org_name(org_name, when)
print(asns)
prefixes_dict = get_nr_prefixes_from_asn_list(asns, ip_asn)

as_org.get_org_name_from_asn(35357, when)

[]


'N&TS GROUP NETWORKS & TRANSACTIONAL SYSTEMS GROUP S.P.A.'

Known providers

In [45]:
for org in org_list:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict = get_nr_prefixes_from_asn_list(asns, ip_asn)
    nr_ips = get_nr_ips_per_prefix(prefixes_dict)
    print(org, "has", nr_ips, "IPs")


prefixes_dict = defaultdict(int)
for org in amazon:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
nr_ips = get_nr_ips_per_prefix(prefixes_dict)
print("Amazon + AWS have", nr_ips, "IPs")


prefixes_dict = defaultdict(int)
for org in microsoft:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
nr_ips = get_nr_ips_per_prefix(prefixes_dict)
print("Microsoft + Azure have", nr_ips, "IPs")


prefixes_dict = defaultdict(int)
for org in ali:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
nr_ips = get_nr_ips_per_prefix(prefixes_dict)
print("Aliyun + Alibaba have", nr_ips, "IPs")

Google has 4194020 IPs
Hetzner has 257774 IPs
OVH has 261868 IPs
NhanHoa has 1786 IPs
Xneelo has 65264 IPs
Contabo has 32498 IPs
DigitalOcean has 65264 IPs
Amazon + AWS have 4194020 IPs
Microsoft + Azure have 8388322 IPs
Aliyun + Alibaba have 1048295 IPs


In [54]:
google_cloud_prefixes = pd.read_json("cloud-ip-prefixes/ip_prefixes/google/cloud.json")
ipv4_prefixes = google_cloud_prefixes["prefixes"].apply(lambda x: x.get("ipv4Prefix"))
prefixes_dict = get_distr_prefix_len(ipv4_prefixes.to_list())
nr_ips = get_nr_ips_per_prefix(prefixes_dict)
print("Google Cloud has", nr_ips, "IPs")

Google Cloud has 524010 IPs


In [35]:
for org in org_list:
    print(org)
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict = get_nr_prefixes_from_asn_list(asns, ip_asn)
    display(prefixes_dict)

Google


defaultdict(int,
            {'19': 132,
             '21': 99,
             '24': 1224,
             '18': 44,
             '16': 91,
             '17': 72,
             '22': 65,
             '20': 2031,
             '14': 31,
             '23': 58,
             '13': 10,
             '15': 9,
             '12': 5,
             '11': 3})

Hetzner


defaultdict(int,
            {'16': 31,
             '24': 169,
             '18': 2,
             '23': 9,
             '22': 12,
             '17': 6,
             '19': 1,
             '15': 1,
             '21': 2})

OVH


defaultdict(int,
            {'17': 36,
             '16': 40,
             '24': 31,
             '19': 7,
             '15': 2,
             '18': 6,
             '23': 3,
             '21': 7,
             '22': 4,
             '20': 3})

NhanHoa


defaultdict(int, {'24': 20, '22': 3, '23': 2})

Xneelo


defaultdict(int,
            {'21': 4,
             '24': 18,
             '20': 6,
             '22': 9,
             '23': 3,
             '17': 2,
             '19': 2,
             '18': 1})

Contabo


defaultdict(int,
            {'20': 18,
             '22': 27,
             '21': 34,
             '24': 155,
             '23': 307,
             '18': 1,
             '19': 5})

DigitalOcean


defaultdict(int,
            {'24': 51,
             '22': 167,
             '20': 458,
             '23': 6,
             '18': 21,
             '19': 23,
             '21': 18,
             '17': 4})

In [37]:
print("Amazon + AWS")
prefixes_dict = defaultdict(int)
for org in amazon:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
display(prefixes_dict)

print("Microsoft + Azure")
prefixes_dict = defaultdict(int)
for org in microsoft:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
display(prefixes_dict)

print("Aliyun + Alibaba")
prefixes_dict = defaultdict(int)
for org in ali:
    asns = as_org.get_asn_from_org_name(org, when)
    prefixes_dict.update(get_nr_prefixes_from_asn_list(asns, ip_asn))
display(prefixes_dict)

Amazon + AWS


defaultdict(int,
            {'20': 158,
             '19': 59,
             '22': 784,
             '24': 5655,
             '23': 837,
             '21': 593,
             '16': 116,
             '15': 107,
             '17': 90,
             '18': 52,
             '14': 43,
             '12': 8,
             '13': 13,
             '11': 2})

Microsoft + Azure


defaultdict(int,
            {'24': 417,
             '23': 47,
             '20': 14,
             '22': 26,
             '21': 15,
             '16': 58,
             '19': 13,
             '15': 26,
             '12': 11,
             '17': 16,
             '11': 5,
             '18': 10,
             '14': 21,
             '10': 2,
             '13': 11})

Aliyun + Alibaba


defaultdict(int,
            {'24': 463,
             '23': 124,
             '32': 5,
             '22': 83,
             '20': 52,
             '16': 99,
             '17': 134,
             '14': 13,
             '21': 63,
             '15': 33,
             '18': 91,
             '19': 75,
             '13': 1})

# TESTING

In [None]:
# pyasn
p = ip_asn.get_prefixes_from_asn(24940, when="20241029")
print(get_nr_ips_per_prefix(get_distr_prefix_len(p)))
print(len(p), p)

257792
79 {'185.157.178.0/23', '49.13.0.0/16', '193.25.170.0/23', '185.126.28.0/22', '194.62.106.0/24', '159.69.0.0/16', '91.107.128.0/17', '193.163.198.0/24', '185.189.228.0/24', '142.132.128.0/17', '167.235.0.0/16', '213.239.192.0/18', '138.199.128.0/17', '216.55.108.0/22', '116.202.0.0/16', '185.213.45.0/24', '45.145.227.0/24', '195.248.224.0/24', '168.119.0.0/16', '185.157.176.0/23', '91.190.240.0/21', '195.60.226.0/24', '185.226.99.0/24', '195.201.0.0/16', '23.88.0.0/17', '157.180.0.0/17', '185.228.8.0/23', '185.242.76.0/24', '176.9.0.0/16', '188.40.0.0/16', '185.189.230.0/24', '185.107.52.0/22', '201.131.3.0/24', '94.130.0.0/16', '185.50.120.0/23', '213.133.96.0/19', '65.109.0.0/16', '185.189.229.0/24', '188.245.0.0/16', '65.108.0.0/16', '91.233.8.0/22', '65.21.0.0/16', '157.90.0.0/16', '185.157.83.0/24', '128.140.0.0/17', '148.251.0.0/16', '136.243.0.0/16', '204.29.146.0/24', '185.253.111.0/24', '185.189.231.0/24', '171.25.225.0/24', '78.46.0.0/15', '116.203.0.0/16', '188.34.128

In [None]:
# bgp.he.net (off by one and from a different time!)
p2 =["5.9.0.0/16", "5.75.128.0/17", "23.88.0.0/17", "37.27.0.0/16", "45.145.227.0/24", "46.4.0.0/16", "49.12.0.0/16", "49.13.0.0/16", "65.21.0.0/16", "65.108.0.0/16", "65.109.0.0/16", "78.46.0.0/15", "78.138.62.0/24", "85.10.192.0/18", "88.99.0.0/16", "88.198.0.0/16", "91.107.128.0/17", "91.190.240.0/21", "91.233.8.0/22", "94.130.0.0/16", "95.216.0.0/16", "95.217.0.0/16", "116.202.0.0/16", "116.203.0.0/16", "128.140.0.0/17", "135.181.0.0/16", "136.243.0.0/16", "138.199.128.0/17", "138.201.0.0/16", "142.132.128.0/17", "144.76.0.0/16", "148.251.0.0/16", "157.90.0.0/16", "157.180.0.0/17", "159.69.0.0/16", "162.55.0.0/16", "167.233.0.0/16", "167.235.0.0/16", "168.119.0.0/16", "171.25.225.0/24", "176.9.0.0/16", "178.63.0.0/16", "178.212.75.0/24", "185.50.120.0/23", "185.107.52.0/22", "185.126.28.0/22", "185.157.83.0/24", "185.157.176.0/23", "185.157.178.0/23", "185.171.224.0/22", "185.189.228.0/24", "185.189.229.0/24", "185.189.230.0/24", "185.189.231.0/24", "185.213.45.0/24", "185.216.237.0/24", "185.226.99.0/24", "185.228.8.0/23", "185.253.111.0/24", "188.34.128.0/17", "188.40.0.0/16", "188.245.0.0/16", "193.25.170.0/23", "193.110.6.0/23", "193.163.198.0/24", "194.42.180.0/22", "194.42.184.0/22", "194.62.106.0/24", "195.60.226.0/24", "195.201.0.0/16", "195.248.224.0/24", "197.242.84.0/22", "201.131.3.0/24", "204.29.146.0/24", "213.133.96.0/19", "213.232.193.0/24", "213.239.192.0/18", "216.55.108.0/22"]
print(get_nr_ips_per_prefix(get_distr_prefix_len(p2)))
print(len(p2), p2)

257792
78 ['5.9.0.0/16', '5.75.128.0/17', '23.88.0.0/17', '37.27.0.0/16', '45.145.227.0/24', '46.4.0.0/16', '49.12.0.0/16', '49.13.0.0/16', '65.21.0.0/16', '65.108.0.0/16', '65.109.0.0/16', '78.46.0.0/15', '78.138.62.0/24', '85.10.192.0/18', '88.99.0.0/16', '88.198.0.0/16', '91.107.128.0/17', '91.190.240.0/21', '91.233.8.0/22', '94.130.0.0/16', '95.216.0.0/16', '95.217.0.0/16', '116.202.0.0/16', '116.203.0.0/16', '128.140.0.0/17', '135.181.0.0/16', '136.243.0.0/16', '138.199.128.0/17', '138.201.0.0/16', '142.132.128.0/17', '144.76.0.0/16', '148.251.0.0/16', '157.90.0.0/16', '157.180.0.0/17', '159.69.0.0/16', '162.55.0.0/16', '167.233.0.0/16', '167.235.0.0/16', '168.119.0.0/16', '171.25.225.0/24', '176.9.0.0/16', '178.63.0.0/16', '178.212.75.0/24', '185.50.120.0/23', '185.107.52.0/22', '185.126.28.0/22', '185.157.83.0/24', '185.157.176.0/23', '185.157.178.0/23', '185.171.224.0/22', '185.189.228.0/24', '185.189.229.0/24', '185.189.230.0/24', '185.189.231.0/24', '185.213.45.0/24', '185.21