In [25]:
import cymruwhois
import pandas as pd
import pickle
from datetime import datetime, timedelta

def get_ips(filename='affected_ips.txt'):
    """Read the host part of every non-empty entry in ``filename``.

    Each line is treated as ``host`` or ``host:port``: only the text before
    the first ``:`` is kept, with surrounding whitespace removed. Blank lines
    and lines with nothing before the ``:`` are skipped so that no empty
    strings reach the lookup step.

    Note: because the split is on ``:``, a bare IPv6 address would be cut at
    its first colon.

    Args:
        filename: Path of the text file to read, one entry per line.
            Defaults to ``'affected_ips.txt'`` (relative to the working
            directory).

    Returns:
        list[str]: Host strings in file order; duplicates are kept.

    Raises:
        OSError: If ``filename`` cannot be opened.
    """
    with open(filename, 'r') as r_file:
        hosts = (line.split(':')[0].strip() for line in r_file)
        # Build the list inside the ``with`` block: the generator above reads
        # from the file lazily, so it must be consumed before the file closes.
        return [host for host in hosts if host]

def cymru_lookup(ips):
    """Look up ``ips`` in bulk with a freshly created ``cymruwhois.Client``.

    Args:
        ips: The addresses handed to ``Client.lookupmany_dict``.

    Returns:
        Whatever ``lookupmany_dict`` returns, unchanged. Presumably a mapping
        of address -> record (the notebook later iterates it with
        ``.items()``) -- TODO confirm against the cymruwhois documentation.
    """
    return cymruwhois.Client().lookupmany_dict(ips)

def save_to_disk(data, filename="enrichment.pkl"):
    """Pickle ``data`` into ``filename`` and print a confirmation line.

    Args:
        data: The object to serialise; it must be picklable.
        filename: Destination path. It is opened in binary write mode, so an
            existing file is overwritten. Defaults to ``"enrichment.pkl"``.

    Returns:
        None.
    """
    with open(filename, "wb") as sink:
        pickle.Pickler(sink).dump(data)
        confirmation = f"Dataset saved to {filename}."
        print(confirmation)


In [26]:
# Load the host list from affected_ips.txt (see get_ips) and preview the
# first ten entries as a sanity check.
ips = get_ips()
ips[:10]

['63.143.98.183',
 '63.143.101.78',
 '69.160.105.242',
 '173.225.247.213',
 '207.204.113.145',
 '69.160.106.62',
 '208.131.188.18',
 '72.27.4.3',
 '212.12.162.17',
 '196.206.217.71']

In [None]:
# Enrich every address in a single cymru_lookup call (cymruwhois
# lookupmany_dict). The result is iterated with .items() further down.
# NOTE(review): presumably a slow, network-bound step -- confirm before re-running.
cymru_enriched = cymru_lookup(ips)

In [28]:
# Flatten the lookup result into a list of plain dicts (one per record) so
# it can be pickled and turned into a DataFrame.

# Minimum time between two intermediate checkpoints written by the loop.
CHECKPOINT_INTERVAL = timedelta(seconds=30)

cymru_enriched_as_list = []

# start_time holds the moment of the last checkpoint (initially: loop start).
start_time = datetime.now()
for (ip, record) in cymru_enriched.items():
    cymru_enriched_as_list.append({
        'asn': record.asn,
        'ip': record.ip,
        'subnet': record.prefix,
        # Keep only the text before the first comma of the owner string.
        'owner': record.owner.split(',')[0],
        'country': record.cc
    })
    current_time = datetime.now()
    if current_time - start_time > CHECKPOINT_INTERVAL:
        # Intermediate checkpoint: persists everything collected so far.
        save_to_disk(cymru_enriched_as_list)
        start_time = current_time

# Always write the complete list once the loop is done. Without this final
# save, enrichment.pkl would hold only the last intermediate checkpoint --
# or would never be written at all if the loop finishes in under
# CHECKPOINT_INTERVAL.
save_to_disk(cymru_enriched_as_list)

In [29]:
# One row per enriched record; the columns come from the dict keys built in
# the previous cell (asn, ip, subnet, owner, country).
df = pd.DataFrame(cymru_enriched_as_list)

In [30]:
# Display the enriched table.
df

Unnamed: 0,asn,ip,subnet,owner,country
0,9534,121.120.36.132,121.120.0.0/16,MAXIS-AS1-AP Binariang Berhad,MY
1,45758,49.49.140.188,49.49.128.0/17,TTBP-AS-AP Triple T Broadband Public Company L...,TH
2,57332,5.206.244.130,5.206.240.0/20,TOM-NET-AS,PL
3,14754,190.62.80.18,190.62.64.0/18,TELECOMUNICACIONES DE GUATEMALA,SV
4,5617,83.13.153.3,83.8.0.0/13,TPNET,PL
...,...,...,...,...,...
15464,9268,43.247.64.70,43.247.64.0/22,OVERTHEWIRE-AS-AP Over The Wire Pty Ltd,AU
15465,3352,83.37.155.57,83.37.0.0/16,TELEFONICA_DE_ESPANA,ES
15466,5384,217.165.14.176,217.165.0.0/18,EMIRATES-INTERNET Emirates Internet,AE
15467,5384,83.110.148.77,83.110.128.0/18,EMIRATES-INTERNET Emirates Internet,AE


In [31]:
# Persist the enriched table as CSV. df is built straight from a list of
# dicts, so its index is just the default row counter. index=False keeps that
# counter out of the file; otherwise it is written as an extra, unnamed
# leading column.
df.to_csv('affected_ips_enriched.csv', index=False)

### Top Observed ASNs/Owners

In [35]:
# Number of affected addresses per (ASN, owner) pair, most frequent first.
(
    df
    .groupby(['asn', 'owner'])
    .size()
    .sort_values(ascending=False)
)

asn     owner                                      
5384    EMIRATES-INTERNET Emirates Internet            887
4788    TTSSB-MY TM TECHNOLOGY SERVICES SDN. BHD.      535
8151    UNINET                                         470
6400    Compania Dominicana de Telefonos S. A.         423
262916  Mega Cable                                     304
                                                      ... 
266206  CABONNET INTERNET LTDA                           1
266087  Orbitel Telecomunicacoes e Informatica Ltda      1
266084  F DOS S BEZERRA PONTES - ME                      1
266052  Interviva Telecom e Servicos LTDA ME             1
28431   RAUL DUARTE URITA                                1
Length: 2031, dtype: int64

### Top Observed Subnets/Owners

In [39]:
# Number of affected addresses per (subnet, owner) pair, most frequent first.
(
    df
    .groupby(['subnet', 'owner'])
    .size()
    .sort_values(ascending=False)
)


subnet           owner                                       
2.50.128.0/18    EMIRATES-INTERNET Emirates Internet             91
217.165.0.0/18   EMIRATES-INTERNET Emirates Internet             72
50.128.0.0/9     COMCAST-7922                                    72
190.167.0.0/16   Compania Dominicana de Telefonos S. A.          63
190.166.0.0/16   Compania Dominicana de Telefonos S. A.          59
                                                                 ..
187.86.156.0/23  VETORIALNET INF. E SERVICOS DE INTERNET LTDA     1
187.86.140.0/23  VETORIALNET INF. E SERVICOS DE INTERNET LTDA     1
187.86.132.0/24  VETORIALNET INF. E SERVICOS DE INTERNET LTDA     1
187.86.132.0/23  VETORIALNET INF. E SERVICOS DE INTERNET LTDA     1
197.248.24.0/24  Safaricom                                        1
Length: 8224, dtype: int64