In [121]:
import time
import datetime
import numpy as np

# BFS using Customer > Peer > Provider order for trial() function

class Internet:
    def __init__(self, last_updated_time, AS_set, graph_component_c = None, graph_component_peer = None, graph_component_p = None):
        if(isinstance(last_updated_time, datetime.datetime)):
            self._time = last_updated_time
        else:
            print("Error: Internet: invalid last_updated_time passed")
        self.AS_set = np.array([], dtype = object)
        self.AS_numbers = self.set_AS_set(AS_set)
        if(graph_component_c):
            self.set_neighbors("customer", graph_component_c)
        if(graph_component_peer):
            self.set_neighbors("peer", graph_component_peer)
        if(graph_component_p):
            self.set_neighbors("provider", graph_component_p)
                    
    def set_neighbors(self, neighbor_type, graph_component):
        for key in graph_component:
            flag = True
            if key not in self.AS_numbers:
                flag = False
                print('Error: Internet: graph component does not match the AS numbers passed in ', key, neighbor_type)
        if flag:
            for key in graph_component:
                tmp = self.get_AS_by_number(key)
                for neighbors in graph_component[key]:
                    neighbor_tmp = self.get_AS_by_number(neighbors)
                    # one direction, can change to two direction
                    if neighbor_type == "customer":
                        tmp.set_neighbors_customer(np.array([neighbor_tmp], dtype = object))
                    elif neighbor_type == "peer":
                        tmp.set_neighbors_peer(np.array([neighbor_tmp], dtype = object))
                    elif neighbor_type == "provider":
                        tmp.set_neighbors_provider(np.array([neighbor_tmp], dtype = object))
                    else:
                        raise Exception("neighbor type wrong")
                    
    def set_AS_set(self, AS_set):
        AS_number = []
        for AS in AS_set:
            if AS.number not in AS_number:
                self.AS_set = np.append(self.AS_set, AS)
                AS_number.append(AS.number)
            else:
                print('Error: Internet: redundent AS number ', AS.number)
        return AS_number
    
    def get_AS_by_number(self, as_number):
        for AS in self.AS_set:
            if AS.number == as_number:
                return AS
        print("AS not in as set")
        return None
    
    def add_connection(self, AS1, AS2, neighbor_type):
        if AS1.number not in self.AS_numbers:
            self.AS_numbers.append(AS1.number)
            self.AS_set = np.append(self.AS_set, AS1)
        if AS2.number not in self.AS_numbers:
            self.AS_numbers.append(AS2.number)
            self.AS_set = np.append(self.AS_set, AS2)
        # one direction, can change to two direction by add following
        if neighbor_type == "customer":
            AS1.set_neighbors_customer(np.array([AS2], dtype = object))
        elif neighbor_type == "peer":
            AS1.set_neighbors_peer(np.array([AS2], dtype = object))
        elif neighbor_type == "provider":
            AS1.set_neighbors_provider(np.array([AS2], dtype = object))
        else:
            raise Exception("neighbor type wrong")
        
    def trial(self, AS1, AS2):
        queue = np.array([AS1],dtype = object)
        trial = {}
        visited = {AS1.number: 1}
        while queue.size > 0:
            current = queue[-1]
            queue = queue[:-1]
            if current.number == AS2.number:
                result = [AS2.number]
                while AS2.number in trial:
                    AS2.number = trial[AS2.number]
                    result.insert(0, AS2.number)
                return result
            for neighbor in current.neighbors_provider:
                if neighbor.number not in visited:
                    trial[neighbor.number] = current.number
                    queue = np.append(neighbor, queue)
                    visited[neighbor.number] = 1
            for neighbor in current.neighbors_peer:
                if neighbor.number not in visited:
                    trial[neighbor.number] = current.number
                    queue = np.append(neighbor, queue)
                    visited[neighbor.number] = 1
            for neighbor in current.neighbors_customer:
                if neighbor.number not in visited:
                    trial[neighbor.number] = current.number
                    queue = np.append(neighbor, queue)
                    visited[neighbor.number] = 1
        return []
    def distance(self, AS1, AS2):
        len_trial = len(self.trial(AS1, AS2))
        if len_trial > 1:
            return len_trial - 1;
        print("The input ASes are not connected, or inputs are the same")
        return 0
    
    def get_edges(self, neighbor_type = None):
        edgename = []
        for AS in self.AS_set:
            if neighbor_type == "customer":
                for neighbor in AS.neighbors_customer:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
            elif neighbor_type == "peer":
                for neighbor in AS.neighbors_peer:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
            elif neighbor_type == "provider":
                for neighbor in AS.neighbors_provider:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
            elif neighbor_type == None:
                for neighbor in AS.neighbors_customer:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
                for neighbor in AS.neighbors_peer:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
                for neighbor in AS.neighbors_provider:
                    if {AS.number, neighbor.number} not in edgename:
                        edgename.append((AS.number, neighbor.number))
        return edgename
    def get_vertices(self):
        vtx = []
        for AS in self.AS_set:
            if AS.number not in self.AS_set:
                vtx.append(AS.number)
        return vtx  
    def get_prefix_set(self):
        prefixes = np.array([], dtype = object)
        for AS in self.AS_set:
            for routers in AS.routers:
                for prefix in routers.prefix_set:
                    if prefix not in prefixes:
                        prefixes = np.append(prefix, prefixes)
        return prefixes
    def build_structure(self, parsedObj):
        if(parsedObj.label == "shodan"):
            return self.build_structure_shodan(parsedObj)
    
    def build_structure_shodan(self, parsedObj):
        host = Host(None, None, parsedObj.data["ip_addr"], parsedObj.timestamp)
        self.AS_set[0].routers[0].prefix_set[0].append_host(host)
        
    def count_hosts(self):
        prefixes = self.get_prefix_set()
        count = 0
        for prefix in prefixes:
            for host in prefix.hosts:
                count += 1
        return count

In [107]:
import numpy as np
geo_location_set = ['Asia', 'US', 'Europe', 'Australia', 'Africa','SouthernAmerica']
as_type_set = ['enterprise', 'private', 'non-profit']

# keep three sets of neighbors, Customer, Peer, Provider

class AS:
    def __init__(self, as_type=None, geo_location=None, routers = None, number=None, customers=None, peers=None, providers=None):
        if as_type in as_type_set:
            self.as_type = as_type
        else:
            print("Error: in AS: as_type not match")
        if geo_location in geo_location_set:
            self.geo_location = geo_location
        else:
            print("Error: in AS: geo_location not match")
        self.number = number
        self.routers = np.array([], dtype = object)
        self.neighbors_customer = np.array([], dtype = object)
        self.neighbors_peer = np.array([], dtype = object)
        self.neighbors_provider = np.array([], dtype = object)
        self.set_routers_set(routers)
        if customers != None:
            self.set_neighbors_customer(customers)
        if peers != None:
            self.set_neighbors_peer(peers)
        if providers != None:
            self.set_neighbors_provider(providers)
    def set_routers_set(self, routers):
        for router in routers:
            self.routers = np.append(self.routers, router)
    def set_neighbors_customer(self, customers):
        for customer in customers:
            if isinstance(customer, AS):
                if customer.number != self.number and customer not in self.neighbors_customer:
                    self.neighbors_customer = np.append(self.neighbors_customer, customer)
            else:
               print("customer type not correct")
    def set_neighbors_peer(self, peers):
        for peer in peers:
            if isinstance(peer, AS):
                if peer.number != self.number and peer not in self.neighbors_peer:
                    self.neighbors_peer = np.append(self.neighbors_peer, peer)
            else:
               print("peer type not correct")
    def set_neighbors_provider(self, providers):
        for provider in providers:
            if isinstance(provider, AS):
                if provider.number != self.number and provider not in self.neighbors_provider:
                    self.neighbors_provider = np.append(self.neighbors_provider, provider)
            else:
               print("provider type not correct")

In [114]:
import numpy as np
import re
regex_IPprefix = re.compile('^([0-9]{1,3}\.){3}[0-9]{1,3}(\/([0-9]|[1-2][0-9]|3[0-2]))?$')

# @to do: when set the hosts, check if host IP address match the prefix after mask
class Prefix:
    def __init__(self, prefix=None, hosts=None, mask = None):
        self.hosts = np.array([], dtype = object)
        self.prefix = prefix
        if(hosts != None):
            self.set_hosts(hosts, prefix)
    def set_hosts(self, hosts, prefix):
        for host in hosts:
            self.hosts = np.append(self.hosts, host)
    def contains(self, IP_address):
        for i in range(len(self.hosts)):
            if (self.hosts[i].IP == IP_address):
                return True
        return False
    def subnet_size(self):
        return len(self.hosts)
    
    def append_host(self, host):
        self.hosts = np.append(self.hosts, host)

In [115]:
import numpy as np
import re
regex_IPprefix = re.compile('^([0-9]{1,3}\.){3}[0-9]{1,3}(\/([0-9]|[1-2][0-9]|3[0-2]))?$')

class Routers:
    def __init__(self, prefix_set=None):
        self.prefix_set = np.array([], dtype = object)
        if(prefix_set != None):
            self.set_prefix_set(prefix_set)
    def set_prefix_set(self, prefix_set):
        for prefix in prefix_set:
            if(prefix.prefix == None):
                print("Error: Routers: router prefix is None, but still making prefix set")
                self.prefix_set = np.append(self.prefix_set, prefix)
                break
            if(regex_IPprefix.match(prefix.prefix)):
                self.prefix_set = np.append(self.prefix_set, prefix)
            else:
                print("Error: Routers: router prefix does not fit IPV4 regex")

In [110]:
import numpy as np
import re
regex_DNS = re.compile('^(?![0-9]+$)(?!-)[a-zA-Z0-9-]{,63}(?<!-)$')
regex_IP = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')

class Host:
    def __init__(self, SSL_version=None, DNS_name=None, IP=None, time_stamp = None):
        if(DNS_name != None):
            self.set_DNS_name(DNS_name)
        if(SSL_version != None):
            self.set_SSL_version(SSL_version)
        if(IP != None):
            self.set_IP(IP)
        self.timestamp = time_stamp
    def set_IP(self, IP):
        if(regex_IP.match(IP)):
            self.IP = IP
        else:
            print("Error: Hosts: IP type does not match IPV4")
    def set_DNS_name(self, DNS_name):
        if(regex_DNS.match(DNS_name)):
            self.DNS_name = DNS_name
        else:
            print("Error: Hosts: DNS_name type does not match")
    def set_SSL_version(self, SSL_version):
        if(isinstance(SSL_version, float)):
            self.SSL_version = SSL_version
        else:
            print("Error: Hosts: SSL_version type does not match")

In [112]:
# this function is used to test the functionality of the data structure
# run main.py will do the test
# host = Host(3.5, 'abc', '200.4.5.7')
# AS_list = np.array([], dtype = object)
# Host_list = np.array([], dtype = object)
# for i in range(10):
#     Host_list = np.append(Host_list ,Host(3 + i/10, 'abc', '200.4.5.7'))

# a = Prefix('200.4.0.1/24',Host_list)

# a = Prefix('200.4.0.1/24',Host_list)
# a.contains('200.4.5.7')

# prefix_list = np.array([], dtype = object)
# for i in range(5):
#     prefix_list = np.append(prefix_list, Prefix('200.4.0.1/24',Host_list))

# b = Routers(prefix_list).prefix_set[0].hosts[0].IP

# router_list = np.array([], dtype = object)
# for i in range(6):
#     router_list = np.append(router_list, Routers(prefix_list))

# as_system1 = AS('enterprise','US',router_list, 1,)
# as_system2 = AS('private','Asia',router_list, 2,)
# as_system3 = AS('non-profit','Europe',router_list, 3,)
# as_system4 = AS('non-profit','Europe',router_list, 4,)
# as_list = np.array([as_system1, as_system2, as_system3, as_system4], dtype = object)
# graph_component = {1:[2,3],
#                    3:[1, 2]}

# graph_component2 = {2:[1],
#                     3:[4]}

# graph_component3 = {1:[4]}
# a = Internet(datetime.datetime.now(), as_list, graph_component, graph_component2, graph_component3)
# a.add_connection(as_system2, as_system4,  "customer")
# print(as_system1.routers)
# print(as_system3.neighbors_peer)
# as_system2.geo_location = "US"
# a.add_connection(as_system4, as_system3, "peer")
# a.get_edges()
# a.trial(as_system4, as_system1)
# print(as_system1.neighbors_peer)

In [73]:
print(as_system3.neighbors_peer)

[<__main__.AS object at 0x1167ca5d0>]


In [74]:
as_system2.geo_location = "US"

In [75]:
a.add_connection(as_system4, as_system3, "peer")
a.get_edges()

[(4, 2), (4, 3), (4, 4), (2, 4), (2, 4), (3, 4), (3, 2), (3, 4), (4, 3)]

In [76]:
a.trial(as_system4, as_system1)

[4]

In [77]:
class ParsedObj:

    def __init__(self, timestamp, label, data):
        # Example attributes

        # timestamp is a datatime object
        self.timestamp = timestamp

        self.data = data
        self.label = label

In [78]:
# input is an object with timestamp, data, label
# make a skeleton object from the input
def build_structure(parsedObj):
    if(parsedObj.label == "shodan"):
        return build_structure_shodan(parsedObj)

In [79]:
# this function will only return a host for the 
def build_structure_shodan(parsedObj):
    host = Host(None, None, parsedObj.data["ip_addr"], parsedObj.timestamp)
    return host

In [80]:
parsedObj = ParsedObj("fake_time", "shodan", {"ip_addr":'200.4.5.7'})

In [81]:
Host_list = np.array([], dtype = object)

In [82]:
a = build_structure(parsedObj)
a

<__main__.Host at 0x11678c710>

In [101]:
#build an empty Internet object with everything empty with only one host with everything None
def build_sleleton_Internet():
    host = Host(None, None, None, None)
    Host_list = np.array([host], dtype = object)
    prefix = Prefix(None,Host_list)
    Prefix_list = np.array([prefix], dtype = object)
    router = Routers(Prefix_list)
    Router_list = np.array([router], dtype = object)
    as_system = AS(None,None,Router_list, None,)
    AS_list = np.array([as_system], dtype = object)
    internet = Internet(datetime.datetime.now(), AS_list,)
    return internet

In [124]:
skeleton = build_sleleton_Internet()
skeleton.build_structure(parsedObj)
skeleton.count_hosts()

Error: Routers: router prefix is None, but still making prefix set
Error: in AS: as_type not match
Error: in AS: geo_location not match


2