## Experiment: End-to-end detection of random-subdomain DNS DDoS attacks
Download the DNS requests dataset (if needed).

In [None]:
from utils.data import download_url
# Download the gzipped DNS request log into ./data/ under a fixed file name.
download_url(
    url='https://ndownloader.figshare.com/files/17905811',
    target_folder='./data/',
    filename='dns-requests.csv.gz',
)

Simulate baseline creation during peacetime, followed by a random-subdomain attack on the github.com domain.

In [None]:
import random
from algorithm import DistinctHeavyHitters, HeavyHitters
from smart_open import open
import uuid
# Two sketches over (subdomain, domain) pairs: the baseline one is fed during
# peacetime, the detector one is fed once peacetime is over.
dwsHH_baseline = DistinctHeavyHitters(k=10000)
dwsHH_detector = DistinctHeavyHitters(k=10000)

# Fed with every peacetime subdomain label; its content becomes the
# subdomain whitelist when peacetime ends.
kHH = HeavyHitters(k=10000)
first_run = True
num_line_peacetime = 85000000
counter = 0

def is_peacetime():
    """Return True while the running data-line counter is below `num_line_peacetime`."""
    return counter < num_line_peacetime

white_list_subdomains = None
white_list_domains = None
# Set view of `white_list_subdomains` used for O(1) membership tests in the
# detection loop; the list itself is kept because later cells index into it.
white_list_subdomain_set = frozenset()
top_domains_num = 20
confidence_threshold = 0.75
victims = ['github.com']
# Upper bound (inclusive, must be >= 1) on how many random subdomains are
# injected per simulated attack event.
max_subdomains_per_attack = 1
# Probability that a simulated attack is injected next to a real request.
attack_probability = 0.2
# Dedicated, seeded generator so that the injected attack (both the decision
# to inject and the generated subdomains) is reproducible across runs without
# touching the global `random` state.
random_seed = 42
rng = random.Random(random_seed)

# `with` guarantees the (possibly remote / compressed) stream is closed even
# if the loop raises half-way through the file.
with open('data/dns-requests.csv.gz') as dns_file:
    for line in dns_file:
        if first_run:
            # The first line is skipped (treated as the CSV header).
            first_run = False
            continue
        values = line.rstrip().split(',')
        counter += 1
        if len(values) != 2:
            # Rows that do not split into exactly two fields are counted but ignored.
            continue
        subdomain, domain = values

        if is_peacetime():
            dwsHH_baseline.update(subdomain, domain)
            kHH.update(subdomain)
            continue

        if white_list_subdomains is None:
            # First request after peacetime: freeze the whitelists.
            white_list_subdomains = kHH.get()
            white_list_subdomain_set = frozenset(white_list_subdomains)
            # Entries are read as [0]=domain, [1]=ranking value, [2]=value
            # compared against the confidence threshold.
            top_domains_list = sorted(dwsHH_baseline.count(), key=lambda x: x[1], reverse=True)
            # Slicing (instead of indexing 0..top_domains_num-1) stays safe
            # when the baseline tracked fewer domains than requested.
            white_list_domains = [
                top_domain[0]
                for top_domain in top_domains_list[:top_domains_num]
                if top_domain[2] > confidence_threshold
            ]

        dns_stream = []
        # Decide whether to inject a random-subdomain attack next to this request.
        if rng.random() < attack_probability:
            # Pick the attacked domain at random.
            victim = rng.choice(victims)
            # Pick how many generated subdomains this attack event carries.
            for _ in range(rng.randint(1, max_subdomains_per_attack)):
                fake_subdomain = str(uuid.UUID(int=rng.getrandbits(128), version=4))
                dns_stream.append((fake_subdomain, victim))
        # Append the original DNS request after any injected ones.
        dns_stream.append((subdomain, domain))

        # Distinct names so the outer `subdomain` / `domain` are not rebound.
        for req_subdomain, req_domain in dns_stream:
            if req_subdomain in white_list_subdomain_set:
                continue
            if req_domain in white_list_domains:
                continue
            dwsHH_detector.update(req_subdomain, req_domain)

In [19]:
# Preview at most 20 whitelisted subdomain labels. Slicing avoids the
# IndexError that fixed-range indexing raises on a shorter whitelist.
for label in white_list_subdomains[:20]:
    print(label)

vip-skyrockcom-lbs
www
username
blogspot.l
q50
vip-skyrockmobi-lbs
clb
pages-wildcard
dq33tynpwunh
www3.l
e8e03d8f-fce6-47db-a48a-1982558191aa
http
nb-69-164-223-52.newark.nodebalancer
urlforward
mx2
p2620
a-us00
bitrixssd
s
web


In [4]:
# Show the whitelisted domains as the cell's output.
white_list_domains

['blogspot.com',
 'wordpress.com',
 'skyrock.com',
 'home.blog',
 'herokuapp.com',
 'deviantart.com',
 'fc2.com',
 'livejournal.com',
 'weebly.com',
 'filetransit.com',
 'lofter.com',
 'wixsite.com',
 'soft112.com',
 'list-manage.com',
 'amazonaws.com',
 'jimdo.com',
 'wix.com',
 'azurewebsites.net',
 'uptodown.com',
 'blogspot.ca']

In [10]:
# Map each baseline entry's domain ([0]) to its value ([1]); a later duplicate
# would win, exactly as in a last-match scan.
baseline_by_domain = {row[0]: row[1] for row in dwsHH_baseline.count()}
# Fall back to 1 when github.com is absent from the baseline.
github_baseline = baseline_by_domain.get('github.com', 1)
print("Baseline for github.com: %d (how many distinct subdomain in peacetime)" % github_baseline)

baseline for github.com: 21354 (how many distinct subdomain in peacetime)


In [14]:
# Rank the detector's entries by their value at index 1, largest first.
top_domain_in_attack_time = sorted(dwsHH_detector.count(), key=lambda x: x[1], reverse=True)
# Print at most the first 20; slicing avoids an IndexError when the detector
# holds fewer than 20 domains.
for entry in top_domain_in_attack_time[:20]:
    print(entry[0], entry[1])

github.com 16243296.342877353
myshopify.com 384536.65077588946
cloudflare.net 227763.7876032608
food.blog 137231.1965133412
video.blog 117915.23354495593
altervista.org 107703.70762908598
art.blog 106280.71926067249
windows.net 101418.59947917162
googlehosted.com 92433.17745677632
bokhra.com 86927.75309414702
bigcartel.com 78017.22972080091
vigbo.com 70129.20200206093
cloudfront.net 69248.63179016161
bjbqz.com 62986.591890642936
fashion.blog 62423.808337435396
163.com 57673.533326227516
school.blog 55786.85505379681
gwstest.net 54586.5221201435
wdstq.com 54584.348984069635
herokudns.com 54541.16363671564


In [13]:
# Map each detector entry's domain ([0]) to its value ([1]); 0 when github.com
# is absent from the detector.
attacktime_by_domain = {row[0]: row[1] for row in dwsHH_detector.count()}
github_attacktime = attacktime_by_domain.get('github.com', 0)
# Relies on `github_baseline` computed by the baseline cell.
github_ratio = github_attacktime / github_baseline
print("Ratio between github in attack time to baseline: %f" % github_ratio)

Ratio between github in attack time to baseline: 760.640724


In [16]:
# For comparison, look at the second-highest scoring domain (myshopify.com).
# Map each baseline entry's domain ([0]) to its value ([1]); fall back to 1
# when the domain is absent from the baseline.
baseline_by_domain = {row[0]: row[1] for row in dwsHH_baseline.count()}
myshopify_baseline = baseline_by_domain.get('myshopify.com', 1)
print("Baseline for myshopify.com: %d (how many distinct subdomain in peacetime)" % myshopify_baseline)

Baseline for myshopify.com: 3710 (how many distinct subdomain in peacetime)


In [20]:
# Map each detector entry's domain ([0]) to its value ([1]); 0 when
# myshopify.com is absent from the detector.
attacktime_by_domain = {row[0]: row[1] for row in dwsHH_detector.count()}
myshopify_attacktime = attacktime_by_domain.get('myshopify.com', 0)
# Relies on `myshopify_baseline` computed by the baseline cell.
myshopify_ratio = myshopify_attacktime / myshopify_baseline
print("Ratio between myshopify in attack time to baseline: %f" % myshopify_ratio)

Ratio between myshopify in attack time to baseline: 103.648583
