In [None]:
import yaml
import sys
from pprint import pprint

# Load the notebook configuration.
# safe_load replaces the bare yaml.load(stream) call: without an explicit
# Loader, yaml.load can construct arbitrary Python objects from tagged YAML and
# is rejected outright by recent PyYAML releases. The `with` block also closes
# the file handle, which the original open() call never did.
# NOTE(review): if config.yml depends on Python-specific YAML tags, use
# yaml.load(config_file, Loader=yaml.FullLoader) here instead.
with open('config.yml') as config_file:
    cfg = yaml.safe_load(config_file)
pprint(cfg)

# Presumably this is what makes the dedos_analytic imports in later cells
# resolve. The membership test keeps sys.path from growing every time the cell
# is re-run.
if cfg['dedos_module_dir'] not in sys.path:
    sys.path.append(cfg['dedos_module_dir'])

In [None]:
from dedos_analytic.database.db_api import DbApi, DedosEncoder
# Build the database API object from the loaded config; later cells call
# db.get_items() and db.get_msus_epoch_df() on it.
db = DbApi(cfg)

In [None]:
# Fetch the 'msus' items through the database API and pretty-print them.
msus = db.get_items('msus')
pprint(msus)

In [None]:
from dedos_analytic.database.event_reader import read_traffic_events
# Read the traffic events described by the config; they are passed to
# ddm.label_with_traffic() further down to label the per-type stats.
traffic = read_traffic_events(cfg)
print(traffic)

In [None]:
import dedos_analytic.engine.data_manipulation as ddm

# NOTE(review): sampled_msus is computed but not used below -- the epoch frame
# is currently built from the full `msus`, and the sampled variant is left
# commented out.
sampled_msus = ddm.sample_msus_by_type(msus)
#df = db.get_msus_epoch_df(sampled_msus)
df = db.get_msus_epoch_df(msus)

In [None]:
import dedos_analytic.engine.feature_engineering as dfe

# Three-step pipeline: epoch frame -> rate frame -> per-type/epoch averages ->
# frame labeled with the traffic events read earlier.
# NOTE(review): this summary is inferred from the helper names only; the exact
# semantics of each step live in dedos_analytic.
rate_df = dfe.make_rate_df(df)
type_df = ddm.average_within_type_epoch(rate_df)
labeled_df = ddm.label_with_traffic(type_df, traffic)

In [None]:
# Mapping handed to the per-traffic separation helper: keys are traffic type
# names, values are (presumably) the MSU type associated with each of them.
TRAFFIC_MSUS = {
    'tls_reneg': 'read',
    'redos': 'regex',
    'slowloris': 'http',
}

separated = ddm.separate_msu_type_per_traffic_type(labeled_df, TRAFFIC_MSUS)

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

def plot_metric(df, metric, traffic):
    """Plot one metric against epoch and mark the first epoch of a traffic type.

    Draws on the current pyplot axes.

    Parameters
    ----------
    df : DataFrame-like with ``epoch`` and ``traffic`` columns plus ``metric``.
    metric : name of the column plotted on the y axis (also the legend label).
    traffic : traffic label; used as the axes title and to find the smallest
        epoch whose ``traffic`` value equals it, where a red vertical line is
        drawn.
    """
    plt.plot(df.epoch, df[metric], '.', label=metric)
    plt.title(traffic)
    plt.legend()
    # Booleans instead of the string 'off': the string aliases were deprecated
    # (matplotlib 2.2) and, where they are no longer special-cased, a non-empty
    # string is simply truthy and would leave the ticks/labels switched on.
    plt.tick_params(
        axis='x',           # changes apply to the x-axis
        which='both',       # both major and minor ticks are affected
        bottom=False,       # ticks along the bottom edge are off
        top=False,          # ticks along the top edge are off
        labelbottom=False)  # labels along the bottom edge are off

    matching_epochs = df[df.traffic == traffic].epoch
    # min() of an empty selection raises ValueError; with no matching rows there
    # is nothing to mark, so the vertical line is skipped.
    if len(matching_epochs) > 0:
        plt.axvline(x=min(matching_epochs), color='r')


In [None]:
# Traffic type name -> name of the metric column plotted for that traffic type.
TRAFFIC_METRICS = {
    'tls_reneg': 'MSU_USER_TIME',
    'redos': 'MSU_USER_TIME',
    'slowloris': 'MEMORY_ALLOCATED',
}

def plot_traffic_types(dfs):
    """Plot, one subplot row per frame, the metric configured for its traffic type.

    For every frame in ``dfs`` the traffic type is obtained from
    ``ddm.traffic_type`` and the metric to draw is looked up in
    ``TRAFFIC_METRICS`` (KeyError if the traffic type has no entry).

    Parameters
    ----------
    dfs : sequence of frames accepted by ``plot_metric``.
    """
    # squeeze=False always yields a 2-D array of axes. Without it a single
    # frame makes plt.subplots() return a bare Axes object and indexing it
    # fails.
    fig, axes = plt.subplots(len(dfs), 1, figsize=(8, 6), squeeze=False)
    for row, msu_df in enumerate(dfs):
        plt.sca(axes[row][0])
        traffic_type = ddm.traffic_type(msu_df)
        plot_metric(msu_df, TRAFFIC_METRICS[traffic_type], traffic_type)
        
# Draw one subplot per frame produced by the per-traffic separation step.
plot_traffic_types(separated)

In [None]:
import pandas as pd

# Stat type columns used by default for the pairwise metric matrix.
PLOTTED_STAT_TYPES = (
    "QUEUE_LEN",
    "ERROR_COUNT",
    "NUM_STATES",
    "MSU_USER_TIME",
    "MSU_MINOR_FAULTS",
    "MSU_INVOL_CTX_SW",
)

def plot_metric_matrix(df, metrics=PLOTTED_STAT_TYPES, edgecolor='k', markersize=5,
                       filename="metrics.png"):
    """Draw a pairwise matrix of metrics, one series per traffic label, and save it.

    The panel in row ``i`` and column ``j`` plots ``metrics[j]`` on x against
    ``metrics[i]`` on y, with one marker series per distinct ``df.traffic``
    value. Diagonal panels show a 25-bin histogram of the metric per traffic
    value instead.

    Parameters
    ----------
    df : DataFrame with a ``traffic`` column and one column per metric.
    metrics : sequence of column names; defaults to ``PLOTTED_STAT_TYPES``.
    edgecolor, markersize : marker styling for the off-diagonal panels.
    filename : path the figure is written to; pass ``None`` to skip saving.
        Defaults to "metrics.png".
    """
    n_metrics = len(metrics)
    # squeeze=False always yields a 2-D axes array, so a single metric (a 1x1
    # grid) can be indexed the same way as larger grids.
    fig, ax = plt.subplots(n_metrics, n_metrics, figsize=(8, 8), squeeze=False)
    traffics = pd.unique(df.traffic.sort_values(ascending=False))
    # Select the rows of each traffic label once instead of once per panel.
    groups = [(traffic, df[df.traffic == traffic]) for traffic in traffics]

    for j, m1 in enumerate(metrics):
        for i, m2 in enumerate(metrics):
            panel = ax[i][j]
            for traffic, group in groups:
                if i != j:
                    panel.plot(group[m1], group[m2], 'o', markeredgecolor=edgecolor,
                               markersize=markersize, label=traffic)
                else:
                    panel.hist(group[m1], 25, label=traffic)
            panel.ticklabel_format(style='sci', axis='both', scilimits=(2, 2))

            # x tick labels and axis names only on the bottom row.
            if i == n_metrics - 1:
                panel.set_xlabel(m1, fontsize=8)
            else:
                panel.set_xticks([])

            # y axis names on the first column. y ticks are kept there and on
            # panel (row 0, column 1) -- presumably because panel (0, 0) is a
            # histogram whose y axis is not on the metric's scale.
            if j == 0:
                panel.set_ylabel(m2, fontsize=8)
            elif j != 1 or i != 0:
                panel.set_yticks([])

            if i == 0 and j == 0:
                panel.legend()

    # Save through the figure object rather than pyplot's implicit
    # "current figure" state.
    if filename is not None:
        fig.savefig(filename)
      
            
# Pairwise metric matrix over the traffic-labeled frame; the figure is also
# written to metrics.png.
plot_metric_matrix(labeled_df, PLOTTED_STAT_TYPES)

In [None]:
from mpl_toolkits.mplot3d import Axes3D

def plot_metrics_3d(df, m1, m2, m3, title=None):
    """3-D scatter of three metric columns, one series per traffic label.

    Creates a new figure with a single 3-D axes.

    Parameters
    ----------
    df : DataFrame with a ``traffic`` column and the three metric columns.
    m1, m2, m3 : column names plotted on the x, y and z axes respectively;
        they are also used as the axis labels.
    title : optional figure title; nothing is set when ``None``.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    traffics = pd.unique(df.traffic.sort_values(ascending=False))
    for traffic in traffics:
        group = df[df.traffic == traffic]
        ax.scatter(group[m1], group[m2], group[m3], label=traffic)
    ax.legend()
    ax.set_xlabel(m1)
    ax.set_ylabel(m2)
    ax.set_zlabel(m3)
    if title is not None:
        # Figure has no title() method (the original call raised
        # AttributeError); suptitle() sets the figure-level title.
        fig.suptitle(title)
        
# Columns used for the x, y and z axes of the 3-D scatter, in that order.
metrics = ['MSU_USER_TIME', 'NUM_STATES', 'MSU_INVOL_CTX_SW']
        
plot_metrics_3d(labeled_df, *metrics)

In [None]:
import numpy as np
from sklearn.cluster import DBSCAN


# Marker face colors for DBSCAN clusters; points labeled -1 (noise) are drawn
# with NOISE_COLOR instead.
CLUSTER_COLORS = (
    'mediumblue',
    'darkgreen',
    'mediumseagreen',
    'lawngreen',
)
NOISE_COLOR = 'maroon'

# Keyword arguments shared by the plt.plot() calls that draw cluster members.
PLOT_OPTS = {
    'marker': 'o',
    'markeredgecolor': 'k',
    'markeredgewidth': .2,
    'linestyle': 'None',
}

def plot_dbscan_results(X_in, metrics, min_samples, eps, do_plot=True, plot_metrics=None):
    """Cluster ``X_in`` with DBSCAN and score how well clusters split client from attack rows.

    The rows are scaled with ``dfe.scale_data(X_in, metrics)`` and clustered.
    For every resulting label (noise, label -1, included) the rows whose
    ``traffic`` is ``'client'`` and the rows with any other ``traffic`` value
    ("attack") are counted. A label is then attributed to the client side when
    it holds a larger share of all client rows than of all attack rows, and to
    the attack side otherwise.

    Parameters
    ----------
    X_in : DataFrame with a ``traffic`` column, the ``metrics`` columns and the
        columns named in ``plot_metrics``.
    metrics : sequence of column names handed to ``dfe.scale_data``.
    min_samples, eps : forwarded to ``sklearn.cluster.DBSCAN``.
    do_plot : when true, draw the clusters on the current pyplot axes (core
        samples with large markers, the rest with small ones) and print the
        per-label client/attack breakdown.
    plot_metrics : ``(x_column, y_column)`` for the plot; defaults to
        ``('TIME', metrics[0])``.

    Returns
    -------
    tuple ``(client_fraction, attack_fraction)``: the fraction of client rows
    that sit in client-attributed labels and the fraction of attack rows that
    sit in attack-attributed labels. A fraction is 0.0 when there are no rows
    of that kind at all.
    """
    def ratio(count, total):
        # True division (also on Python 2) that yields 0.0 instead of raising
        # ZeroDivisionError when there are no rows of that kind.
        return float(count) / total if total else 0.0

    if plot_metrics is None:
        plot_metrics = ('TIME', metrics[0])

    X = dfe.scale_data(X_in, metrics)

    # Called `clustering` rather than `db` so it is not mistaken for the
    # notebook-level database object of that name.
    clustering = DBSCAN(min_samples=min_samples, eps=eps).fit(X)
    labels = clustering.labels_

    core_mask = np.zeros_like(labels, dtype=bool)
    core_mask[clustering.core_sample_indices_] = True

    # Deterministic order: real clusters ascending, noise (-1) last and only
    # when DBSCAN actually produced noise. Iterating the raw set made both the
    # colors and the reporting depend on set ordering.
    ordered_labels = sorted(int(k) for k in set(labels) if k != -1)
    if (labels == -1).any():
        ordered_labels.append(-1)

    # Counts are keyed by label. The earlier list-plus-zip version silently
    # skipped every label beyond len(CLUSTER_COLORS), for plotting and scoring.
    client_counts = {}
    attack_counts = {}

    for k in ordered_labels:
        if k == -1:
            color = NOISE_COLOR
        else:
            # Reuse the palette cyclically when there are more clusters than
            # colors instead of dropping the extra clusters.
            color = CLUSTER_COLORS[k % len(CLUSTER_COLORS)]

        class_mask = (labels == k)
        core = X_in[core_mask & class_mask]
        ncore = X_in[(~core_mask) & class_mask]

        client_counts[k] = (len(core[core.traffic == 'client'])
                            + len(ncore[ncore.traffic == 'client']))
        attack_counts[k] = (len(core[core.traffic != 'client'])
                            + len(ncore[ncore.traffic != 'client']))

        if do_plot:
            plt.plot(core[plot_metrics[0]], core[plot_metrics[1]],
                     markerfacecolor=color, markersize=14, **PLOT_OPTS)
            plt.plot(ncore[plot_metrics[0]], ncore[plot_metrics[1]],
                     markerfacecolor=color, markersize=6, **PLOT_OPTS)

    total_client = sum(client_counts.values())
    total_attack = sum(attack_counts.values())

    if do_plot:
        plt.gca().set_ylabel(plot_metrics[1])
        for k in ordered_labels:
            # Name the row from the label itself; the last entry is only
            # "noise" when a -1 label really exists.
            cls = 'noise' if k == -1 else 'class {}'.format(k)
            c = client_counts[k]
            a = attack_counts[k]
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('Client {}: {}/{}: {:.1f}%'.format(cls, c, total_client, ratio(100 * c, total_client)))
            print('Attack {}: {}/{}: {:.1f}%'.format(cls, a, total_attack, ratio(100 * a, total_attack)))

    client_as_client = 0
    attack_as_attack = 0
    for k in ordered_labels:
        c = client_counts[k]
        a = attack_counts[k]
        if ratio(c, total_client) > ratio(a, total_attack):
            client_as_client += c
        else:
            attack_as_attack += a

    return ratio(client_as_client, total_client), ratio(attack_as_attack, total_attack)
    

In [None]:
# DBSCAN eps value used for each traffic type.
EPS = {
    'redos': .1,
    'tls_reneg': .5,
    'slowloris': .1,
}

# DBSCAN min_samples value shared by all traffic types.
MIN_SAMPLES = 150

# Metric columns clustered on, per traffic type.
METRICS = {
    'redos': ('MSU_USER_TIME', 'QUEUE_LEN'),
    'tls_reneg': ('MSU_USER_TIME', 'QUEUE_LEN'),
    'slowloris': ('NUM_STATES',),
}

# Display names per traffic type.
TITLES = {
    'redos': 'ReDOS',
    'tls_reneg': 'TLS Renegotiation',
    'slowloris': 'Slowloris',
}

# Display names keyed by metric column name.
TRAFFIC_NAMES = {
    'MSU_USER_TIME': 'User CPU time',
    'MEMORY_ALLOCATED': 'Memory Allocated',
    'NUM_STATES': 'Number of states',
}

# One subplot row per separated frame. squeeze=False plus taking column 0
# keeps `ax` a 1-D array of axes even when there is only one frame; without it
# plt.subplots() returns a bare Axes and ax[i] fails.
fig, ax_grid = plt.subplots(len(separated), 1, figsize=(7, 4), squeeze=False)
ax = ax_grid[:, 0]
for i, msu_df in enumerate(separated):
    traffic_type = ddm.traffic_type(msu_df)
    # NOTE(review): msu_type is not used in this cell; it is kept in case a
    # later cell reads it.
    msu_type = msu_df.iloc[0].msu_type
    plt.sca(ax[i])
    # Single-argument print(...) runs on both Python 2 and Python 3 and matches
    # the call form already used earlier in the notebook.
    print('********* {}'.format(traffic_type))
    out = plot_dbscan_results(msu_df, METRICS[traffic_type], eps=EPS[traffic_type], min_samples=MIN_SAMPLES)
    print('Client correct: {:.2f}%\nAttack correct: {:.2f}%\n'.format(out[0]*100, out[1]*100))