# How Private is Android's Private DNS Setting? 
# Identifying Apps by Encrypted DNS Traffic

In [None]:
import math, random, sys, os, json, gzip, warnings
import numpy as np
import pandas as pd
from IPython.core.display import HTML, display
from matplotlib import pyplot as plt, ticker
from scipy.sparse import hstack
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import RFECV
from sklearn.metrics import (
    accuracy_score,
    auc,
    average_precision_score,
    f1_score,
    precision_recall_curve,
    precision_score,
    recall_score,
)
from sklearn.model_selection import (
    StratifiedKFold,
    cross_validate,
    train_test_split,
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import shuffle

# local imports
sys.path.append('../database/')
import db
from models import App, Dataset, Features, Trace

# set seed for reproducibility
myseed = 42
np.random.seed(myseed)
random.seed(myseed)


# load functions
def join_lists(tls_sizes0, times0):
    """Interleave record sizes with tokens that mark long gaps.

    The two sequences are walked in lockstep (stopping at the shorter one).
    For every element except the first, the matching time value is turned
    into ``int(log2(1 + time * 1000))``; if that magnitude is at least 5, a
    ``"G<magnitude>"`` token is emitted in front of the size.

    :param tls_sizes0: sequence of record sizes, copied to the output as-is
    :param times0: sequence of time values convertible with ``float``
        (presumably the gap to the previous record in seconds - TODO confirm)
    :return: list of sizes with ``"G<n>"`` gap tokens in between
    """
    merged = []
    for position, (delay, size) in enumerate(zip(times0, tls_sizes0)):
        # the first record has no predecessor, so it never gets a gap token
        if position:
            magnitude = int(math.log(1 + float(delay) * 1000, 2))
            if magnitude >= 5:
                merged.append("G" + str(magnitude))
        merged.append(size)
    return merged


# load function for bursts - idea from https://github.com/spring-epfl/doh_traffic_analysis
def create_burst_seq(x):
    """Collapse a signed sequence into the sums of its same-sign runs (bursts).

    The array is cut wherever ``np.sign`` changes between two neighbouring
    elements and each resulting run is summed.

    :param x: one-dimensional numpy array of signed values (the sign
        presumably encodes the traffic direction - TODO confirm)
    :return: list with one sum per burst, in original order. A list is
        returned instead of a lazy ``map`` object so that the result can be
        iterated more than once and inspected.
    """
    # positions where the sign differs from the previous element start a new burst
    split_points = np.where(np.diff(np.sign(x)))[0] + 1
    traffic_bursts = np.split(x, split_points)
    return [sum(burst) for burst in traffic_bursts]


# make a list of dataframes (one per resolver) from the database for the requested world and caching setting
def get_list_of_df(world="closed", cache="NO_CACHE"):
    """Load one shuffled dataframe per recursive resolver from the database.

    :param world: value matched against ``Dataset.world``
    :param cache: value matched against ``Dataset.caching``
    :return: list of ten dataframes - the five DoT resolvers first, then the
        five DoH resolvers - each shuffled with ``random_state=myseed``
    """
    resolvers_by_protocol = (
        ("dot", ("dns.google", "dns.digitale-gesellschaft.ch",
                 "dns.quad9.net", "one.one.one.one", "dot1.applied-privacy.net")),
        ("doh", ("dns.google", "dns.digitale-gesellschaft.ch",
                 "dns.quad9.net", "cloudflare-dns.com", "doh.applied-privacy.net")),
    )
    frames = []
    for protocol, hostnames in resolvers_by_protocol:
        # one database session per protocol
        with db.session_scope() as session:
            for hostname in hostnames:
                query = (
                    session.query(Dataset.name, Dataset.padding, Dataset.world, Dataset.caching,
                                  Trace.timestamp, Features)
                    .join(Features)
                    .join(App)
                    .join(Dataset)
                    .filter(Dataset.hostname == hostname,
                            Dataset.dns_type == protocol,
                            Dataset.caching == cache,
                            Dataset.world == world)
                    .order_by(Features.id)
                )
                frame = pd.read_sql(query.statement, query.session.bind)
                frames.append(shuffle(frame, random_state=myseed))
    return frames


# make a list of dataframes from the database for the closed world evaluation, split by collection week
def get_list_of_df_weeks():
    """Load the closed-world, uncached traces split into four date windows.

    For every window the five DoT resolvers are loaded first, followed by the
    five DoH resolvers, so the result holds 4 * 10 dataframes and window ``w``
    (0-based) occupies the slice ``[10 * w : 10 * (w + 1)]``. A trace belongs
    to a window when ``lower < Features.datetime_trace <= upper``.

    :return: list of dataframes, each shuffled with ``random_state=myseed``
    """
    windows = (
        ('2020-04-25 13:00:00', '2020-05-02 13:00:00'),
        ('2020-05-02 13:01:00', '2020-05-09 13:00:00'),
        ('2020-05-09 13:01:00', '2020-05-16 13:00:00'),
        ('2020-05-16 13:01:00', '2020-05-20 13:00:00'),
    )
    resolvers_by_protocol = (
        ("dot", ("dns.google", "dns.digitale-gesellschaft.ch",
                 "dns.quad9.net", "one.one.one.one", "dot1.applied-privacy.net")),
        ("doh", ("dns.google", "dns.digitale-gesellschaft.ch",
                 "dns.quad9.net", "cloudflare-dns.com", "doh.applied-privacy.net")),
    )
    frames = []
    for lower_date, upper_date in windows:
        for protocol, hostnames in resolvers_by_protocol:
            # one database session per window and protocol
            with db.session_scope() as session:
                for hostname in hostnames:
                    query = (
                        session.query(Dataset.name, Dataset.padding, Dataset.world, Dataset.caching,
                                      Trace.timestamp, Features)
                        .join(Features)
                        .join(App)
                        .join(Dataset)
                        .filter(Dataset.hostname == hostname,
                                Dataset.dns_type == protocol,
                                Dataset.caching == "NO_CACHE",
                                Dataset.world == "closed",
                                Features.datetime_trace <= upper_date,
                                Features.datetime_trace > lower_date)
                        .order_by(Features.id)
                    )
                    frame = pd.read_sql(query.statement, query.session.bind)
                    frames.append(shuffle(frame, random_state=myseed))
    return frames


def create_tls_fd_df(prep_df):
    """Build the input frame for the TLS record size frequency approach.

    :param prep_df: dataframe with an ``app_name`` column and a ``tls_sizes``
        column holding one iterable of sizes per row
    :return: new dataframe with ``app_name`` and ``tls_sizes``, where every
        size sequence is rendered as one space-separated string
    """
    joined_sizes = [' '.join(str(size) for size in sizes) for sizes in prep_df.tls_sizes]
    predict_df = pd.DataFrame()
    predict_df['app_name'] = prep_df.app_name
    predict_df['tls_sizes'] = joined_sizes
    return predict_df


def create_n_grams_df(prep_df):
    """Build the input frame for the burst / TLS size n-gram approach.

    :param prep_df: dataframe with an ``app_name`` column and a ``tls_sizes``
        column holding one iterable of int-convertible sizes per row
    :return: new dataframe with ``app_name``, ``n-grams`` (the burst sums from
        ``create_burst_seq`` as one space-separated string) and ``tls_sizes``
        (the raw sizes as one space-separated string)
    """
    burst_strings = []
    for sizes in prep_df.tls_sizes:
        bursts = create_burst_seq(np.array([int(size) for size in sizes]))
        burst_strings.append(' '.join(str(burst) for burst in bursts))
    size_strings = [' '.join(str(size) for size in sizes) for sizes in prep_df['tls_sizes']]

    predict_df = pd.DataFrame()
    predict_df['app_name'] = prep_df.app_name
    predict_df['n-grams'] = burst_strings
    predict_df['tls_sizes'] = size_strings
    return predict_df


def create_dns_sequences(prep_df):
    """Build the input frame for the DNS sequence (sizes plus gap tokens) approach.

    :param prep_df: dataframe with the columns ``app_name``, ``tls_sizes`` and
        ``tls_sizes_times``
    :return: new dataframe with ``app_name``, ``tls_sizes``, ``times`` (copy of
        ``tls_sizes_times``) and ``dns_seq``, the output of ``join_lists`` for
        the row rendered as one space-separated string
    """
    def _tokens_for_row(row):
        return join_lists(row.tls_sizes, row.times)

    predict_df = pd.DataFrame()
    for target_col, source_col in (('app_name', 'app_name'),
                                   ('tls_sizes', 'tls_sizes'),
                                   ('times', 'tls_sizes_times')):
        predict_df[target_col] = prep_df[source_col]
    # row-wise merge of sizes and times into one token list per trace
    predict_df['dns_seq'] = predict_df.apply(_tokens_for_row, axis=1)
    predict_df['dns_seq'] = [' '.join(str(token) for token in tokens)
                             for tokens in predict_df['dns_seq']]
    return predict_df


def build_ow_dataframe(total_df, class_problem, rand_seed, app_list, number_monitored_sites, n_monitored,
                       test_size_mon, num_unm, n_unknown_train, n_unknown_test, sample_unm, test_unm,
                       sample_unknown):
    """Build the training and test set for one open-world experiment.

    ``rand_seed`` only controls which apps are drawn as monitored and as
    unmonitored; the per-app sampling, the splits and the final shuffles use
    the module-level ``myseed``. Note that the global numpy RNG is re-seeded.

    :param total_df: dataframe with (at least) the columns ``app_name`` and ``world``
    :param class_problem: ``"binary"`` labels every monitored trace
        ``"monitored"``; any other value uses the app name as label
    :param rand_seed: seed for drawing the monitored and unmonitored apps
    :param app_list: apps from which the monitored ones are drawn
    :param number_monitored_sites: number of monitored apps to draw
    :param n_monitored: traces sampled per monitored app
    :param test_size_mon: ``test_size`` of the monitored train/test split
    :param num_unm: number of unmonitored apps drawn in total
    :param n_unknown_train: number of drawn unmonitored apps used for training
    :param n_unknown_test: number of drawn unmonitored apps that only appear
        in the test set
    :param sample_unm: traces sampled per unmonitored training app
    :param test_unm: ``test_size`` of the split applied to the unmonitored
        training pool; only the training part of that split is kept
    :param sample_unknown: traces sampled per test-only unmonitored app
    :return: tuple ``(training_data, test_data)`` with a ``target`` column
    """
    def _sample_per_app(frame, n_instances):
        # draw a fixed number of traces from every app
        return frame.groupby('app_name', group_keys=False).apply(
            lambda group: group.sample(n=n_instances, random_state=myseed))

    np.random.seed(rand_seed)
    # select n monitored apps with m instances each
    interested = np.random.choice(app_list, number_monitored_sites, replace=False)
    is_monitored = total_df['app_name'].isin(interested)

    monitored_df = _sample_per_app(total_df.loc[is_monitored].copy(), n_monitored)
    if class_problem == "binary":
        monitored_df['target'] = "monitored"
    else:
        monitored_df['target'] = monitored_df.app_name
    # split the monitored traces into a training and a test part, stratified by app
    X_train, X_test, _, _ = train_test_split(monitored_df, monitored_df.world,
                                             test_size=test_size_mon, random_state=myseed,
                                             stratify=monitored_df.app_name)

    # everything that is not monitored is tagged as unmonitored
    unmonitored_df = total_df.loc[~is_monitored].copy()
    unmonitored_df['target'] = "unmonitored"

    # draw the unmonitored apps: the first n_unknown_train are seen during
    # training, the following n_unknown_test only show up in the test set
    np.random.seed(rand_seed)
    unmonitored_apps_list = np.unique(unmonitored_df.app_name)
    interested_unmonitored = np.random.choice(unmonitored_apps_list, num_unm, replace=False)
    unknown_train = interested_unmonitored[:n_unknown_train]
    # start right after the training apps so that both groups stay disjoint
    # even when n_unknown_train and n_unknown_test differ
    unknown_test = interested_unmonitored[n_unknown_train:n_unknown_train + n_unknown_test]

    # sample the unmonitored training apps and keep only the training part of the split
    unmonitored_df_train = _sample_per_app(
        unmonitored_df.loc[unmonitored_df['app_name'].isin(unknown_train)], sample_unm)
    X_train_unm, _, _, _ = train_test_split(unmonitored_df_train,
                                            unmonitored_df_train.world,
                                            test_size=test_unm,
                                            random_state=myseed,
                                            stratify=unmonitored_df_train.app_name)

    # sample the test-only unmonitored apps
    unmonitored_df_test = _sample_per_app(
        unmonitored_df.loc[unmonitored_df['app_name'].isin(unknown_test)], sample_unknown)

    # final datasets (DataFrame.append no longer exists in pandas 2.x)
    training_data = shuffle(pd.concat([X_train, X_train_unm], ignore_index=True),
                            random_state=myseed)
    test_data = shuffle(pd.concat([X_test, unmonitored_df_test], ignore_index=True),
                        random_state=myseed)
    return training_data, test_data

## 1. Approach based on TLS Record Size Frequency Distribution

In [None]:
def predict_tls_fd(dataframes):
    """Cross-validate a random forest on TLS record size frequencies.

    For every dataframe the space-separated size strings are turned into raw
    token counts and a random forest is evaluated with stratified 5-fold
    cross-validation. The metrics are printed per dataframe and once more as
    lists over all dataframes.

    :param dataframes: iterable of dataframes with the columns ``name``,
        ``app_name`` and ``tls_sizes``
    :return: None, the results are only printed
    """
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    report_rows = (("Accuracy", 'test_accuracy'), ("Precision", 'test_precision_macro'),
                   ("Recall", 'test_recall_macro'), ("F1-Score", 'test_f1_macro'))
    accuracies = []
    precisions = []
    recalls = []
    f1scores = []
    for df in dataframes:
        resolver_name = df.name.value_counts().index[0]
        print(f"TLS Record Sizes - {resolver_name}")
        print("Shape of Dataframe: " + str(df.shape))
        print("Minimum Traces per App:", np.min(df['app_name'].value_counts()))
        predict_df = create_tls_fd_df(df)
        # norm=None disables normalisation; the former norm='' is rejected by
        # the parameter validation of current scikit-learn releases
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), use_idf=False, sublinear_tf=False,
                                     norm=None, smooth_idf=False)

        X = vectorizer.fit_transform(predict_df['tls_sizes'])
        y = predict_df["app_name"]

        # random forest model creation
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        skf = StratifiedKFold(n_splits=5, random_state=myseed, shuffle=True)
        with warnings.catch_warnings():
            print("after: " + str(X.shape))
            warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
            rfc_cv_score = cross_validate(rfc, X, y, cv=skf, scoring=scoring)

        resolver_metrics = []
        for label, key in report_rows:
            fold_scores = rfc_cv_score[key]
            print("%s: %0.3f (+/- %0.3f)" % (label, fold_scores.mean(), fold_scores.std()))
            resolver_metrics.append("%0.3f" % fold_scores.mean())
        print(resolver_name, str(resolver_metrics).replace("'", ""))
        print("\n")
        for collected, value in zip((accuracies, precisions, recalls, f1scores), resolver_metrics):
            collected.append(value)
    print("Accuracies:", str(accuracies).replace("'", ""))
    print("Precisions:", str(precisions).replace("'", ""))
    print("Recalls:", str(recalls).replace("'", ""))
    print("F1 scores:", str(f1scores).replace("'", ""))
    print("\n")

In [None]:
# Load the closed-world, uncached traces (one dataframe per recursive resolver)
# and evaluate the TLS record size frequency approach on each of them.
list_of_df = get_list_of_df()
predict_tls_fd(list_of_df)

## 2. Approach based on N-Grams of TLS Record Sizes / Bursts

In [None]:
# USE BURSTS, N-GRAMS AND TLS SIZES
def predict_ngrams_tls_sizes(dataframes):
    """Cross-validate a random forest on burst and TLS size uni-/bi-grams.

    Two count feature sets are built per dataframe - one from the burst
    strings (column ``n-grams``) and one from the raw TLS size strings - and
    stacked side by side. A random forest is then evaluated with stratified
    5-fold cross-validation. The metrics are printed per dataframe and once
    more as lists over all dataframes.

    :param dataframes: iterable of dataframes with the columns ``name``,
        ``app_name`` and ``tls_sizes``
    :return: None, the results are only printed
    """
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    report_rows = (("Accuracy", 'test_accuracy'), ("Precision", 'test_precision_macro'),
                   ("Recall", 'test_recall_macro'), ("F1-Score", 'test_f1_macro'))
    accuracies = []
    precisions = []
    recalls = []
    f1scores = []
    for df in dataframes:
        resolver_name = df.name.value_counts().index[0]
        print(f"N-Grams - {resolver_name}")
        print("Shape of Dataframe: " + str(df.shape))
        print("Minimum Traces per App:", np.min(df['app_name'].value_counts()))
        # create dataframe for prediction
        predict_df = create_n_grams_df(df)
        # one vectorizer per feature set so that each keeps its own vocabulary;
        # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 2), use_idf=False,
                                     sublinear_tf=False, norm=None, smooth_idf=False)
        vectorizer2 = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 2), use_idf=False,
                                      sublinear_tf=False, norm=None, smooth_idf=False)
        # features based on bursts
        X0 = vectorizer.fit_transform(predict_df['n-grams'])
        # features based on tls sizes
        X1 = vectorizer2.fit_transform(predict_df.tls_sizes)
        # combine features
        X = hstack([X0, X1])
        y = predict_df["app_name"]

        # random forest model creation
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        skf = StratifiedKFold(n_splits=5, random_state=myseed, shuffle=True)
        with warnings.catch_warnings():
            print("after: " + str(X.shape))
            warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
            rfc_cv_score = cross_validate(rfc, X, y, cv=skf, scoring=scoring)

        resolver_metrics = []
        for label, key in report_rows:
            fold_scores = rfc_cv_score[key]
            print("%s: %0.3f (+/- %0.3f)" % (label, fold_scores.mean(), fold_scores.std()))
            resolver_metrics.append("%0.3f" % fold_scores.mean())
        print(resolver_name, str(resolver_metrics).replace("'", ""))
        print("\n")
        for collected, value in zip((accuracies, precisions, recalls, f1scores), resolver_metrics):
            collected.append(value)
    print("Accuracies:", str(accuracies).replace("'", ""))
    print("Precisions:", str(precisions).replace("'", ""))
    print("Recalls:", str(recalls).replace("'", ""))
    print("F1 scores:", str(f1scores).replace("'", ""))
    print("\n")

In [None]:
# Load the closed-world, uncached traces (one dataframe per recursive resolver)
# and evaluate the burst / TLS size n-gram approach on each of them.
list_of_df = get_list_of_df()
predict_ngrams_tls_sizes(list_of_df)

## 3. Approach based on Distances of DNS Sequences

In [None]:
# Evaluate a 1-nearest-neighbour classifier on precomputed pairwise distances.
# Every "*.gz" file in ./distances_resp/ holds a JSON document whose "results"
# list contains {"i", "j", "distance"} entries; the matching dataframe (same
# file name up to ".json") is read from ./pickle_resp/.
path = "./distances_resp/"
# os.listdir returns entries in arbitrary order; sort for a reproducible output order
gzip_files = sorted(gfile for gfile in os.listdir(path) if gfile.endswith("gz"))

for gzip_file in gzip_files:
    path_df = f"./pickle_resp/{gzip_file.split('.json')[0]}"
    # NOTE(review): unpickling can execute arbitrary code - only use trusted files here
    predict_df = pd.read_pickle(path_df)
    with gzip.open(path + gzip_file) as f:
        results = json.load(f)
    distances = results['results']
    # consider both indices: if only one triangle of the matrix is stored,
    # the largest index may show up as "j" only
    dimension = max(max(distance['i'], distance['j']) for distance in distances) + 1
    # NOTE(review): dtype=int truncates fractional distances - confirm they are integers
    matrix = np.zeros((dimension, dimension), dtype=int)
    for distance in distances:
        i = distance['i']
        j = distance['j']
        d = distance['distance']
        # the distance matrix is symmetric
        matrix[i, j] = matrix[j, i] = d

    print(f"\nBushart et al. - {gzip_file}")

    # precomputed distance matrix as feature
    X = matrix
    y = predict_df["app_name"]

    # knn classifier
    scoring = ['accuracy']
    knn = KNeighborsClassifier(n_neighbors=1, n_jobs=-1, metric="precomputed")
    skf = StratifiedKFold(n_splits=5, random_state=myseed, shuffle=True)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
        rfc_cv_score = cross_validate(knn, X, y, cv=skf, scoring=scoring)
    print("Accuracy: %0.3f (+/- %0.3f)" % (rfc_cv_score['test_accuracy'].mean(),
                                           rfc_cv_score['test_accuracy'].std()))

## 4. Segram: N-Grams of DNS Sequences

In [None]:
from scipy.sparse import coo_matrix, hstack

def predict_ngrams_dns_seq(dflist, target='app_name'):
    """Cross-validate a random forest on 1- to 3-grams of DNS sequences.

    The DNS sequence of a trace (sizes interleaved with gap tokens, see
    ``create_dns_sequences``) is turned into raw n-gram counts and a random
    forest is evaluated with stratified 5-fold cross-validation. The metrics
    are printed per dataframe and once more as lists over all dataframes.

    :param dflist: iterable of dataframes with the columns ``name``,
        ``app_name``, ``tls_sizes`` and ``tls_sizes_times``
    :param target: column of the prepared dataframe that is used as label
    :return: None, the results are only printed
    """
    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    report_rows = (("Accuracy", 'test_accuracy'), ("Precision", 'test_precision_macro'),
                   ("Recall", 'test_recall_macro'), ("F1-Score", 'test_f1_macro'))
    accuracies = []
    precisions = []
    recalls = []
    f1scores = []
    for df in dflist:
        resolver_name = df.name.value_counts().index[0]
        print(f"N-Grams / Times - {resolver_name}")
        print("Shape of Dataframe: " + str(df.shape))
        print("Minimum Traces per App:", np.min(df['app_name'].value_counts()))
        # create dataframe for prediction
        predict_df = create_dns_sequences(df)
        # norm=None disables normalisation; the former norm='' is rejected by
        # the parameter validation of current scikit-learn releases
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3), use_idf=False,
                                     sublinear_tf=False, norm=None, smooth_idf=False)
        # feature w. timing
        X = vectorizer.fit_transform(predict_df['dns_seq'])
        y = predict_df[target]

        # random forest model creation
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        skf = StratifiedKFold(n_splits=5, random_state=myseed, shuffle=True)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
            rfc_cv_score = cross_validate(rfc, X, y, cv=skf, scoring=scoring)

        resolver_metrics = []
        for label, key in report_rows:
            fold_scores = rfc_cv_score[key]
            print("%s: %0.3f (+/- %0.3f)" % (label, fold_scores.mean(), fold_scores.std()))
            resolver_metrics.append("%0.3f" % fold_scores.mean())
        print(resolver_name, str(resolver_metrics).replace("'", ""))
        print("\n")
        for collected, value in zip((accuracies, precisions, recalls, f1scores), resolver_metrics):
            collected.append(value)
    print("Accuracies:", str(accuracies).replace("'", ""))
    print("Precisions:", str(precisions).replace("'", ""))
    print("Recalls:", str(recalls).replace("'", ""))
    print("F1 scores:", str(f1scores).replace("'", ""))
    print("\n")

In [None]:
# Load the closed-world, uncached traces (one dataframe per recursive resolver)
# and evaluate the DNS sequence n-gram approach on each of them.
list_of_df = get_list_of_df()
predict_ngrams_dns_seq(list_of_df)

## 5. Influence of Sample Size

In [None]:
# get traces for resolvers from first week
week_dfs = get_list_of_df_weeks()
# per week the loader returns five DoT frames followed by five DoH frames;
# 0/4 are the first and last DoT resolver, 5/9 the first and last DoH resolver
resolver_week1 = [week_dfs[idx] for idx in (0, 4, 5, 9)]
sample_sizes = list(range(1, 14))

for res in resolver_week1:
    accs = []
    print(res.name.value_counts().index[0])
    for sample_size in sample_sizes:
        # train on sample_size traces per app, test on all remaining traces of the resolver
        mydf = res.groupby('app_name', group_keys=False).apply(
            lambda x: x.sample(n=sample_size, random_state=myseed))
        mydf = shuffle(mydf, random_state=myseed)
        X_test_df = res.drop(mydf.index)
        predict_df = create_dns_sequences(mydf)

        # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3), use_idf=False,
                                     sublinear_tf=False, norm=None, smooth_idf=False)

        X = vectorizer.fit_transform(predict_df['dns_seq'])
        y = predict_df['app_name']
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        rfc.fit(X, y)

        # classify test data
        predict_df_test = create_dns_sequences(X_test_df)
        X_test = vectorizer.transform(predict_df_test['dns_seq'])
        y_test = predict_df_test['app_name']
        y_pred = rfc.predict(X_test)
        accuracy = "%0.3f" % accuracy_score(y_test, y_pred)
        print("Instances: ", sample_size, "Accuracy", accuracy)
        accs.append(accuracy)
    print("\n")

## 6. Influence of Updates

In [None]:
# Train on the first week of a resolver and classify the traces of the later
# weeks to see how quickly the model goes stale.
week_dfs = get_list_of_df_weeks()
# the loader returns ten frames (5 DoT + 5 DoH) per week
resolver_week1 = week_dfs[:10]
resolver_week2 = week_dfs[10:20]
resolver_week3 = week_dfs[20:30]
resolver_week4 = week_dfs[30:40]
resolver_indices = [0, 4, 5, 9]

for resolver_index in resolver_indices:
    mydfs = [resolver_week1[resolver_index], resolver_week2[resolver_index],
             resolver_week3[resolver_index], resolver_week4[resolver_index]]

    # only the first week (i == 0) is used as training week
    for i in range(1):
        print("Position", i+1, i+1)
        df = mydfs[i]
        print(f"Resolver - {df.name.value_counts().index[0]}")
        # base
        predict_ngrams_dns_seq([df])

        # fit the model once per training week; it does not depend on the test week
        predict_df = create_dns_sequences(df)
        # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3),
                                     use_idf=False, sublinear_tf=False, norm=None, smooth_idf=False)
        X = vectorizer.fit_transform(predict_df['dns_seq'])
        y = predict_df['app_name']
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        rfc.fit(X, y)

        for j in range(4):
            if i == j:
                continue
            new_data = mydfs[j]
            predict_df_new = create_dns_sequences(new_data)
            X_test = vectorizer.transform(predict_df_new['dns_seq'])
            y_test = predict_df_new['app_name']

            y_pred = rfc.predict(X_test)
            print("Position", j+1, i+1)
            print("Accuracy", "%0.3f" % accuracy_score(y_test, y_pred))
            print("\n")
    print("-------------------------------------")

## 7. Train + Predict with Different Resolver

### 7.1 DoT

In [None]:
# Train on one DoT resolver and classify the traces of every other DoT resolver.
# The first five frames returned by the loader are the DoT resolvers.
mydfs = get_list_of_df(world="closed", cache="NO_CACHE")[:5]
for i, df in enumerate(mydfs):
    print("Position", i+1, i+1)
    print(f"Resolver - {df.name.value_counts().index[0]}")
    # base
    predict_ngrams_dns_seq([df])

    # fit the model once per training resolver; it does not depend on the test resolver
    predict_df = create_dns_sequences(df)
    # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
    vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3),
                                 use_idf=False, sublinear_tf=False, norm=None, smooth_idf=False)
    X = vectorizer.fit_transform(predict_df['dns_seq'])
    y = predict_df['app_name']
    rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
    rfc.fit(X, y)

    for j, new_data in enumerate(mydfs):
        if i == j:
            continue
        print(f"Resolver - {new_data.name.value_counts().index[0]}")
        predict_df_new = create_dns_sequences(new_data)
        X_test = vectorizer.transform(predict_df_new['dns_seq'])
        y_test = predict_df_new['app_name']

        y_pred = rfc.predict(X_test)
        print("Position", j+1, i+1)
        print("Accuracy", "%0.3f" % accuracy_score(y_test, y_pred))
        print("\n")

### 7.2 DoH

In [None]:
# Train on one DoH resolver and classify the traces of every other DoH resolver.
# The last five frames returned by the loader are the DoH resolvers.
mydfs = get_list_of_df(world="closed", cache="NO_CACHE")[5:]
for i, df in enumerate(mydfs):
    print("Position", i+1, i+1)
    print(f"Resolver - {df.name.value_counts().index[0]}")
    # base
    predict_ngrams_dns_seq([df])

    # fit the model once per training resolver; it does not depend on the test resolver
    predict_df = create_dns_sequences(df)
    # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
    vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3),
                                 use_idf=False, sublinear_tf=False, norm=None, smooth_idf=False)
    X = vectorizer.fit_transform(predict_df['dns_seq'])
    y = predict_df['app_name']
    rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
    rfc.fit(X, y)

    for j, new_data in enumerate(mydfs):
        if i == j:
            continue
        print(f"Resolver - {new_data.name.value_counts().index[0]}")
        predict_df_new = create_dns_sequences(new_data)
        X_test = vectorizer.transform(predict_df_new['dns_seq'])
        y_test = predict_df_new['app_name']

        y_pred = rfc.predict(X_test)
        print("Position", j+1, i+1)
        print("Accuracy", "%0.3f" % accuracy_score(y_test, y_pred))
        print("\n")

## 8. Impact of Caching

In [None]:
# Repeat the DNS sequence n-gram evaluation on the closed-world traces whose
# dataset is marked with caching == "CACHE".
list_of_df_cache = get_list_of_df(cache="CACHE")
predict_ngrams_dns_seq(list_of_df_cache)

## 9. Open World

### 9.1 Binary Case

In [None]:
# Open world, binary case: decide for every trace whether it belongs to a
# monitored app. For each selected resolver the experiment is repeated with
# several app selections (seeds); the scores of all runs are pooled into one
# precision-recall curve, which is plotted and appended to ow_results.csv.
random.seed(myseed)
resolver_indices = {0: "Google DoT", 4: "Applied Privacy DoT", 5: "Google DoH", 9: "Applied Privacy DoH"}
number_monitored_sites = 10
seeds = [42, 24, 2, 4]

# load both worlds once instead of querying all resolvers again for every plot
cw_frames = get_list_of_df()
ow_frames = get_list_of_df(world="open")

for resolver_index, resolver_text in resolver_indices.items():
    cw_data = cw_frames[resolver_index]
    ow_data = ow_frames[resolver_index]

    # DataFrame.append no longer exists in pandas 2.x
    total_df = pd.concat([cw_data, ow_data], ignore_index=True)
    app_list = np.unique(cw_data.app_name)

    fig, ax = plt.subplots(figsize=(8, 5))
    # iso-F1 curves as visual guide
    f_scores = np.linspace(0.0, 0.8, num=5)
    for i, f_score in enumerate(f_scores):
        xf = np.linspace(0.01, 1)
        yf = f_score * xf / (2 * xf - f_score)
        plt.plot(xf[yf >= 0], yf[yf >= 0], color='gray', alpha=0.2)
        if i > 0:
            plt.annotate(' F1={0:0.1f}'.format(f_score),xy=(0.85, yf[45] + 0.035), fontsize=16)
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])

    y_real = []
    y_proba = []
    print(f"Resolver - {total_df.name.value_counts().index[0]}")
    for s in seeds:
        training_data, test_data = build_ow_dataframe(total_df, "binary", s, app_list, number_monitored_sites,
                                                      40, 0.25, 200, 100, 100, 10, 7/10, 12)
        predict_df_train = create_dns_sequences(training_data)
        predict_df_test = create_dns_sequences(test_data)

        # norm=None disables normalisation (norm='' is rejected by current scikit-learn)
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3),
                                     use_idf=False, sublinear_tf=False, norm=None, smooth_idf=False)

        X = vectorizer.fit_transform(predict_df_train['dns_seq'])
        y = training_data["target"]
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        rfc.fit(X, y)
        newdf = vectorizer.transform(predict_df_test['dns_seq'])
        scores = rfc.predict_proba(newdf)
        # pick the probability column of the positive class by name, not by position
        monitored_col = list(rfc.classes_).index('monitored')
        y_real.append(test_data['target'])
        y_proba.append(scores[:, monitored_col])

    # pooled precision-recall curve over all seeds
    y_real = np.concatenate(y_real)
    y_proba = np.concatenate(y_proba)
    precision, recall, thresholds = precision_recall_curve(y_real, y_proba, pos_label='monitored')
    # the former "'Random Forest' % (...)" raised a TypeError (no placeholder in the string)
    lab = 'Random Forest'

    ax.plot(recall, precision, label=lab, lw=2, color='black')
    ax.set_xlabel('Recall', fontsize=20, labelpad=8)
    ax.set_ylabel('Precision', fontsize=20, labelpad=8)

    # fixed y ticks; the label of the origin is left blank
    positions = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
    labels = ['', '0.2', '0.4', '0.6', '0.8', '1.0']
    ax.yaxis.set_major_locator(ticker.FixedLocator(positions))
    ax.yaxis.set_major_formatter(ticker.FixedFormatter(labels))

    ax.tick_params(labelsize=20)

    # no skill line: proportion of the positive class in the pooled test data
    no_skill = np.count_nonzero(y_real == 'monitored') / len(y_real)
    print("NoSkill: " + str(no_skill))
    ax.plot([0, 1], [no_skill, no_skill], linestyle='--',label='Random Classifier', color="darkgrey")

    ax.legend(bbox_to_anchor=(0, 0.985, 1, 0.15), loc="lower left", mode="expand", borderaxespad=0,
              ncol=3, fontsize=18, frameon=False)

    # F1 per threshold; points with precision + recall == 0 count as 0 instead of NaN
    pr_sum = recall + precision
    f1_scores = np.divide(2 * recall * precision, pr_sum, out=np.zeros_like(pr_sum), where=pr_sum > 0)
    # the final curve point (recall 0) has no threshold, so it is excluded from the search
    index_f1 = int(np.argmax(f1_scores[:-1]))
    best_threshold = thresholds[index_f1]
    best_precision = precision[index_f1]
    best_recall = recall[index_f1]
    print('Best threshold: ', best_threshold)
    print('Best F1-Score: ', f1_scores[index_f1])
    print("Best Precision:", best_precision)
    print("Best Recall:", best_recall)

    # start a fresh results file with the first resolver, append afterwards
    file_mode = 'w' if resolver_index == 0 else 'a'
    with open('ow_results.csv', file_mode) as file:
        # columns: resolver, precision, recall, best_threshold, best_precision, best_recall
        # tolist() yields plain floats, so the written lists stay free of numpy type wrappers
        row = [resolver_text, "%s" % precision.tolist(), "%s" % recall.tolist(),
               "%s" % best_threshold, "%s" % best_precision, "%s" % best_recall]
        file.write("\t".join(row) + '\n')

    # mark the operating point with the best F1 score
    ax.plot([best_recall], [best_precision], marker='o', markersize=8, color="black")
    ax.annotate(f"t={'%.2f' % best_threshold}",
                xy=(best_recall - 0.07, best_precision + 0.055),
                textcoords='data', fontsize=16)
    ax.text(0.5, 0.7, resolver_text, fontsize=20, horizontalalignment='center', alpha=1)
    plt.show()

## 9.2 Multi-Class Case

In [None]:
# Multi-class evaluation: for each resolver, combine the closed-world and
# open-world frames, train a random forest on 1- to 3-gram counts of the DNS
# sequences, and report macro-averaged precision/recall/F1 over several seeds.
myseed = 42
random.seed(myseed)

resolver_indices = np.arange(0, 10)
for i in resolver_indices:
    resolver_index = i
    number_monitored_sites = 10

    # NOTE(review): get_list_of_df() is called twice per resolver and only one
    # element of each result is used; if the function is deterministic and free
    # of side effects the calls could be hoisted above the loop -- confirm first.
    cw_data = get_list_of_df()[resolver_index]
    ow_data = get_list_of_df(world="open")[resolver_index]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    total_df = pd.concat([cw_data, ow_data], ignore_index=True)
    app_list = np.unique(cw_data.app_name)
    seeds = [42, 24, 2, 4]
    # The most frequent value of the "name" column labels this resolver's output.
    print(f"Resolver - {total_df.name.value_counts().index[0]}")
    all_f1 = []
    all_prec = []
    all_rec = []
    for s in seeds:
        # The trailing positional arguments are dataset sizes/ratios defined by
        # build_ow_dataframe's signature -- TODO confirm their meaning there.
        training_data, test_data = build_ow_dataframe(
            total_df, "multi-class", s, app_list,
            number_monitored_sites, 40, 0.25, 200, 100, 100, 10, 7/10, 12)
        predict_df_train = create_dns_sequences(training_data)
        predict_df_test = create_dns_sequences(test_data)
        # With use_idf=False, sublinear_tf=False and no normalisation the
        # vectorizer emits plain n-gram counts of the space-separated tokens.
        # norm=None replaces the former norm='' (which only worked by being
        # falsy and is rejected by scikit-learn's parameter validation).
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3), use_idf=False,
                                     sublinear_tf=False, norm=None, smooth_idf=False)

        # Fit the vocabulary on the training sequences only.
        X = vectorizer.fit_transform(predict_df_train['dns_seq'])
        y = training_data["target"]
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        rfc.fit(X, y)

        newdf = vectorizer.transform(predict_df_test['dns_seq'])
        y_pred = rfc.predict(newdf)
        # Macro averaging emits UndefinedMetricWarning when a class receives no
        # predictions; silence it only around the metric computation.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
            all_prec.append(precision_score(test_data['target'], y_pred, average='macro'))
            all_rec.append(recall_score(test_data['target'], y_pred, average='macro'))
            all_f1.append(f1_score(test_data['target'], y_pred, average='macro'))
    # Per-resolver summary: raw F1 per seed, then the means across seeds.
    print("F1-Scores:", all_f1)
    print("AVG-Precision:", "%0.3f" % np.mean(all_prec))
    print("AVG-Recall:", "%0.3f" % np.mean(all_rec))
    print("AVG-F1:", "%0.3f" % np.mean(all_f1))
    print("\n")

## 9.3 Open World + Caching

In [None]:
# Open world with caching: same multi-class pipeline as the non-cached case,
# but on the frames loaded with cache="CACHE". For each resolver the mean and
# standard deviation of the macro F1 over the seeds are printed, and the mean
# F1 per resolver is collected for a final one-line summary.
myseed = 42
random.seed(myseed)
allf1_resolver = []
resolver_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
for i in resolver_indices:
    resolver_index = i
    number_monitored_sites = 20

    # NOTE(review): get_list_of_df() is called twice per resolver and only one
    # element of each result is used; if the function is deterministic and free
    # of side effects the calls could be hoisted above the loop -- confirm first.
    cw_data = get_list_of_df(cache="CACHE")[resolver_index]
    ow_data = get_list_of_df(world="open", cache="CACHE")[resolver_index]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    total_df = pd.concat([cw_data, ow_data], ignore_index=True)
    app_list = np.unique(cw_data.app_name)
    seeds = [42, 24, 2, 4]
    # The most frequent value of the "name" column labels this resolver's output.
    print(f"Resolver - {total_df.name.value_counts().index[0]}")

    all_f1 = []
    all_prec = []
    all_rec = []
    for s in seeds:
        # The trailing positional arguments are dataset sizes/ratios defined by
        # build_ow_dataframe's signature -- TODO confirm their meaning there.
        training_data, test_data = build_ow_dataframe(total_df, "multi-class", s,
                                                      app_list, number_monitored_sites,
                                                      10, 2/10, 200, 80, 80, 3, 1/3, 4)
        predict_df_train = create_dns_sequences(training_data)
        predict_df_test = create_dns_sequences(test_data)
        # With use_idf=False, sublinear_tf=False and no normalisation the
        # vectorizer emits plain n-gram counts of the space-separated tokens.
        # norm=None replaces the former norm='' (which only worked by being
        # falsy and is rejected by scikit-learn's parameter validation).
        vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split(" "), ngram_range=(1, 3), use_idf=False,
                                     sublinear_tf=False, norm=None, smooth_idf=False)

        # Fit the vocabulary on the training sequences only.
        X = vectorizer.fit_transform(predict_df_train['dns_seq'])
        y = training_data["target"]
        rfc = RandomForestClassifier(n_jobs=-1, random_state=myseed)
        rfc.fit(X, y)

        newdf = vectorizer.transform(predict_df_test['dns_seq'])
        y_pred = rfc.predict(newdf)
        # Macro averaging emits UndefinedMetricWarning when a class receives no
        # predictions; silence it only around the metric computation.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
            all_prec.append(precision_score(test_data['target'], y_pred, average='macro'))
            all_rec.append(recall_score(test_data['target'], y_pred, average='macro'))
            all_f1.append(f1_score(test_data['target'], y_pred, average='macro'))
    # Per-resolver summary: raw F1 per seed, then mean and spread across seeds.
    print("All F1-Scores", all_f1)
    print("AVG-Precision:", "%0.3f" % np.mean(all_prec))
    print("AVG-Recall:", "%0.3f" % np.mean(all_rec))
    print("AVG-F1:", "%0.3f" % np.mean(all_f1))
    print("SD-F1:", "%0.3f" % np.std(all_f1))

    # Stored as a pre-formatted string so the final list prints with 3 decimals.
    allf1_resolver.append("%0.3f" % np.mean(all_f1))
    print("\n")
# Strip the quotes so the formatted strings read like a list of numbers.
print("Total F1 Scores:", str(allf1_resolver).replace("'", ""))