In [1]:
import os
import sys

import numpy as np

In [2]:
from read_values import get_dataframe
from calculate import calculate_drift, plot_drift

In [3]:
# Load the dataset used by every later cell (it is indexed by column name below).
# The captured output under this cell shows the call extracting the .tar.xz
# archives and reading temp/ctrl.
data = get_dataframe()

Extracting sta2.tar.xz
Extracting removed.tar.xz
Extracting ap.tar.xz
Extracting ctrl.tar.xz
Extracting sta1.tar.xz
Reading temp/ctrl
Found 237646


# Concept Drift ADWIN

__ADWIN__

* BIFET, Albert; GAVALDA, Ricard. Learning from time-changing data with adaptive windowing. In: Proceedings of the 2007 SIAM international conference on data mining. Society for Industrial and Applied Mathematics, 2007. p. 443-448.


__Page-Hinkley Test__

* GAMA, João; SEBASTIÃO, Raquel; RODRIGUES, Pedro Pereira. On evaluating stream learning algorithms. Machine learning, v. 90, n. 3, p. 317-346, 2013.

## Only Google

In [4]:
# Keep only the rows whose `sites` entry is the pair ('google', 'google').
is_google_pair = data['sites'] == ('google', 'google')
data1 = data.loc[is_google_pair]

In [5]:
# Binary target for the Google-only rows: 1 where `r` increased relative to
# the previous row, 0 where it stayed the same or decreased. The first row
# has no predecessor, so its step is seeded with +1 (label 1).
r_google = data1['r'].values
y1 = np.sign(np.concatenate(([1], np.diff(r_google))))
y1[y1 < 0] = 0  # fold "decreased" (-1) into 0 so only 0s and 1s remain

In [6]:
# Feature matrix for the Google-only subset: nine columns, in this fixed order.
feature_columns = [
    'Active time', 'Medium busy', 'channel',
    'new Active time', 'new Busy time', 'new Medium busy',
    'new_channel', 'new_txpower', 'txpower',
]
X1 = data1.loc[:, feature_columns].values

In [None]:
# Run drift detection on the Google-only features and labels.
# NOTE(review): n_train=1800 equals the "60 * 30  # 30 minutes" constant defined
# further down; `w` is presumably the window size (it is the value swept in the
# window-size section) -- confirm both against calculate.calculate_drift.
result = calculate_drift(X1, y1, n_train=1800, w=16)

In [None]:
# Display the raw value returned by calculate_drift.
result

In [None]:
# Plot the drift result of the Google-only run.
plot_drift(result)

# Only one experiment

In [None]:
# Number of consecutive samples that make up one experiment window.
interval = 1800

# The per-experiment cells in this section slice `X` and `y`, but the notebook
# only builds them further down (in the window-size section), so a fresh
# "Restart & Run All" would stop in the next cell with a NameError. Build them
# here, using exactly the same expressions as those later cells.
#
# Labels: 1 where `r` increased relative to the previous row, 0 otherwise; the
# first row has no predecessor and is seeded with +1.
y = data['r']
y = np.sign(np.concatenate(([1], y[1:].values - y[:-1].values)))
y[y == -1] = 0  # y will have only 0s and 1s

# Features: the same nine columns, in the same order, as the Google-only matrix.
X = data[['Active time', 'Medium busy', 'channel',
          'new Active time', 'new Busy time', 'new Medium busy',
          'new_channel', 'new_txpower', 'txpower']].values

In [None]:
# Run drift detection separately on each of the first 10 windows of `interval`
# samples, taken from the full-dataset features `X` and labels `y`.
# NOTE(review): clfs_label=["AdWin"] presumably restricts the run to the AdWin
# detector -- confirm against calculate.calculate_drift.
for i in range(10):
    # Window i covers samples [li, ls). The upper bound is derived from
    # `interval` rather than a second hard-coded 1800, so the windows stay
    # contiguous and non-overlapping if `interval` is changed.
    li = interval * i
    ls = li + interval
    # Dedicated names: assigning to X1/y1 here would silently replace the
    # Google-only arrays built earlier in the notebook.
    X_exp = X[li:ls]
    y_exp = y[li:ls]
    print("Experiment #{} from {} to {}".format(i, li, ls))
    result = calculate_drift(X_exp, y_exp, n_train=100, w=16, clfs_label=["AdWin"])
    plot_drift(result)

# Changing lambda from Page-Hinkley

In [None]:
# Sweep the `lambda_` argument over several values on one experiment window.
i = 7  # index of the experiment window used for the whole sweep

# The window does not depend on lambda_, so slice it once, outside the loop.
li = interval * i
ls = li + interval  # derived from `interval` instead of a second hard-coded 1800
# Dedicated names so the Google-only X1/y1 built earlier are not overwritten.
X_exp = X[li:ls]
y_exp = y[li:ls]

for lambda_ in [10, 20, 30, 40, 50, 100]:
    print("Experiment #{} from {} to {} - lambda={}".format(i, li, ls, lambda_))
    result = calculate_drift(X_exp, y_exp, n_train=100, w=16, lambda_=lambda_,
                             clfs_label=["Page-Hinkley"],
                             plot_circles=["Page-Hinkley"],
                             )
    plot_drift(result)

# Varying the window size

In [None]:
# Binary target over the full dataset: 1 where `r` increased relative to the
# previous row, 0 where it stayed the same or decreased. The first row has no
# predecessor, so its step is seeded with +1 (label 1).
r_all = data['r'].values
y = np.sign(np.concatenate(([1], np.diff(r_all))))
y[y < 0] = 0  # fold "decreased" (-1) into 0 so only 0s and 1s remain

In [None]:
# Feature matrix over the full dataset: nine columns, in this fixed order.
feature_columns = [
    'Active time', 'Medium busy', 'channel',
    'new Active time', 'new Busy time', 'new Medium busy',
    'new_channel', 'new_txpower', 'txpower',
]
X = data.loc[:, feature_columns].values

In [None]:
# Training-set size: 30 minutes, written as 30 * 60.
# NOTE(review): no visible cell reads this variable -- the window-size sweep
# below passes a literal n_train=1000; confirm which value is intended.
n_train = 30 * 60

In [None]:
# Number of distinct `file_id` values in the dataset.
num_files = len(data['file_id'].unique())

In [None]:
# Repeat drift detection on the full dataset for increasing values of `w`, plot
# each run, and print the "AdWin" entry of the result (0 if absent) divided by
# the number of files.
# NOTE(review): n_train is the literal 1000 here although an `n_train` variable
# (1800) is defined just above -- confirm which value is intended.
for w in (16, 32, 64, 128, 256, 512):
    result = calculate_drift(X, y, n_train=1000, w=w)
    plot_drift(result)
    adwin_per_file = result.get("AdWin", 0) / num_files
    print("On average", adwin_per_file, "in AdWin")