In [1]:
# ---------------- For Developers Only -------------------------------------------------
# ---------------- Check current working directory for the notebooks -------------------
# import os

# # Move one directory up
# os.chdir('..')
# print("Current Directory:", os.getcwd())

Current Directory: G:\github-aditya0by0\stream-viz\stream_viz


In [None]:
# ---------------- For Developers Only -------------------------------------------------
# ---------------- Move one directory up from the current working directory  -----------
# import os
# from pathlib import Path

# # Move one directory up
# current_dir = Path.cwd()
# parent_dir = current_dir.parent
# os.chdir(parent_dir)
# print("Current Directory:", Path.cwd())

## Data Encoding

There are basically *three* final implementations of data encoders in our library:
1. **`NormalDataEncoder`**: Data encoder for normal data like *cfpdss* without any missing values.
2. **`MissingDataEncoder`**: Data encoder for normal data like *cfpdss* with missing values.
3. **`KappaStrategyDataEncoder`**: Data encoder for strategy data like *experiments* without any missing values.

The above implementations inherit from the `DataEncoder` abstract class, which defines a contract requiring them to implement the common methods below:
- **`read_csv_data`**: Reads CSV data from the provided file path and stores it in the *`original_data`* class attribute. (Internally calls the pandas `read_csv` method.)
- **`encode_data`**: Encodes the data according to an internally defined process and stores the result in the *`encoded_data`* class attribute.

**<u>Note</u>**: Cfpdss-related data encoders implement an additional cfpdss contract that stores the encoded data in two separate class attributes, `X_encoded_data` and `y_encoded_data`, thereby separating the target variable from the rest of the data. Hence, for these encoders, these class attributes should be used instead of the `encoded_data` attribute mentioned above.

In [12]:
from stream_viz.data_encoders.cfpdss_data_encoder import NormalDataEncoder
from stream_viz.utils.constants import _NORMAL_DATA_PATH  # Developer-only constant; not meant for library users

normal_encoder = NormalDataEncoder()
# Replace `_NORMAL_DATA_PATH` with the path to your own CSV file; the constant is for internal use only.
# Add any relevant/necessary keyword arguments supported by pandas.read_csv, if required.
normal_encoder.read_csv_data(filepath_or_buffer=_NORMAL_DATA_PATH)
normal_encoder.encode_data()
normal_encoder.X_encoded_data.head()

Unnamed: 0,c5_b,c6_b,c7_b,c8_b,c9_b,n0,n1,n2,n3,n4
0,0,0,1,0,0,0.528245,0.598345,0.558432,0.482846,0.612024
1,0,0,0,1,1,0.662432,0.423329,0.487623,0.454495,0.452664
2,0,0,0,1,1,0.56299,0.576429,0.545916,0.370166,0.543403
3,0,0,0,1,1,0.475311,0.566046,0.539992,0.421434,0.544852
4,1,0,0,1,0,0.370579,0.554642,0.536804,0.223743,0.392332


In [13]:
from stream_viz.data_encoders.cfpdss_data_encoder import MissingDataEncoder

# Developer-only constant; replace it with the path to your own CSV file.
from stream_viz.utils.constants import _MISSING_DATA_PATH

missing_encoder = MissingDataEncoder()

# Besides the file path, pass any relevant/necessary keyword arguments
# supported by pandas.read_csv (here: `index_col`), if required.
missing_encoder.read_csv_data(filepath_or_buffer=_MISSING_DATA_PATH, index_col=[0])
missing_encoder.encode_data()

# Show the first rows of the encoded data held in `X_encoded_data`.
missing_encoder.X_encoded_data.head()

Unnamed: 0,c5_b,c6_b,c7_b,c8_b,c9_b,n0,n1,n2,n3,n4
0,0.0,0.0,1.0,0.0,0.0,0.530356,0.598345,0.519161,0.478557,0.620371
1,0.0,0.0,0.0,1.0,1.0,0.672618,0.423329,0.442055,0.449888,0.458838
2,0.0,0.0,0.0,1.0,1.0,0.567192,0.576429,0.505532,0.364614,0.550814
3,0.0,0.0,0.0,1.0,1.0,0.474236,0.566046,0.499081,0.416457,0.552283
4,1.0,0.0,0.0,1.0,0.0,0.363202,0.554642,0.49561,0.21655,0.397683


In [14]:
from stream_viz.data_encoders.strategy_data_encoder import KappaStrategyDataEncoder

# Developer-only constant; replace it with the path to your own CSV file.
from stream_viz.utils.constants import _LEARNING_STRATEGY_DATA_PATH

kappa_encoder = KappaStrategyDataEncoder()

# Besides the file path, pass any relevant/necessary keyword arguments
# supported by pandas.read_csv (here: `header` and `index_col`), if required.
kappa_encoder.read_csv_data(
    filepath_or_buffer=_LEARNING_STRATEGY_DATA_PATH,
    header=[0, 1, 2],
    index_col=[0, 1],
)
kappa_encoder.encode_data()

# Show the first rows of the encoded data held in `encoded_data`.
kappa_encoder.encoded_data.head()

Unnamed: 0_level_0,model_all,model_optimal,model_label,model_feat,model_nafa,model_smraed_catc,model_smraed_sumc,model_smraed_prioc,model_smraed_
Batch_Start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,0.593128,0.593128,0.432892,0.593128,0.432892,0.257426,0.257426,0.432892,0.593128
100,0.44795,0.409449,0.294671,0.33281,0.296875,0.334898,0.296875,0.221184,0.334898
150,0.83871,0.919614,0.388254,0.676375,0.384236,0.592834,0.634146,0.592834,0.42623
200,0.88,0.84,0.72,0.76,0.36,0.68,0.68,0.68,0.52
250,0.918831,0.959612,0.72,0.708819,0.295775,0.672131,0.708819,0.672131,0.573379


## 1. Real Concept Drift Detection

A real concept drift refers to a change in $p(y|X)$ that affects the decision boundaries or the target concept.<br>
E.g., initially a user was interested in news articles related to dwelling houses, but is now interested in holiday homes.
#### McDiarmid Drift Detection Method (MDDM)

- MDDM applies McDiarmid’s inequality to detect real concept drifts
- Uses a sliding window approach
- Weighting scheme for the elements in the window: $w_{i} < w_{i+1}$
    - *Arithmetic*: $w_{i} = 1 + (i - 1) \cdot d$&emsp;...where d ≥ 0 is the difference between two consecutive weights
    - *Geometric*: $w_{i} = r^{(i-1)}$&emsp;...where r ≥ 1 is the ratio between two consecutive weights
    - *Euler*: $w_{i} = r^{(i-1)}$ with $r = e^{\lambda}$&emsp;...where λ ≥ 0

Refer to the paper for more info: [Pesaranghader, A., Viktor, H.L., & Paquet, E. (2017). McDiarmid Drift Detection Methods for Evolving Data Streams. 2018 International Joint Conference on Neural Networks (IJCNN), 1-9.](https://arxiv.org/pdf/1710.02030)

In [None]:
# --------------- Real Concept Drift --------------------------------------------------

## 2. Feature Drift Detection

## 3. Velocity Plots

## 4. Missingness Plots

## 5. Learning Strategies Plot