In [2]:
import os

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from skmultiflow.data import ConceptDriftStream, \
    AGRAWALGenerator, \
    HyperplaneGenerator, \
    SEAGenerator

In [3]:
def scale_and_save(x, y, name, out_dir='./data/synthetic/'):
    """Min-max scale the features, append the labels and write everything to CSV.

    Parameters
    ----------
    x : array-like
        Feature matrix. Each column is rescaled with a default ``MinMaxScaler``.
    y : numpy.ndarray
        Target values; cast to ``int`` and stored in a trailing ``class`` column.
    name : str
        File stem of the CSV (``<out_dir>/<name>.csv``).
    out_dir : str, optional
        Destination folder. Defaults to ``'./data/synthetic/'`` and is created
        if it does not exist yet.

    Notes
    -----
    The CSV is written without index and without a header row.
    """
    # Create the folder up front: otherwise a missing directory makes to_csv fail,
    # and only after the (slow) sample generation has already finished.
    os.makedirs(out_dir, exist_ok=True)

    # MinMaxScaler works on its own copy by default, so no defensive x.copy() is needed.
    data = pd.DataFrame(MinMaxScaler().fit_transform(x))  # normalize data
    data['class'] = y.astype(int)

    data.to_csv(os.path.join(out_dir, name + '.csv'), index=False, header=False)

## SEA Generator (Sudden Drift)
1,000,000 instances, 3 features, sudden drifts at observations 200,000, 400,000, 600,000 and 800,000

In [6]:
# Settings shared by every SEA concept; only the classification function changes.
sea_params = dict(balance_classes=False, random_state=112, noise_percentage=0.1)
# Every drift is sudden (width=1). The streams are nested, so each position is
# presumably counted from the moment the enclosing stream switches to the nested
# one - verify against the skmultiflow ConceptDriftStream implementation.
sudden_drift = dict(position=200000, width=1, random_state=0)

# Drift @ 800.000 (function 3 -> function 0)
alternate3 = ConceptDriftStream(
    stream=SEAGenerator(classification_function=3, **sea_params),
    drift_stream=SEAGenerator(classification_function=0, **sea_params),
    **sudden_drift)

# Drift @ 600.000 (function 2 -> function 3)
alternate2 = ConceptDriftStream(
    stream=SEAGenerator(classification_function=2, **sea_params),
    drift_stream=alternate3,
    **sudden_drift)

# Drift @ 400.000 (function 1 -> function 2)
alternate1 = ConceptDriftStream(
    stream=SEAGenerator(classification_function=1, **sea_params),
    drift_stream=alternate2,
    **sudden_drift)

# Drift @ 200.000 (function 0 -> function 1)
stream = ConceptDriftStream(
    stream=SEAGenerator(classification_function=0, **sea_params),
    drift_stream=alternate1,
    **sudden_drift)

# No stream.prepare_for_use() here: the call is deprecated and only emits a
# warning, because streams are ready to use right after instantiation.
x, y = stream.next_sample(1000000)

# Normalize and save data
scale_and_save(x, y, 'sea')

New instances of the Stream class are now ready to use after instantiation.
  probability_drift = 1.0 / (1.0 + np.exp(x))


## Agrawal Generator (Incremental Drift)
1,000,000 instances, 9 features, incremental drifts over observations 100,000-200,000, 300,000-500,000 and 800,000-900,000

In [7]:
# Settings shared by every AGRAWAL concept; only the classification function changes.
agrawal_params = dict(balance_classes=False, random_state=112, perturbation=0.1)

# NOTE(review): skmultiflow documents `position` as the *central* position of a
# drift and `width` as its total width, and a nested stream presumably only
# advances when it is sampled. Under that reading the observation ranges quoted
# below are approximate at best - confirm against the generated data before
# relying on them. The parameters are left unchanged so the dataset stays the same.

# Drift @ 800.000-900.000 (function 2 -> function 3)
alternate2 = ConceptDriftStream(
    stream=AGRAWALGenerator(classification_function=2, **agrawal_params),
    drift_stream=AGRAWALGenerator(classification_function=3, **agrawal_params),
    position=300000,
    width=100000,
    random_state=0)

# Drift @ 300.000-500.000 (function 1 -> function 2)
alternate1 = ConceptDriftStream(
    stream=AGRAWALGenerator(classification_function=1, **agrawal_params),
    drift_stream=alternate2,
    position=100000,
    width=200000,
    random_state=0)

# Drift @ 100.000-200.000 (function 0 -> function 1)
stream = ConceptDriftStream(
    stream=AGRAWALGenerator(classification_function=0, **agrawal_params),
    drift_stream=alternate1,
    position=100000,
    width=100000,
    random_state=0)

# No stream.prepare_for_use() here: the call is deprecated and only emits a
# warning, because streams are ready to use right after instantiation.
x, y = stream.next_sample(1000000)

# Normalize and save data
scale_and_save(x, y, 'agrawal_inc')

New instances of the Stream class are now ready to use after instantiation.


## Hyperplane Generator (Incremental Drift)
500,000 instances, 50 features

In [9]:
# Rotating-hyperplane stream: 25 of the 50 features are configured as drifting.
stream = HyperplaneGenerator(random_state=112,
                             n_features=50,
                             n_drift_features=25,
                             mag_change=0.5,
                             noise_percentage=0.1,
                             sigma_percentage=0.1)

# No stream.prepare_for_use() here: the call is deprecated and only emits a
# warning, because streams are ready to use right after instantiation.
x, y = stream.next_sample(500000)

# Normalize and save data
scale_and_save(x, y, 'hyperplane')

New instances of the Stream class are now ready to use after instantiation.
