In [1]:
from scengen.exp.experiment import ComparisonExperiment
from scengen.generators import PredClusGenerator, SampleGenerator, RandomGenerator

In [2]:
import numpy as np

## Util to generate random data

In [3]:
# Single seeded generator shared by the notebook so that a fresh
# "Restart Kernel -> Run All" reproduces the same random data.
random_gen = np.random.default_rng(12341)


def random_dataset(N, nb_attributes=25, nb_timesteps=60, nb_folds=5):
    """Generate a synthetic dataset of uniform random attributes and time series.

    All values are drawn from the module-level seeded ``random_gen``. Every
    call advances that shared generator, so consecutive calls return
    different data while the overall sequence stays reproducible.

    Parameters
    ----------
    N : int
        Number of instances (rows) to generate.
    nb_attributes : int, default 25
        Number of attribute columns per instance.
    nb_timesteps : int, default 60
        Number of values in each time series.
    nb_folds : int, default 5
        Number of folds the instance indices are split into.

    Returns
    -------
    attributes : numpy.ndarray of shape (N, nb_attributes)
        Uniform random values in [0, 1).
    timeseries : numpy.ndarray of shape (N, nb_timesteps)
        Uniform random values in [0, 1).
    folds : list of numpy.ndarray
        ``nb_folds`` integer arrays holding consecutive row indices; together
        they cover ``0 .. N-1`` exactly once (sizes differ by at most one).
    """
    # Draw order matters: attributes first, then time series, so the random
    # stream is consumed exactly as before for the default arguments.
    attributes = random_gen.random(size=(N, nb_attributes))
    timeseries = random_gen.random(size=(N, nb_timesteps))
    folds = np.array_split(np.arange(0, N, dtype='int'), nb_folds)
    return attributes, timeseries, folds

## Experiment interface to easily run experiments

In [4]:
from sklearn.tree import DecisionTreeRegressor
from pathlib import Path



# Create the experiment; results are stored under results/first_test and
# 250 samples are requested per generator call.
experiment = ComparisonExperiment(
    result_path=Path('results/first_test'),
    nb_of_samples=250,
)

# Register the generators to compare. The keyword names become the method
# labels in the result tables.
experiment = experiment.add_methods(
    predictive_clustering=PredClusGenerator(
        DecisionTreeRegressor(max_depth=4, min_samples_leaf=100)
    ),
    deep_predictive_clustering=PredClusGenerator(
        DecisionTreeRegressor(max_depth=10, min_samples_leaf=100)
    ),
    random_baseline=RandomGenerator(),
)

# Register the datasets (small one first, so the shared random stream is
# consumed in the same order). The keyword names become the dataset labels.
experiment = experiment.add_datasets(
    random_dataset=random_dataset(1000),
    deep_random_dataset=random_dataset(10000),
)

# Run every method on every dataset; execute() returns the energy scores
# and a table of timings.
energy_scores, timing_df = experiment.execute()




Methods:   0%|          | 0/3 [00:00<?, ?it/s, method=predictive_clustering]
Datasets:   0%|          | 0/2 [00:00<?, ?it/s][A
Datasets:   0%|          | 0/2 [00:00<?, ?it/s, dataset=random_dataset][A
Datasets:   0%|          | 0/2 [00:00<?, ?it/s, dataset=deep_random_dataset][A
Methods:   0%|          | 0/3 [00:00<?, ?it/s, method=deep_predictive_clustering]
Datasets:   0%|          | 0/2 [00:00<?, ?it/s][A
Datasets:   0%|          | 0/2 [00:00<?, ?it/s, dataset=random_dataset][A
Datasets:   0%|          | 0/2 [00:00<?, ?it/s, dataset=deep_random_dataset][A
Methods:   0%|          | 0/3 [00:00<?, ?it/s, method=random_baseline]           
Datasets:   0%|          | 0/2 [00:00<?, ?it/s][A
Datasets:   0%|          | 0/2 [00:00<?, ?it/s, dataset=random_dataset][A

0it [00:00, ?it/s][A[A

1it [00:00,  2.90it/s][A[A

2it [00:00,  3.67it/s][A[A

3it [00:00,  4.12it/s][A[A

4it [00:00,  4.31it/s][A[A

5it [00:01,  4.09it/s][A[A

Datasets:  50%|█████     | 1/2 [00:01<00:01,

In [5]:
# Average the energy scores along axis 0; the result has one entry per
# (method, dataset) pair.
# NOTE(review): presumably lower energy score is better — confirm.
energy_scores.mean(axis = 0)

predictive_clustering       random_dataset         1.600256
                            deep_random_dataset    1.586180
deep_predictive_clustering  random_dataset         1.600099
                            deep_random_dataset    1.595053
random_baseline             random_dataset         1.587108
                            deep_random_dataset    1.583526
dtype: float64

In [6]:
# Show training_time, predict_time and eval_time for each (method, dataset) pair.
# NOTE(review): units are presumably seconds — confirm against ComparisonExperiment.
timing_df

Unnamed: 0,Unnamed: 1,training_time,predict_time,eval_time
predictive_clustering,random_dataset,0.061019,0.050253,1.691875
predictive_clustering,deep_random_dataset,0.95483,0.212758,11.465797
deep_predictive_clustering,random_dataset,0.057783,0.035363,1.30346
deep_predictive_clustering,deep_random_dataset,1.554065,0.225117,11.162136
random_baseline,random_dataset,0.001014,0.0029,1.209708
random_baseline,deep_random_dataset,0.009206,0.016619,11.206723


## Use model directly
`PredClusGenerator` returns indices into the training data as samples.

In [7]:
# Fit a predictive-clustering generator directly on a fresh random dataset
# (the folds returned by random_dataset are not needed here).
model = PredClusGenerator(DecisionTreeRegressor(max_depth=4, min_samples_leaf=100))
attributes, timeseries, _ = random_dataset(1000)
model.fit(attributes, timeseries)

# Draw the query point from the notebook's seeded generator rather than the
# global, unseeded np.random, so the test attributes are the same on every
# "Restart Kernel -> Run All".
# NOTE(review): whether generate() itself is seeded is not visible here.
test_attributes = random_gen.random(size=(1, attributes.shape[1]))

# generate() returns row indices into the training data, not the series.
indices = model.generate(test_attributes, nb_of_samples=100)
indices

array([[335, 393,  17, 665, 925, 600, 224, 444, 966, 733, 750, 195, 393,
        280, 109, 195, 976, 910, 491, 505, 109, 552, 220, 840,   1, 539,
        881, 334, 335, 718, 915, 881,  65, 730, 888, 600, 730, 970, 793,
        974, 749, 811, 840, 974, 749, 792, 750, 419, 515, 172, 981, 728,
        297, 280,  62,  17, 973, 379, 297, 619, 582, 280, 109,  56, 468,
        696, 720, 903, 718, 239, 868, 155, 900, 366, 444,  97, 492, 712,
        491, 220, 973, 994, 951, 999, 285, 814, 701, 109, 223, 588, 712,
        888, 235, 973, 365, 649, 968, 300, 974, 419]])

In [8]:
# Turn the sampled indices into actual series via fancy indexing: `indices`
# holds 100 row indices for the single test point, so the result stacks the
# corresponding training series.
timeseries[indices]

array([[[0.78917159, 0.29641053, 0.88647886, ..., 0.31124221,
         0.49443381, 0.00291426],
        [0.37260943, 0.62400753, 0.00716524, ..., 0.45125638,
         0.82006294, 0.20655288],
        [0.29781456, 0.08027319, 0.69547696, ..., 0.49901803,
         0.99129198, 0.5225916 ],
        ...,
        [0.59350444, 0.99189365, 0.57016508, ..., 0.12343996,
         0.09424864, 0.57252782],
        [0.36438214, 0.32872846, 0.01873795, ..., 0.00198167,
         0.63073099, 0.66979514],
        [0.91221482, 0.31460003, 0.03629783, ..., 0.94277473,
         0.81787294, 0.76477122]]])

## Or use SampleGenerator

In [9]:
# Wrap the index-based generator in SampleGenerator and fit it on a fresh
# random dataset (the folds returned by random_dataset are not needed here).
model = SampleGenerator(PredClusGenerator(DecisionTreeRegressor(max_depth=4, min_samples_leaf=100)))
attributes, timeseries, _ = random_dataset(1000)
model.fit(attributes, timeseries)

# Draw the query point from the notebook's seeded generator rather than the
# global, unseeded np.random, so the test attributes are the same on every
# "Restart Kernel -> Run All".
# NOTE(review): whether generate_timeseries() itself is seeded is not visible here.
test_attributes = random_gen.random(size=(1, attributes.shape[1]))

# Keep the generated samples under their own name: rebinding `timeseries`
# would replace the training data and break a re-run of the earlier
# `timeseries[indices]` cell.
generated_timeseries = model.generate_timeseries(test_attributes, nb_of_samples=100)
generated_timeseries

array([[[0.50949858, 0.03405682, 0.82178775, ..., 0.82794544,
         0.68002814, 0.1131569 ],
        [0.53663628, 0.63484605, 0.29854691, ..., 0.17171469,
         0.70286616, 0.39224721],
        [0.76924495, 0.82666538, 0.55089984, ..., 0.43513758,
         0.78075136, 0.18882439],
        ...,
        [0.33166547, 0.8048977 , 0.5573701 , ..., 0.97335676,
         0.26030587, 0.07771656],
        [0.63312946, 0.5382988 , 0.75668396, ..., 0.20077779,
         0.45015677, 0.77075608],
        [0.86713973, 0.96776444, 0.76527476, ..., 0.5031114 ,
         0.20024022, 0.9533432 ]]])