In [1]:
import numpy as np

In [2]:
# Load the Italy power demand dataset from aeon as numpy arrays:
# X holds the time series (one row per case) and y the class labels.
from aeon.datasets import load_italy_power_demand
X, y = load_italy_power_demand(return_type='numpy2d')

In [3]:
from splitter import get_parameter_value, get_candidate_splitter
# Draw one random parameter setting per distance measure, using X to scale
# the data-dependent ranges, and display the resulting list.
distances = get_parameter_value(X)
distances

[{'euclidean': {}},
 {'dtw': {'window': 0.064}},
 {'ddtw': {'window': 0.181}},
 {'wdtw': {'g': 0.282}},
 {'wddtw': {'g': 0.908}},
 {'erp': {'g': 0.616}},
 {'lcss': {'epsilon': 0.644, 'window': 0.208}},
 {'twe': {'lmbda': 3, 'nu': 0.001}},
 {'msm': {'c': 0.01}}]

In [4]:
# Build a candidate splitter from the data, the labels and the parameterized
# distances, then display it.
splitter = get_candidate_splitter(X,y,distances)
splitter

[[{'1': array([-0.96811603, -1.3994791 , -1.4583013 , -1.5955532 , -1.5955532 ,
          -1.3994791 , -1.1641901 , -0.22303434,  0.58086961,  1.0122327 ,
           1.2083068 ,  1.0514475 ,  0.87498082,  0.5612622 ,  0.48283254,
           0.62008443,  0.58086961,  0.93380306,  1.0514475 ,  0.97301789,
           0.50243996,  0.12989911, -0.18381951, -0.57596778])},
  {'2': array([-0.80377593, -1.1948021 , -1.4033493 , -1.4815546 , -1.4554861 ,
          -1.3251441 , -1.5597598 , -0.77770752,  0.57784972,  1.3077652 ,
           1.359902  ,  1.4381072 ,  0.94280745,  0.65605495,  0.78639699,
           0.78639699,  0.8124654 ,  0.55178132,  0.36930246,  0.1868236 ,
           0.03041314,  0.212892  ,  0.26502882, -0.28240776])}],
 {'erp': {'g': 0.616}}]

In [5]:
X[0]

array([-0.71051757, -1.1833204 , -1.3724416 , -1.5930829 , -1.4670021 ,
       -1.3724416 , -1.0887599 ,  0.04596695,  0.92853223,  1.0861332 ,
        1.2752543 ,  0.96005242,  0.61333034,  0.01444676, -0.6474772 ,
       -0.26923494, -0.20619456,  0.61333034,  1.3698149 ,  1.4643754 ,
        1.054613  ,  0.58181015,  0.1720477 , -0.26923494])

In [6]:
type(y[0])

numpy.str_

In [7]:
np.unique(y)

array(['1', '2'], dtype='<U1')

In [8]:
X.shape

(1096, 24)

In [9]:
y

array(['1', '1', '2', ..., '2', '2', '2'], dtype='<U1')

In [10]:
from sklearn.preprocessing import LabelEncoder

# Encode the string class labels in y as integer codes.
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(y)
y_labels

array([0, 0, 1, ..., 1, 1, 1], dtype=int64)

In [11]:
np.unique(y_labels)

array([0, 1], dtype=int64)

In [12]:
# Round-trip check: decoding the encoded labels should give back y.
new_y = label_encoder.inverse_transform(y_labels)
# Element-wise comparison, so this prints a boolean array rather than a single flag.
print(new_y==y)

[ True  True  True ...  True  True  True]


In [13]:
# Try the concatenation on the first three cases: append the encoded labels
# as one extra column to the right of the series values.
X_sample = X[:3]
y_sample = y_labels[:3].reshape(-1,1)  # column vector so it can be joined along axis 1
X_combined = np.concatenate([X_sample, y_sample], axis=1)
X_combined.shape

(3, 25)

In [14]:
# Combine the data with its labels: append the encoded labels as a final column.
X_combined = np.concatenate([X,y_labels.reshape(-1,1)], axis=1)
X_combined.shape

(1096, 25)

In [15]:
y_labels.shape

(1096,)

In [16]:
# Print each distinct encoded label.
for label in np.unique(y_labels):
    print(label)

0
1


In [19]:
# Pick one random exemplar per class: for every distinct label, select the
# matching rows of X and store a randomly chosen one keyed by its label.
# NOTE(review): the recorded run of this cell failed with an IndexError saying
# the indexed array had length 0 along axis 0 while the boolean mask had 1096
# entries -- presumably X had been rebound in the kernel by a cell that is no
# longer shown; confirm by re-running from a fresh kernel.
exemplars = []
for label in np.unique(y):
    y_new = y[y==label]
    X_new = X[y==label]
    n = np.random.randint(0,X_new.shape[0])
    exemplars.append({y_new[n]: X_new[n,:]})
    print(X_new.shape)
exemplars

IndexError: boolean index did not match indexed array along dimension 0; dimension is 0 but corresponding boolean dimension is 1096

In [None]:
y[:10]

In [None]:
import numpy as np

# Example data (ensure X and y have matching first dimensions)
# NOTE(review): this rebinds the names X and y to random data, replacing
# whatever they referred to before in the kernel.
X = np.random.rand(1096, 24)
y = np.random.choice(['label1', 'label2', 'label3'], 1096)

# Check dimensions
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Apply masking based on unique labels in y: the boolean mask `y == label`
# selects the rows of X that belong to that label.
for label in np.unique(y):
    X_new = X[y == label]
    print(f"Shape for label '{label}': {X_new.shape}")


In [3]:
import timeit

In [3]:
def get_parameter_value(X=None):
    """Draw one random parameter setting for each supported distance measure.

    Parameters
    ----------
    X : array-like of shape (n_cases, n_timepoints)
        Data whose overall standard deviation scales the ``erp`` and
        ``lcss`` parameter ranges. Required, despite the ``None`` default
        that is kept for signature compatibility.

    Returns
    -------
    random_params : list of dict
        One single-key dictionary per distance measure, in the order
        euclidean, dtw, ddtw, wdtw, wddtw, erp, lcss, twe, msm. Each maps the
        measure name to a dictionary of its randomly drawn parameters
        (empty for ``euclidean``).

    Raises
    ------
    ValueError
        If ``X`` is None.
    """
    if X is None:
        raise ValueError(
            "X must be provided: its standard deviation defines the "
            "'erp' and 'lcss' parameter ranges."
        )
    # np.asarray lets plain lists through; ndarrays are used as they are.
    X_std = np.asarray(X).std()

    # (low, high) bounds of the uniform draw for every parameter.
    param_ranges = {
        "euclidean": {},
        "dtw": {"window": (0, 0.25)},
        "ddtw": {"window": (0, 0.25)},
        "wdtw": {"g": (0, 1)},
        "wddtw": {"g": (0, 1)},
        "erp": {"g": (X_std / 5, X_std)},
        "lcss": {"epsilon": (X_std / 5, X_std),
                 "window": (0, 0.25)},
    }
    random_params = []
    for measure, ranges in param_ranges.items():
        random_params.append(
            {measure: {param: np.round(np.random.uniform(low, high), 3)
                       for param, (low, high) in ranges.items()}}
        )

    # For TWE: lmbda is an integer in 0..8 (upper bound of randint is
    # exclusive); nu is 10**-k with k drawn from 1..5, i.e. one of
    # 1e-1, 1e-2, ..., 1e-5.
    lmbda = np.random.randint(0, 9)
    exponent_range = np.arange(1, 6)
    random_exponent = np.random.choice(exponent_range)
    nu = 1 / 10**random_exponent
    random_params.append({"twe": {"lmbda": lmbda,
                                  "nu": nu}})

    # For MSM: c is 10**k with k drawn from -2..2 (inclusive). The exponents
    # are floats so that negative powers of the integer base are allowed.
    base = 10
    exponents = np.arange(-2, 3, dtype=np.float64)
    random_index = np.random.randint(0, len(exponents))
    c = base ** exponents[random_index]
    random_params.append({"msm": {"c": c}})

    return random_params


def get_candidate_splitter(X, y, paramterized_distances):
    """Generate a candidate splitter.

    A candidate splitter pairs one randomly chosen exemplar series per class
    with one randomly chosen parameterized distance measure.

    Parameters
    ----------
    X : np.ndarray of shape (n_cases, n_timepoints)
        The training input samples.
    y : np.ndarray of shape (n_cases,) or (n_cases, 1)
        The class label of every case in ``X``.
    paramterized_distances : list of dict
        The candidate distance measures with their parameters. Must not be
        empty. (The misspelled name is kept so keyword callers keep working.)

    Returns
    -------
    splitter : list
        ``[exemplars, distance]`` where ``exemplars`` is a list holding one
        ``{label: series}`` dictionary per distinct label (in sorted label
        order) and ``distance`` is one element of ``paramterized_distances``.

    Raises
    ------
    ValueError
        If ``paramterized_distances`` is empty or if ``X`` and ``y`` do not
        contain the same number of cases.
    """
    if len(paramterized_distances) == 0:
        raise ValueError("paramterized_distances must contain at least one distance.")

    _X = np.asarray(X)
    # Flatten the labels so that an (n_cases, 1) column yields a 1D boolean
    # mask that can select rows of X.
    _y = np.asarray(y).ravel()
    if _X.shape[0] != _y.shape[0]:
        raise ValueError(
            f"X and y must have the same number of cases, "
            f"got {_X.shape[0]} and {_y.shape[0]}."
        )

    # One randomly selected series per class, keyed by its label.
    exemplars = []
    for label in np.unique(_y):
        X_label = _X[_y == label]
        idx = np.random.randint(0, X_label.shape[0])
        exemplars.append({label: X_label[idx, :]})

    # Choose among all supplied distances rather than assuming exactly nine.
    n = np.random.randint(0, len(paramterized_distances))
    splitter = [exemplars, paramterized_distances[n]]

    return splitter

In [4]:
import time

# Wall-clock timing of a first call to get_parameter_value.
# NOTE(review): a single time.time() measurement is noisy; %timeit would give
# a more stable figure.
start = time.time()
distance = get_parameter_value(X)
end = time.time()
print("Elapsed (with compilation) = {}s".format((end - start)))

Elapsed (with compilation) = 0.004015445709228516s


In [5]:
# Time a repeated call to get_parameter_value for comparison with a first call.
start = time.time()
distance = get_parameter_value(X)
end = time.time()
print("Elapsed (after compilation) = {}s".format((end - start)))

Elapsed (after compilation) = 0.0009999275207519531s


In [6]:
# Wall-clock timing of a first call to get_candidate_splitter, using the
# parameterized distances stored in `distance`.
start = time.time()
splitter = get_candidate_splitter(X,y,distance)
end = time.time()
print("Elapsed (with compilation) = {}s".format((end - start)))

Elapsed (with compilation) = 0.0009999275207519531s


In [7]:
# Time a repeated call to get_candidate_splitter for comparison with a first call.
start = time.time()
splitter = get_candidate_splitter(X,y,distance)
end = time.time()
print("Elapsed (after compilation) = {}s".format((end - start)))

Elapsed (after compilation) = 0.0013093948364257812s


In [8]:
# Re-import the functions from splitter.py (replacing any definitions of the
# same names made in the notebook) and time get_parameter_value again.
# NOTE(review): the recorded run of this cell raised a numba TypingError in
# nopython mode at the `for measure, ranges in param_ranges.items()` loop of
# splitter.py, so no timing was produced here.
from splitter import get_parameter_value, get_candidate_splitter

start = time.time()
distance = get_parameter_value(X)
end = time.time()
print("Elapsed (with compilation) = {}s".format((end - start)))

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mInvalid use of getiter with parameters (Tuple(Tuple(unicode_type, DictType[undefined,undefined]<iv={}>), Tuple(unicode_type, DictType[unicode_type,Tuple(int64, float64)]<iv=None>), Tuple(unicode_type, DictType[unicode_type,Tuple(int64, float64)]<iv=None>), Tuple(unicode_type, DictType[unicode_type,UniTuple(int64 x 2)]<iv=None>), Tuple(unicode_type, DictType[unicode_type,UniTuple(int64 x 2)]<iv=None>), Tuple(unicode_type, DictType[unicode_type,UniTuple(float64 x 2)]<iv=None>), Tuple(unicode_type, DictType[unicode_type,UniTuple(float64 x 2)]<iv=None>)))
[0m
[0m[1mDuring: typing of intrinsic-call at D:\Python\Project\Aeon\PF_Python\Code\splitter.py (33)[0m
[1m
File "splitter.py", line 33:[0m
[1mdef get_parameter_value(X=None):
    <source elided>
    random_params = []
[1m    for measure, ranges in param_ranges.items():
[0m    [1m^[0m[0m
