In [1]:
import numpy as np
from t2f.extraction.extractor import feature_extraction
from t2f.utils.importance_old import feature_selection
from t2f.model.clustering import ClusterWrapper

# 10 multivariate time series with 100 timestamps and 3 signals each
arr = np.random.randn(10, 100, 3)
arr[5:] = arr[5:] * 100

labels = {}  # unsupervised mode
# labels = {0: 'a', 1: 'a', 5: 'b', 6: 'b'}  # semi-supervised mode
n_clusters = 2  # Number of clusters

transform_type = 'std'  # preprocessing step
model_type = 'KMeans'  # clustering model

# Feature extraction
df_feats = feature_extraction(arr, batch_size=100, p=1)

# Feature selection
context = {'model_type': model_type, 'transform_type': transform_type}
top_feats = feature_selection(df_feats, labels=labels, context=context)
df_feats = df_feats[top_feats]

# Clustering
model = ClusterWrapper(n_clusters=n_clusters, model_type=model_type, transform_type=transform_type)
y_pred = model.fit_predict(df_feats)
print(y_pred.shape)

Feature Extraction: 100%|███████████████████████| 30/30 [00:01<00:00, 23.33it/s]


(10,)


In [4]:
df_feats

Unnamed: 0,single__2__ar_coefficient__coeff_3__k_10,"single__1__agg_linear_trend__attr_""slope""__chunk_len_50__f_agg_""mean""",single__1__mean_n_absolute_max__number_of_maxima_7,single__2__lempel_ziv_complexity__bins_100,"single__0__fft_aggregated__aggtype_""variance""",single__0__autocorrelation__lag_0,"single__1__cwt_coefficients__coeff_8__w_20__widths_(2, 5, 10, 20)"
0,0.070021,-0.056648,2.357387,0.71,212.232075,1.0,-0.213576
1,-0.146963,-0.005951,2.078256,0.69,194.305797,1.0,-1.528318
2,0.039793,0.219127,2.220321,0.73,238.714274,1.0,-1.30461
3,0.062166,-0.005576,1.989285,0.72,189.865385,1.0,-0.830582
4,0.108353,-0.00701,2.117425,0.73,239.526741,1.0,0.495027
5,-0.047486,-27.231725,233.707032,0.73,228.592039,1.0,77.573277
6,-0.071047,-20.685505,220.766032,0.73,203.612743,1.0,-109.816596
7,-0.022374,16.281599,210.153648,0.72,222.193248,1.0,-106.919632
8,-0.033961,-9.918357,245.149817,0.78,210.651814,1.0,7.50247
9,-0.030439,18.778244,238.182731,0.73,207.609241,1.0,47.567132


## Proposed RL Formulation:

Objective is to select the n features that provide the best possible time-series clustering

- State: currently selected features
- Action: select a new feature
- Reward: let $m$ be a selected metric, reward is gain in that metric
$$r_t = m_{t+1} - m_t$$
- Episode ends when current state has n features

In [7]:
df_all_feats = feature_extraction(arr, batch_size=100, p=1)

Feature Extraction: 100%|███████████████████████| 30/30 [00:01<00:00, 22.96it/s]


In [8]:
df_all_feats

Unnamed: 0,single__0__variance_larger_than_standard_deviation,single__0__has_duplicate_max,single__0__has_duplicate_min,single__0__has_duplicate,single__0__sum_values,single__0__abs_energy,single__0__mean_abs_change,single__0__mean_change,single__0__mean_second_derivative_central,single__0__median,...,pair__euclidean__0__2,pair__minkowski__0__2,pair__braycurtis__1__2,pair__canberra__1__2,pair__chebyshev__1__2,pair__cityblock__1__2,pair__correlation__1__2,pair__cosine__1__2,pair__euclidean__1__2,pair__minkowski__1__2
0,1.0,0.0,0.0,0.0,5.287112,111.3751,1.129893,0.006999,0.015111,0.007505,...,13.794133,13.794133,1.123022,74.025354,4.735953,126.516417,1.141892,1.14132,16.01384,16.01384
1,1.0,0.0,0.0,0.0,2.068156,103.9971,1.253439,0.006673,-0.009878,0.033491,...,15.146962,15.146962,0.861979,67.474582,4.674118,106.893237,0.933346,0.932998,13.676409,13.676409
2,0.0,0.0,0.0,0.0,7.503391,86.30008,1.034175,0.013956,0.011032,0.040575,...,12.149357,12.149357,1.090455,73.174954,4.153944,112.189184,1.144307,1.159014,14.628981,14.628981
3,1.0,0.0,0.0,0.0,-8.966891,105.3236,1.199555,-0.035013,-0.004603,-0.105902,...,14.632499,14.632499,1.026952,72.55265,4.057403,113.484986,1.004748,1.005606,14.190062,14.190062
4,0.0,0.0,0.0,0.0,-14.906289,98.27242,1.139964,0.003293,0.000899,-0.06387,...,14.101204,14.101204,0.847787,66.761631,4.368102,98.32847,0.818581,0.856547,12.838124,12.838124
5,1.0,0.0,0.0,0.0,-576.356023,1068407.0,104.102263,-0.841255,-0.962745,-9.807734,...,1387.904896,1387.904896,1.165971,76.820549,395.811105,12078.702406,1.171225,1.178059,1530.681163,1530.681163
6,1.0,0.0,0.0,0.0,-1071.474327,924604.6,99.68474,-1.28528,-0.580773,-16.818979,...,1401.824443,1401.824443,0.873381,66.811104,401.916664,10067.210355,0.923999,0.920526,1277.177579,1277.177579
7,1.0,0.0,0.0,0.0,1157.564511,783801.6,90.229913,-0.230378,1.240627,6.903363,...,1379.817058,1379.817058,0.925196,70.789737,300.462223,10772.123029,0.925638,0.927301,1314.225935,1314.225935
8,1.0,0.0,0.0,0.0,-613.676264,1025893.0,124.630406,2.613572,-0.354406,-11.688875,...,1382.592814,1382.592814,0.973159,70.242658,429.83879,11811.313153,1.002994,1.003369,1470.915645,1470.915645
9,1.0,0.0,0.0,0.0,936.855888,970294.8,107.730888,-1.254408,1.065826,2.51328,...,1415.026027,1415.026027,1.24956,77.929981,421.719805,12611.965902,1.244068,1.249233,1604.27004,1604.27004


In [65]:
import random
from tsfresh.utilities.dataframe_functions import impute
import gym
from gym import spaces

from tslearn.clustering import silhouette_score

In [13]:
impute(df_all_feats)

Unnamed: 0,single__0__variance_larger_than_standard_deviation,single__0__has_duplicate_max,single__0__has_duplicate_min,single__0__has_duplicate,single__0__sum_values,single__0__abs_energy,single__0__mean_abs_change,single__0__mean_change,single__0__mean_second_derivative_central,single__0__median,...,pair__euclidean__0__2,pair__minkowski__0__2,pair__braycurtis__1__2,pair__canberra__1__2,pair__chebyshev__1__2,pair__cityblock__1__2,pair__correlation__1__2,pair__cosine__1__2,pair__euclidean__1__2,pair__minkowski__1__2
0,1.0,0.0,0.0,0.0,5.287112,111.3751,1.129893,0.006999,0.015111,0.007505,...,13.794133,13.794133,1.123022,74.025354,4.735953,126.516417,1.141892,1.14132,16.01384,16.01384
1,1.0,0.0,0.0,0.0,2.068156,103.9971,1.253439,0.006673,-0.009878,0.033491,...,15.146962,15.146962,0.861979,67.474582,4.674118,106.893237,0.933346,0.932998,13.676409,13.676409
2,0.0,0.0,0.0,0.0,7.503391,86.30008,1.034175,0.013956,0.011032,0.040575,...,12.149357,12.149357,1.090455,73.174954,4.153944,112.189184,1.144307,1.159014,14.628981,14.628981
3,1.0,0.0,0.0,0.0,-8.966891,105.3236,1.199555,-0.035013,-0.004603,-0.105902,...,14.632499,14.632499,1.026952,72.55265,4.057403,113.484986,1.004748,1.005606,14.190062,14.190062
4,0.0,0.0,0.0,0.0,-14.906289,98.27242,1.139964,0.003293,0.000899,-0.06387,...,14.101204,14.101204,0.847787,66.761631,4.368102,98.32847,0.818581,0.856547,12.838124,12.838124
5,1.0,0.0,0.0,0.0,-576.356023,1068407.0,104.102263,-0.841255,-0.962745,-9.807734,...,1387.904896,1387.904896,1.165971,76.820549,395.811105,12078.702406,1.171225,1.178059,1530.681163,1530.681163
6,1.0,0.0,0.0,0.0,-1071.474327,924604.6,99.68474,-1.28528,-0.580773,-16.818979,...,1401.824443,1401.824443,0.873381,66.811104,401.916664,10067.210355,0.923999,0.920526,1277.177579,1277.177579
7,1.0,0.0,0.0,0.0,1157.564511,783801.6,90.229913,-0.230378,1.240627,6.903363,...,1379.817058,1379.817058,0.925196,70.789737,300.462223,10772.123029,0.925638,0.927301,1314.225935,1314.225935
8,1.0,0.0,0.0,0.0,-613.676264,1025893.0,124.630406,2.613572,-0.354406,-11.688875,...,1382.592814,1382.592814,0.973159,70.242658,429.83879,11811.313153,1.002994,1.003369,1470.915645,1470.915645
9,1.0,0.0,0.0,0.0,936.855888,970294.8,107.730888,-1.254408,1.065826,2.51328,...,1415.026027,1415.026027,1.24956,77.929981,421.719805,12611.965902,1.244068,1.249233,1604.27004,1604.27004


In [115]:
class EpsilonGreedy:
    """Epsilon Greedy Exploration Strategy."""

    def __init__(self, initial_epsilon=1.0, min_epsilon=0.0, decay=0.99):
        """Initialize Epsilon Greedy Exploration Strategy."""
        self.initial_epsilon = initial_epsilon
        self.epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.decay = decay

    def choose(self, q_table, state, action_space, legal_actions):
        """Choose action based on epsilon greedy strategy."""
        if np.random.rand() < self.epsilon:
            action = random.choice(legal_actions)
        else:
            legal_actions_index = np.argmax([q_table[state][legal] for legal in legal_actions])
            action = legal_actions[legal_actions_index]

        self.epsilon = max(self.epsilon * self.decay, self.min_epsilon)
        # print(self.epsilon)
        return action

    def reset(self):
        """Reset epsilon to initial value."""
        self.epsilon = self.initial_epsilon


class QLAgent:
    """Q-learning Agent class."""

    def __init__(self, starting_state, state_space, action_space, alpha=0.5, gamma=0.95, exploration_strategy=EpsilonGreedy()):
        """Initialize Q-learning agent."""
        self.state = starting_state
        self.state_space = state_space
        self.action_space = action_space
        self.action = None
        self.alpha = alpha
        self.gamma = gamma
        self.q_table = {self.state: [0 for _ in range(action_space.n)]}
        self.exploration = exploration_strategy
        self.acc_reward = 0

    def act(self, legal_actions):
        """Choose action based on Q-table."""
        self.action = self.exploration.choose(self.q_table, self.state, self.action_space, legal_actions)
        return self.action

    def learn(self, next_state, reward, done=False):
        """Update Q-table with new experience."""
        if next_state not in self.q_table:
            self.q_table[next_state] = [0 for _ in range(self.action_space.n)]

        s = self.state
        s1 = next_state
        a = self.action
        self.q_table[s][a] = self.q_table[s][a] + self.alpha * (
            reward + self.gamma * max(self.q_table[s1]) - self.q_table[s][a]
        )
        self.state = s1
        self.acc_reward += reward

In [161]:
class FeatureSelectionEnvironment(gym.Env):

    def __init__(self, df_features, n_features, arr, clustering_model) -> None:
        self.observation_space = spaces.Box(0, 1, shape=(len(df_features.columns),), dtype=np.float32)
        self.action_space = spaces.Discrete(len(df_features.columns))
        
        self.all_features = df_features.copy()
        self.current_state = np.zeros(len(self.all_features.columns))
        self.n_features = n_features
        self.clustering_model = clustering_model
        
        self.past_reward = 0

    
    def _get_obs(self) -> np.array:
        return self.current_state

    def _get_info(self) -> dict:
        return {
            'legal_actions': [action for action in range(self.action_space.n) if not self.current_state[action]]
        }
    
    def reset(self, seed=None, options=None):
        self.current_state = np.zeros(len(self.all_features.columns))
        observation = self._get_obs()
        info = self._get_info()
        return observation, info
    
    def _get_reward(self, action):
        selected_features = [feature for i, feature in enumerate(self.all_features.columns) if env.current_state[i]]
        y_pred = self.clustering_model.fit_predict(self.all_features[selected_features])
        
        try:
            score = silhouette_score(arr, y_pred)
        except ValueError:
            # happens when all labels have same value, thus no real "clustering" has occurred
            score = -1
        
        gain = score - self.past_reward
        self.past_reward = score
        
        return gain
    
    def step(self, action):
        self.current_state[action] = 1
        reward = self._get_reward(action)
        observation = self._get_obs()
        info = self._get_info()
        
        if len(self.current_state[self.current_state == 1]) < self.n_features:
            terminated = False
        else:
            terminated = True

        return observation, reward, terminated, False, info
    
    def render(self):
        pass

    def _render_frame(self):
        pass
    
    def close(self):
        pass


In [189]:
episodes = 2
n_features = 3

env = FeatureSelectionEnvironment(
    df_features=df_all_feats,
    n_features=n_features,
    arr=arr,
    clustering_model=model,
)

obs, info = env.reset()
done = False

agent = QLAgent(starting_state=tuple(obs), state_space=env.observation_space, action_space=env.action_space)

for episode in range(episodes):
    print(f'Episode {episode}')
    while not done:
        action = agent.act(info['legal_actions'])
        print('Action:', action)
        next_obs, reward, terminated, truncated, info = env.step(action)
        print(reward)
        agent.learn(tuple(next_obs), reward, done)
        done = terminated or truncated
    obs, info = env.reset()
    done = False

Episode 0
Action: 0
-0.12502438094105928
Action: 1
0.0
Action: 2
0.0
Episode 1
Action: 1779
0.4507225871642881
Action: 0
-0.4507225871642881
Action: 1
0.0


In [None]:
# Focus on feature selection, with unsupervised metrics
# Let's start with hierarchical clustering