# Examples of how to use the code for agglomerative clustering

In [None]:
from src.agglomerative_dtw_clustering import AgglomerativeTSClustering
from src.continuous_series import Cols
from src.stats import DailyTimeseries
from src.reshape_resampled_data_into_timeseries import ReshapeResampledDataIntoTimeseries
from src.configurations import Hourly, Configuration, GeneralisedCols
from src.read_preprocessed_df import ReadPreprocessedDataFrame

## Single variate clustering

Multivariate clustering is not yet implemented.

### Read the data

Examples of alternative configurations are given in the code comments.

In [11]:
# --- what to analyse ---------------------------------------------------------
zip_id = ''  # set this to the zip_id (as a string) whose data you want to cluster
y_sub_label = Cols.Mean  # y label handed to the cluster grid plot further down
cluster_by = [GeneralisedCols.mean_iob.value]  # variate(s) the clustering is based on

# --- how to cut the data into time series ------------------------------------
# Resampling rule; src.stats.WeeklyTimeseries is the alternative.
# Clusters 'daily' time series that have 24 datapoints with at least a reading every 60min.
daily_ts = DailyTimeseries()

# --- load and reshape ----------------------------------------------------------
# hourly resampled data for the chosen zip id
hourly_reader = ReadPreprocessedDataFrame(sampling=Hourly(), zip_id=zip_id)
raw_df = hourly_reader.df

# Variates to plot: here mean iob, mean cob, mean bg. Alternatives are std, min, max ...
variates = Configuration.resampled_mean_columns()

# Helper that turns the resampled data into x train and offers other convenience functions.
translate = ReshapeResampledDataIntoTimeseries(raw_df, daily_ts, variates)

### Cluster the data

In [None]:
# Build the numpy arrays for clustering, each of shape (n_ts, ts_length, dimensions):
# one restricted to the variate(s) in `cluster_by` (1d) and one holding all variates (3d).
x_train = translate.to_x_train(cluster_by)
x_full = translate.to_x_train()

# Run the clustering on x_train; x_full and its column names are handed over as well.
ac = AgglomerativeTSClustering(
    x_train=x_train,
    x_train_column_names=['IOB'],
    timeseries_description=daily_ts,
    x_full=x_full,
    x_full_column_names=["IOB", "COB", "BG"],
)

### Different visualisations of outcome

In [None]:
# Bare expression on the last line of the cell, so the notebook displays its value.
ac.no_clusters  # number of resulting clusters

In [None]:
# Grid plot of the resulting clusters, each shown with its barycenter.
# Setting the flag below to False may produce too many single-time-series clusters.
only_display_clusters_with_multiple_ts = True
ac.plot_clusters_in_grid(
    y_sub_label,
    only_display_multiple_ts_clusters=only_display_clusters_with_multiple_ts,
)

In [None]:
# Dendrogram of the clustering held in `ac`.
# NOTE(review): p=5 and no_labels=True presumably follow scipy's dendrogram options
# (truncation level / hidden leaf labels) — confirm against plot_dendrogram.
ac.plot_dendrogram(p=5, no_labels=True)

In [None]:
# Silhouette analysis of the clustering held in `ac`, for non-singleton clusters.
ac.plot_silhouette_analysis()

### Change Distance Metric
By default, AgglomerativeTSClustering uses DTW as its distance measure. You can provide additional parameters to apply a Sakoe-Chiba band constraint; see the DTW metrics in tslearn, which is used to calculate the distance matrix.

In [None]:
# Settings for the constrained distance calculation.
band_size = 2  # allows for max 2h of time warping
constraint = "sakoe_chiba"
# NOTE(review): `metric` is not passed to the constructor in this cell — presumably it is
# informational only, since DTW is described as the default distance measure; confirm.
metric = "dtw"

# Same data and column names as for `ac`, plus the distance constraint and its radius.
ac_sk = AgglomerativeTSClustering(
    x_train=x_train,
    x_train_column_names=['IOB'],
    timeseries_description=daily_ts,
    x_full=x_full,
    x_full_column_names=["IOB", "COB", "BG"],
    distance_constraint=constraint,
    sakoe_chiba_radius=band_size,
)

In [None]:
# Dendrogram for the clustering created with the distance constraint (`ac_sk`),
# using the same plot arguments as for `ac` so the two can be compared.
ac_sk.plot_dendrogram(p=5, no_labels=True)

In [None]:
# Silhouette analysis for the clustering created with the distance constraint (`ac_sk`).
ac_sk.plot_silhouette_analysis()