# Overview

In [None]:
from dataclasses import dataclass
from typing import List, Optional

import numpy as np

from distance import euclidean

In [None]:
# Five toy time series with four samples each, stored as float32.
t1 = np.asarray([1, 1, 1, 2], dtype=np.float32)
t2 = np.asarray([1, 2, 1, 2], dtype=np.float32)
t3 = np.asarray([1, 4, 1, 2], dtype=np.float32)
t4 = np.asarray([1, 2, 1, 8], dtype=np.float32)
t5 = np.asarray([1, 4, 1, 6], dtype=np.float32)

# Stack the series row-wise: one time series per row.
t_input = np.stack((t1, t2, t3, t4, t5))
t_input.shape

(5, 4)

- **Input**: a list of time series data

# Step 1 - Computing Pairwise Distance

Compute the pairwise distance between every pair of time series.

In [None]:
def get_distances(ts: np.ndarray) -> np.ndarray:
    """Build the square matrix of pairwise distances between the rows of ``ts``.

    Args:
        ts: 2-D array; each row is treated as one time series.

    Returns:
        A float32 array of shape ``(rows, rows)`` whose entry ``[y, x]`` is
        ``euclidean(ts[y], ts[x])`` as computed by the imported helper.
    """
    # Unpacking two values deliberately requires a 2-D input.
    series_count, _ = ts.shape
    matrix = np.zeros((series_count, series_count), dtype=np.float32)

    for row, series_a in enumerate(ts):
        for col, series_b in enumerate(ts):
            matrix[row, col] = euclidean(series_a, series_b)

    return matrix


# Evaluate the pairwise distance matrix for the sample input so the notebook displays it.
get_distances(t_input)

array([[0.       , 1.       , 3.       , 6.0827627, 5.       ],
       [1.       , 0.       , 2.       , 6.       , 4.472136 ],
       [3.       , 2.       , 0.       , 6.3245554, 4.       ],
       [6.0827627, 6.       , 6.3245554, 0.       , 2.828427 ],
       [5.       , 4.472136 , 4.       , 2.828427 , 0.       ]],
      dtype=float32)

# Step 2 - Clustering

1. Every time series starts in a cluster by itself
2. Merge the two clusters with the smallest distance between them
3. Re-evaluate the distances between the remaining clusters
4. Repeat steps 2–3 until the desired number of clusters remains

## Linkage

In each step, we merge the pair of clusters with the smallest distance between them. The linkage criterion determines how the distance between two clusters is calculated:

- **Complete linkage (farthest neighbor)**: the **distance between two clusters** is the **maximum pairwise distance** between elements in the two clusters
  - $\max\left\{ d\left( a, b \right): a \in A, b \in B \right\}$
- **Single linkage (nearest neighbor)**: the **distance between two clusters** is the **minimum pairwise distance** between elements in the two clusters
  - $\min\left\{ d\left( a, b \right): a \in A, b \in B \right\}$

In [None]:
# eq=False keeps identity-based ``==`` and ``hash()``: the generated value-based
# versions would compare/hash the ndarray field, which raises for arrays.
@dataclass(frozen=True, eq=False)
class Cluster:
    """Immutable node of a binary cluster tree.

    A node carries a ``data`` array and references to up to two child
    clusters. Leaf nodes simply leave ``left`` and ``right`` as ``None``.
    """

    # Array payload attached to this node.
    data: np.ndarray
    # Child clusters; ``None`` when the node has no child on that side.
    left: Optional["Cluster"] = None
    right: Optional["Cluster"] = None

# eq=False keeps identity-based ``==`` and ``hash()``: the generated value-based
# versions would compare/hash lists of ndarrays, which raises.
@dataclass(frozen=True, eq=False)
class Candidate:
    """A pair of clusters considered for merging, with their distance.

    Each cluster is a list of its member arrays.
    """

    # Members of the first cluster of the pair.
    cluster1: List[np.ndarray]
    # Members of the second cluster of the pair.
    cluster2: List[np.ndarray]
    # Distance between the two clusters.
    distance: float

    def merge(self) -> List[np.ndarray]:
        """Return a new list with the members of ``cluster1`` then ``cluster2``.

        Neither input list is modified.
        """
        # Unpacking always concatenates members; ``+`` would add element-wise
        # if an ndarray were passed instead of a list.
        return [*self.cluster1, *self.cluster2]


SyntaxError: expected ':' (1299740732.py, line 8)

In [None]:
def cluster(ts: np.ndarray, clusters_count: int,
            linkage: str = "complete") -> List[List[np.ndarray]]:
    """Agglomeratively cluster the rows of ``ts`` into ``clusters_count`` groups.

    Every time series starts in a cluster by itself. The two clusters with the
    smallest linkage distance are then merged, repeatedly, until only
    ``clusters_count`` clusters remain.

    Args:
        ts: 2-D array with one time series per row.
        clusters_count: Number of clusters to stop at; must satisfy
            ``1 <= clusters_count <= len(ts)``.
        linkage: How the distance between two clusters is derived from the
            pairwise distances of their members: ``"complete"`` takes the
            maximum, ``"single"`` takes the minimum.

    Returns:
        A list of clusters; each cluster is a list of its member rows of ``ts``.

    Raises:
        ValueError: If ``linkage`` is not a known criterion or
            ``clusters_count`` is outside ``1..len(ts)``.
    """
    reducers = {"complete": np.max, "single": np.min}
    if linkage not in reducers:
        raise ValueError(
            f"unknown linkage {linkage!r}; expected one of {sorted(reducers)}"
        )
    if not 1 <= clusters_count <= len(ts):
        raise ValueError(
            f"clusters_count must be between 1 and {len(ts)}, got {clusters_count}"
        )
    reduce_distances = reducers[linkage]

    distances = get_distances(ts)
    # Parallel lists: clusters[i] holds the member series of cluster i, and
    # members[i] holds the matching row indices into ``ts`` so that linkage
    # distances can be looked up in the precomputed ``distances`` matrix.
    clusters = [[t] for t in ts]  # type: List[List[np.ndarray]]
    members = [[i] for i in range(len(ts))]  # type: List[List[int]]

    def create_candidate(cluster1: int, cluster2: int) -> Candidate:
        # Sub-matrix of member-to-member distances between the two clusters.
        between = distances[np.ix_(members[cluster1], members[cluster2])]
        return Candidate(clusters[cluster1], clusters[cluster2],
                         float(reduce_distances(between)))

    while len(clusters) > clusters_count:
        count = len(clusters)
        # Visit each unordered pair once: (i, j) and (j, i) are the same merge.
        candidates = [(first, second, create_candidate(first, second))
                      for first in range(count)
                      for second in range(first + 1, count)]

        first, second, candidate = min(candidates,
                                       key=lambda item: item[2].distance)

        merged_members = members[first] + members[second]
        # Delete the higher index first so the lower index stays valid.
        for index in (second, first):
            del clusters[index]
            del members[index]

        # The merged pair re-enters as ONE cluster, not as loose members.
        clusters.append(candidate.merge())
        members.append(merged_members)

    return clusters