In [1]:
from tools import OUT_PATH, open_meta_df
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from tqdm.auto import tqdm
from pathlib import Path
from functools import reduce
from operator import mul
import numpy_indexed as npi

In [2]:
# Number of k-means clusters fitted for each video.
LEN_CLUSTERS = 10

# Metadata table; its index is iterated later as the per-video identifier.
meta_df = open_meta_df()

# Arrays stored under OUT_PATH.
# `landmarks` is flattened to 2-D further down; `video_id` is compared against
# meta_df.index values to pick the rows of each video
# (presumably one entry per landmarks row -- TODO confirm).
landmarks = np.load(OUT_PATH / 'landmarks.npy')
video_id = np.load(OUT_PATH / 'video_id.npy')

In [3]:
# Target 2-D shape: keep the first axis, collapse every remaining axis into one.
# The initializer 1 makes the product well-defined when there are no trailing
# axes (1-D input), where a bare reduce() would raise TypeError.
new_landmarks_shape = (landmarks.shape[0], reduce(mul, landmarks.shape[1:], 1))
new_landmarks_shape

(147205, 99)

In [4]:
# Zero-initialised store for the centroids: one (LEN_CLUSTERS, n_features) slab
# per row of meta_df, where n_features is the flattened landmark width.
videos_centroids = np.zeros(
    shape=(len(meta_df.index), LEN_CLUSTERS, new_landmarks_shape[1]),
)
videos_centroids.shape

(1058, 10, 99)

In [5]:
# Flatten each landmarks row into a single feature vector (2-D result).
reshaped_landmarks = landmarks.reshape(*new_landmarks_shape)
reshaped_landmarks.shape

(147205, 99)

In [7]:
# Per-frame cluster label, one entry per row of reshaped_landmarks.
# Stored as float because np.zeros defaults to float64.
# NOTE(review): these are the raw KMeans labels, whereas the centroids saved in
# videos_centroids are re-ordered by median frame position below, so label k
# does not index videos_centroids[i][k] -- confirm this is intended.
pose_clusters = np.zeros(reshaped_landmarks.shape[0])

for i in tqdm(meta_df.index):
    # Build the frame mask once; it is used to read the frames and to write
    # the labels back.
    video_mask = video_id == i
    data_i = reshaped_landmarks[video_mask]
    if data_i.shape[0] < LEN_CLUSTERS:
        # KMeans would reject this anyway; fail with the offending video named.
        raise ValueError(
            f'video {i} has {data_i.shape[0]} frames, '
            f'fewer than LEN_CLUSTERS={LEN_CLUSTERS}'
        )

    kmeans = KMeans(n_clusters=LEN_CLUSTERS, random_state=42, n_init='auto')
    prediction = kmeans.fit_predict(data_i)
    pose_clusters[video_mask] = prediction

    # Median frame position of every cluster that actually received frames.
    frame_index = np.arange(data_i.shape[0])
    labels, median_frame_per_centroid = npi.group_by(prediction).median(frame_index)
    if labels.shape[0] != LEN_CLUSTERS:
        # With missing labels the medians no longer line up with the rows of
        # cluster_centers_, and the (LEN_CLUSTERS, n_features) slot cannot be
        # filled correctly.
        raise ValueError(
            f'video {i}: only {labels.shape[0]} of {LEN_CLUSTERS} clusters '
            f'received frames'
        )

    # Order centroids by when their cluster occurs in the video. Indexing goes
    # through `labels` so each median is paired with its own centroid row.
    centroids = kmeans.cluster_centers_
    # NOTE(review): `i` is a meta_df index label used as a row position here;
    # this assumes the index is 0..len(meta_df)-1 -- TODO confirm.
    videos_centroids[i] = centroids[labels[median_frame_per_centroid.argsort()]]

  0%|          | 0/1058 [00:00<?, ?it/s]

In [8]:
# Peek at the labels assigned to the first ten frames.
pose_clusters[0:10]

array([0., 4., 4., 4., 4., 4., 4., 2., 2., 2.])

In [9]:
# Sanity check: True if any centroid value is still exactly 0, e.g. a slot that
# was never filled. Test the boolean mask itself: selecting the zero elements
# first and calling .any() on them is always False.
(videos_centroids == 0).any()

False

In [10]:
# Preview a 5 x 5 x 5 corner: first five videos, centroids and features.
videos_centroids[:5, :5, :5]

array([[[ 0.52630234,  0.37640212, -0.74534531,  0.53798417,
          0.34380546],
        [ 0.52669251,  0.37396876, -0.61175384,  0.53762975,
          0.34276031],
        [ 0.52394984,  0.37040039, -0.54449829,  0.53543784,
          0.33973884],
        [ 0.52109573,  0.37270567, -0.62886719,  0.53314732,
          0.34281625],
        [ 0.51893888,  0.38486941, -0.58462744,  0.53153155,
          0.34882127]],

       [[ 0.5213431 ,  0.37912015, -0.61652294,  0.53269162,
          0.34605468],
        [ 0.52081872,  0.37991759, -0.67531037,  0.53272256,
          0.34639908],
        [ 0.5176312 ,  0.36193799, -0.39678858,  0.52953316,
          0.33261918],
        [ 0.51749281,  0.3634546 , -0.50517845,  0.52935344,
          0.33418927],
        [ 0.51814898,  0.36386488, -0.64655181,  0.52996747,
          0.33534366]],

       [[ 0.51914605,  0.38226161, -0.7385072 ,  0.53054488,
          0.34744609],
        [ 0.5180881 ,  0.37170506, -0.66115797,  0.53029671,
          0

In [11]:
# Persist both results under OUT_PATH, centroids first, then per-frame labels.
for out_name, out_array in (
    ('videos_centroids', videos_centroids),
    ('pose_clusters', pose_clusters),
):
    np.save(OUT_PATH / out_name, out_array)