In [1]:
import numpy as np
import torch

from tqdm.auto import tqdm
from pathlib import Path

import itertools

import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv(), verbose=True)

True

In [2]:
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

In [3]:
TEMP_DIR = Path('/data/huze/ray_results/algonauts2021/tmp/notebook-010/')

In [4]:
backbone_names = np.load(TEMP_DIR.joinpath(Path(f'all-loaded_backbone_names.npy')))

In [5]:
backbone_names

array(['2d_densnet_warp_3d', '3d_swin', '2d_bdcnvgg_warp_3d', 'i3d_flow',
       '2d_moby_swin_warp_3d', 'i3d_rgb', '2d_seg_swin_warp_3d',
       'audio_vggish', '2d_pyconvsegnet_warp_3d', '2d_simclr_warp_3d',
       '2d_colorizer_warp_3d'], dtype='<U23')

In [8]:
k = 100

In [11]:
km_save_dir = Path('./tmp/notebook011/')
km_save_dir.mkdir(parents=True, exist_ok=True)

In [12]:
loaded_ve_shapes = []

In [13]:
for backbone_name in tqdm(backbone_names):
    print('running...', backbone_name)
    voxel_embeddings = np.load(TEMP_DIR.joinpath(Path(f'{backbone_name}-voxel_embeddings.npy')))
    loaded_ve_shapes.append(voxel_embeddings.shape)
    # voxel_scores = np.load(TEMP_DIR.joinpath(Path(f'{backbone_name}-voxel_scores.npy')))
    # predictions = np.load(TEMP_DIR.joinpath(Path(f'{backbone_name}-predictions.npy')))
    
    
    km = KMeans(
        n_clusters=k,
        init='random',
        n_init=10,
        max_iter=300,
        tol=0.0001,
        verbose=0,
        copy_x=False,
        algorithm='elkan',
    )

    km.fit(voxel_embeddings)
    
    torch.save(km, km_save_dir.joinpath(Path(f'{backbone_name}-kmeans.pt')))
    
    del voxel_embeddings

  0%|          | 0/11 [00:00<?, ?it/s]

running... 2d_densnet_warp_3d
running... 3d_swin
running... 2d_bdcnvgg_warp_3d
running... i3d_flow
running... 2d_moby_swin_warp_3d
running... i3d_rgb
running... 2d_seg_swin_warp_3d
running... audio_vggish
running... 2d_pyconvsegnet_warp_3d
running... 2d_simclr_warp_3d
running... 2d_colorizer_warp_3d


In [32]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:           503G         13G        200G        1.8M        289G        486G
Swap:          7.6G        816M        6.8G


In [30]:
# all

In [17]:
loaded_ve_shapes = np.asarray(loaded_ve_shapes)

In [27]:
full_shape = (loaded_ve_shapes[0, 0], loaded_ve_shapes[:, 1].sum())

In [28]:
full_shape

(161326, 391168)

In [29]:
full_voxel_embedding = np.zeros(shape=full_shape, dtype=np.float32)

In [31]:
ws = np.load(TEMP_DIR.joinpath(Path(f'all-backbone_ensemble_ws.npy')))

In [33]:
start = 0
for i, backbone_name in enumerate(tqdm(backbone_names)):
    print('loading...', backbone_name)
    voxel_embeddings = np.load(TEMP_DIR.joinpath(Path(f'{backbone_name}-voxel_embeddings.npy')))
    voxel_embeddings *= ws[i]
    
    end = start + voxel_embeddings.shape[1]
    full_voxel_embedding[:, start:end] = voxel_embeddings
    
    start = end

  0%|          | 0/11 [00:00<?, ?it/s]

loading... 2d_densnet_warp_3d
loading... 3d_swin
loading... 2d_bdcnvgg_warp_3d
loading... i3d_flow
loading... 2d_moby_swin_warp_3d
loading... i3d_rgb
loading... 2d_seg_swin_warp_3d
loading... audio_vggish
loading... 2d_pyconvsegnet_warp_3d
loading... 2d_simclr_warp_3d
loading... 2d_colorizer_warp_3d


In [44]:
del voxel_embeddings

NameError: name 'voxel_embeddings' is not defined

In [43]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:           503G        265G        8.3G        1.7M        229G        234G
Swap:          7.6G        847M        6.8G


In [None]:
# k-means initilization use double memory

In [None]:
km = KMeans(
    n_clusters=k,
    init='random',
    n_init=10,
    max_iter=300,
    tol=0.0001,
    verbose=1,
    copy_x=False,
    algorithm='elkan',
)

km.fit(full_voxel_embedding)

torch.save(km, km_save_dir.joinpath(Path(f'all-kmeans.pt')))

Initialization complete
Iteration 0, inertia 3171.67431640625
Iteration 1, inertia 1600.1148681640625
Iteration 2, inertia 1598.8662109375
Iteration 3, inertia 1598.6185302734375
Iteration 4, inertia 1598.5238037109375
Iteration 5, inertia 1598.469482421875
Iteration 6, inertia 1598.43603515625
Iteration 7, inertia 1598.4169921875
Iteration 8, inertia 1598.4044189453125
Iteration 9, inertia 1598.3974609375
Iteration 10, inertia 1598.39306640625
Iteration 11, inertia 1598.39013671875
Iteration 12, inertia 1598.387939453125
Iteration 13, inertia 1598.386474609375
Iteration 14, inertia 1598.385009765625
Iteration 15, inertia 1598.384033203125
Iteration 16, inertia 1598.3834228515625
Iteration 17, inertia 1598.3828125
Iteration 18, inertia 1598.382568359375
Iteration 19, inertia 1598.38232421875
Iteration 20, inertia 1598.3819580078125
Iteration 21, inertia 1598.381591796875
Iteration 22, inertia 1598.3817138671875
Iteration 23, inertia 1598.38134765625
Iteration 24, inertia 1598.381225585

In [None]:
km