In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

import trimesh

scene = trimesh.Scene()

In [3]:
import my_code.diffusion_training_sign_corr.data_loading as data_loading
import my_code.datasets.shape_dataset as shape_dataset
import my_code.datasets.template_dataset as template_dataset


dataset_single = shape_dataset.SingleFaustDataset(
    phase='train',
    data_root = 'data_with_smpl_corr/FAUST_r',
    centering = 'bbox',
    num_evecs=128,
    lb_cache_dir=f'/home/s94zalek_hpc/shape_matching/data_with_smpl_corr/FAUST_r/diffusion',
    # lb_cache_dir=f'{tmp_dir}/FAUST_r/diffusion'
    return_evecs=False
)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


# Cache to local ZIP

In [5]:
# create a zip file in /home/s94zalek_hpc/shape_matching/notebooks/31.07.2024 and open it
import zipfile

zip_file = zipfile.ZipFile('/home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/FAUST_r.zip',
                           'w', compression=zipfile.ZIP_STORED)

In [8]:
import utils.geometry_util as geometry_util
import scipy.sparse

def get_operators(verts, faces, k=120, normals=None,
                  cache_zip=None, overwrite_cache=False):
    """
    See documentation for compute_operators().
    This essentailly just wraps a call to compute_operators, using a cache if possible.
    All arrays are always computed using double precision for stability,
    then truncated to single precision floats to store on disk,
    and finally returned as a tensor with dtype/device matching the `verts` input.
    """
    assert verts.dim() == 2, 'Please call get_all_operators() for a batch of vertices'
    device = verts.device
    dtype = verts.dtype
    verts_np = geometry_util.torch2np(verts)
    faces_np = geometry_util.torch2np(faces) if faces is not None else None

    if np.isnan(verts_np).any():
        raise ValueError('detect NaN vertices.')

    found = False
    if cache_zip:
        # assert osp.isdir(cache_dir), f'Invalid cache directory: {cache_zip}'
        hash_key_str = str(geometry_util.hash_arrays((verts_np, faces_np)))

        # Search through buckets with matching hashes.
        # When the loop exits,
        # this is the bucket index of the file we should write to.
        i_cache = 0
        while True:
            # From the name of the file to check
            search_path = (hash_key_str+'_'+str(i_cache)+'.npz')
            

            try:
                # npzfile = np.load(search_path, allow_pickle=True)
                
                # get the file from the zip
                npzfile = np.load(cache_zip.open(search_path), allow_pickle=True)
                
                cache_verts = npzfile['verts']
                cache_faces = npzfile['faces']
                cache_k = npzfile['k_eig'].item()

                # If the cache doesn't match, keep searching
                if (not np.array_equal(verts, cache_verts)) or (not np.array_equal(faces, cache_faces)):
                    i_cache += 1
                    print('collision detected')
                    continue

                # Delete previous file and overwrite it
                if overwrite_cache or cache_k < k:
                    # os.remove(search_path)
                    # remove the file from zip
                    cache_zip.remove(search_path)
                    break

                def read_sp_mat(prefix):
                    data = npzfile[prefix + '_data']
                    indices = npzfile[prefix + '_indices']
                    indptr = npzfile[prefix + '_indptr']
                    shape = npzfile[prefix + '_shape']
                    mat = scipy.sparse.csc_matrix((data, indices, indptr), shape=shape)
                    return mat

                # this entry matches. return it.
                frames = npzfile['frames']
                mass = npzfile['mass']
                L = read_sp_mat('L')
                evals = npzfile['evals'][:k]
                evecs = npzfile['evecs'][:, :k]
                gradX = read_sp_mat('gradX')
                gradY = read_sp_mat('gradY')

                frames = torch.from_numpy(frames).to(device=device, dtype=dtype)
                mass = torch.from_numpy(mass).to(device=device, dtype=dtype)
                L = geometry_util.sparse_np_to_torch(L).to(device=device, dtype=dtype)
                evals = torch.from_numpy(evals).to(device=device, dtype=dtype)
                evecs = torch.from_numpy(evecs).to(device=device, dtype=dtype)
                gradX = geometry_util.sparse_np_to_torch(gradX).to(device=device, dtype=dtype)
                gradY = geometry_util.sparse_np_to_torch(gradY).to(device=device, dtype=dtype)

                found = True
                break
            except KeyError:
                # not found, create a new file
                break

    if not found:
        # recompute
        frames, mass, L, evals, evecs, gradX, gradY = geometry_util.compute_operators(verts, faces, k, normals)

        dtype_np = np.float32

        # save
        if cache_zip:
            frames_np = geometry_util.torch2np(frames).astype(dtype_np)
            mass_np = geometry_util.torch2np(mass).astype(dtype_np)
            evals_np = geometry_util.torch2np(evals).astype(dtype_np)
            evecs_np = geometry_util.torch2np(evecs).astype(dtype_np)
            L_np = geometry_util.sparse_torch_to_np(L).astype(dtype_np)
            gradX_np = geometry_util.sparse_torch_to_np(gradX).astype(dtype_np)
            gradY_np = geometry_util.sparse_torch_to_np(gradY).astype(dtype_np)

            # save to zip
            np.savez(
                cache_zip.open(search_path, 'w'),
                verts=verts_np,
                faces=faces_np,
                k_eig=k,
                frames=frames_np,
                mass=mass_np,
                evals=evals_np,
                evecs=evecs_np,
                L_data=L_np.data,
                L_indices=L_np.indices,
                L_indptr=L_np.indptr,
                L_shape=L_np.shape,
                gradX_data=gradX_np.data,
                gradX_indices=gradX_np.indices,
                gradX_indptr=gradX_np.indptr,
                gradX_shape=gradX_np.shape,
                gradY_data=gradY_np.data,
                gradY_indices=gradY_np.indices,
                gradY_indptr=gradY_np.indptr,
                gradY_shape=gradY_np.shape,
            )

    return frames, mass, L, evals, evecs, gradX, gradY

In [11]:
from tqdm import tqdm

for _ in range(3):
    iterator = tqdm(range(len(dataset_single)))
    for i in iterator:
        data_i = dataset_single[i]
        verts = data_i['verts']
        faces = data_i['faces']
        frames, mass, L, evals, evecs, gradX, gradY = get_operators(verts, faces, k=128, cache_zip=zip_file)
        
    iterator.close()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [01:04<00:00,  1.24it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.32it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:06<00:00, 11.55it/s]


# Compare with reading from the disk

In [12]:
import my_code.diffusion_training_sign_corr.data_loading as data_loading
import my_code.datasets.shape_dataset as shape_dataset
import my_code.datasets.template_dataset as template_dataset


dataset_single_evecs = shape_dataset.SingleFaustDataset(
    phase='train',
    data_root = 'data_with_smpl_corr/FAUST_r',
    centering = 'bbox',
    num_evecs=128,
    lb_cache_dir=f'/home/s94zalek_hpc/shape_matching/data_with_smpl_corr/FAUST_r/diffusion',
    # lb_cache_dir=f'{tmp_dir}/FAUST_r/diffusion'
    return_evecs=True
)

In [13]:
from tqdm import tqdm

for _ in range(3):
    iterator = tqdm(range(len(dataset_single_evecs)))
    for i in iterator:
        data_i = dataset_single_evecs[i]
        verts = data_i['verts']
        faces = data_i['faces']
        # frames, mass, L, evals, evecs, gradX, gradY = get_operators(verts, faces, k=128, cache_zip=zip_file)
        
    iterator.close()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:11<00:00,  7.00it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 10.97it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:05<00:00, 13.34it/s]


# Cache to ZIP on Lustre

In [14]:
# create a zip file in /home/s94zalek_hpc/shape_matching/notebooks/31.07.2024 and open it
import zipfile

zip_file_lustre = zipfile.ZipFile('/lustre/mlnvme/data/s94zalek_hpc-shape_matching/FAUST_r.zip',
                           'w', compression=zipfile.ZIP_STORED)

In [15]:
from tqdm import tqdm

for _ in range(3):
    iterator = tqdm(range(len(dataset_single)))
    for i in iterator:
        data_i = dataset_single[i]
        verts = data_i['verts']
        faces = data_i['faces']
        frames, mass, L, evals, evecs, gradX, gradY = get_operators(
            verts, faces, k=128, cache_zip=zip_file_lustre)
        
    iterator.close()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [01:14<00:00,  1.08it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:06<00:00, 11.71it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:06<00:00, 11.71it/s]


In [16]:
# get size of /home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/FAUST_r.zip
import os

zip_file_path = '/home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/FAUST_r.zip'
zip_file_size = os.path.getsize(zip_file_path)

print(f'zip file size: {zip_file_size / 1024 / 1024} MB')

zip file size: 302.5829086303711 MB


In [18]:
# get directory size /home/s94zalek_hpc/shape_matching/data_with_smpl_corr/FAUST_r/diffusion

import shutil

dir_path = '/home/s94zalek_hpc/shape_matching/data_with_smpl_corr/FAUST_r/diffusion'
# dir_size = shutil.disk_usage(dir_path)
total, used, free = shutil.disk_usage(dir_path)

print(f'total: {total / 1024 / 1024 / 1024} GB')
print(f'used: {used / 1024 / 1024 / 1024} GB')
print(f'free: {free / 1024 / 1024 / 1024} GB')

total: 199552.078125 GB
used: 8851.46875 GB
free: 190700.609375 GB


# Subdivide directories

In [24]:
# create a zip file in /home/s94zalek_hpc/shape_matching/notebooks/31.07.2024 and open it
import zipfile

zip_test = zipfile.ZipFile('/home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/test.zip',
                           'w', compression=zipfile.ZIP_STORED)

In [25]:
str_1 = 'A13KSJDNN2'
str_2 = 'A22KJSCJCW'
str_3 = 'B1ASCKJSCI'

for str_i in [str_1, str_2, str_3]:
    dir_1 = str_i[0]
    dir_2 = str_i[1]
    dir_3 = str_i[2]
    
    zip_test.writestr(f'{dir_1}/{dir_2}/{dir_3}/{str_i}.txt', str_i)

In [26]:
# close the zip file
zip_test.close()

In [33]:
# print the content of the zip file
with zipfile.ZipFile('/home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/test.zip', 'r') as zip_test:
    # print(zip_test.iterdir())
    
    # zip_path = zipfile.Path(zip_test)
    # print(list(zip_path.iterdir()))

    # read the file str_1
    print(zip_test.read('A/1/3/A13KSJDNN2.txt'))

    # for file_name in zip_test.namelist():
    #     with zip_test.open(file_name) as file:
    #         print(file.read())

b'A13KSJDNN2'


# Merge 2 zip files

In [35]:
dataset_single_2 = shape_dataset.SingleFaustDataset(
    phase='train',
    data_root = 'data_with_smpl_corr/FAUST_original',
    centering = 'bbox',
    num_evecs=128,
    lb_cache_dir=f'/home/s94zalek_hpc/shape_matching/data_with_smpl_corr/FAUST_r/diffusion',
    # lb_cache_dir=f'{tmp_dir}/FAUST_r/diffusion'
    return_evecs=False
)

In [36]:
# create a zip file in /home/s94zalek_hpc/shape_matching/notebooks/31.07.2024 and open it
import zipfile

zip_file_2 = zipfile.ZipFile('/home/s94zalek_hpc/shape_matching/notebooks/31.07.2024/FAUST_orig.zip',
                           'w', compression=zipfile.ZIP_STORED)

In [37]:
from tqdm import tqdm

for _ in range(3):
    iterator = tqdm(range(len(dataset_single_2)))
    for i in iterator:
        data_i = dataset_single_2[i]
        verts = data_i['verts']
        faces = data_i['faces']
        frames, mass, L, evals, evecs, gradX, gradY = get_operators(
            verts, faces, k=128, cache_zip=zip_file_2)
        
    iterator.close()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [01:30<00:00,  1.13s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:10<00:00,  7.86it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:10<00:00,  7.63it/s]


In [None]:
# open this file /home/s94zalek_hpc/3D-CODED/data/datas_surreal_train.pth


In [3]:
import torch

surreal_shapes = torch.load(
    '/lustre/mlnvme/data/s94zalek_hpc-shape_matching/mmap_datas_surreal_train.pth',
    # '/home/s94zalek_hpc/3D-CODED/data/datas_surreal_train.pth',
    mmap=True)
# print('saving')
# torch.save(surreal_shapes, '/lustre/mlnvme/data/s94zalek_hpc-shape_matching/mmap_datas_surreal_train.pth')

In [2]:
# !ls /lustre/mlnvme/data/s94zalek_hpc-shape_matching

data_FAUST  datas_surreal_train.pth  FAUST_r.zip  mmap_datas_surreal_train.pth


In [1]:
from my_code.datasets.surreal_dataset_3dc import TemplateSurrealDataset3DC

# create the dataset
dataset = TemplateSurrealDataset3DC(
    shape_path=f'/home/s94zalek_hpc/3D-CODED/data/datas_surreal_train.pth',
    num_evecs=128,
    use_cuda=False,
    cache_lb_dir=None,
    return_evecs=True
)    

In [2]:
dataset[12]

{'first': {'id': tensor(-1),
  'verts': tensor([[ 0.0643,  0.5906,  0.1324],
          [ 0.0606,  0.5795,  0.1361],
          [ 0.0685,  0.5792,  0.1295],
          ...,
          [-0.0187,  0.5694,  0.0521],
          [-0.0190,  0.5701,  0.0534],
          [-0.0200,  0.5678,  0.0546]]),
  'faces': tensor([[   3,    0,    2],
          [   2,    0,    1],
          [   5,    4,    1],
          ...,
          [4805, 3511, 6309],
          [3511, 1330, 6309],
          [6309, 1330, 4687]]),
  'corr': tensor([   0,    1,    2,  ..., 6887, 6888, 6889]),
  'evecs': tensor([[-1.0000, -0.9561,  0.0091,  ...,  0.6545, -0.0469,  0.0680],
          [-1.0000, -0.9537,  0.0091,  ...,  0.0461, -0.0636,  0.0184],
          [-1.0000, -0.9533,  0.0091,  ...,  0.0358, -0.1624,  0.0464],
          ...,
          [-1.0000, -0.9409,  0.0086,  ...,  1.5209,  1.3726,  0.5910],
          [-1.0000, -0.9412,  0.0086,  ...,  1.5111,  1.3603,  0.5940],
          [-1.0000, -0.9410,  0.0086,  ...,  1.4704,  1.352

: 