## Experiments to import Datasets from the DeepLagrangianFluids Repository
https://github.com/isl-org/DeepLagrangianFluids/blob/master/datasets/dataset_reader_physics.py

In [2]:
# %pip install tensorpack zstandard msgpack msgpack_numpy

In [3]:
import os
import sys
import numpy as np
from glob import glob
import tensorpack.dataflow as dataflow
import numpy as np
import zstandard as zstd
import msgpack
import msgpack_numpy
msgpack_numpy.patch()

Failed to import tensorflow.


In [4]:
# Source: https://github.com/isl-org/DeepLagrangianFluids/blob/d651c6fdf2aca3fac9abe3693b20981b191b4769/datasets/dataset_reader_physics.py

class PhysicsSimDataFlow(dataflow.RNGDataFlow):
    """Stream sliding windows of frames from SplishSplash msgpack archives.

    Every file is read as a zstd-compressed msgpack holding a sequence of
    frame records.  For each file, one sample is produced per run of
    ``window`` consecutive frames.  A sample is a dict with ``box`` and
    ``box_normals`` (taken from the first frame of the file) plus, for every
    offset ``t`` inside the window, the keys ``pos<t>``, ``vel<t>``,
    ``m<t>``, ``viscosity<t>``, ``frame_id<t>`` and ``scene_id<t>``.

    Args:
        files: Non-empty sequence of paths to the archive files.
        random_rotation: If true, each sample gets its own random rotation
            that leaves the second coordinate untouched; it is applied to
            the box, the box normals, the positions and the velocities.
        shuffle: If true, both the file order and the order of the windows
            inside each file are shuffled with ``self.rng``.
        window: Number of consecutive frames per sample (at least 1).

    Raises:
        Exception: If ``files`` is empty or ``window`` is smaller than 1.
    """

    def __init__(self, files, random_rotation=False, shuffle=False, window=2):
        if len(files) == 0:
            raise Exception("List of files must not be empty")
        if window < 1:
            raise Exception("window must be >=1 but is {}".format(window))
        self.window = window
        self.shuffle = shuffle
        self.random_rotation = random_rotation
        self.files = files

    def __iter__(self):
        # NOTE(review): self.rng is never assigned in this class; presumably
        # it is provided by RNGDataFlow.reset_state() -- confirm before
        # iterating with shuffle or random_rotation enabled.
        unzstd = zstd.ZstdDecompressor()
        file_order = np.arange(len(self.files))
        if self.shuffle:
            self.rng.shuffle(file_order)

        for file_idx in file_order:
            # Each file is decompressed and decoded in one go.
            with open(self.files[file_idx], 'rb') as stream:
                packed = unzstd.decompress(stream.read())
            frames = msgpack.unpackb(packed, raw=False)

            # One window starts at every index that still leaves room for
            # `window` frames.
            window_starts = np.arange(len(frames) - self.window + 1)
            if self.shuffle:
                self.rng.shuffle(window_starts)

            # The box is stored on the first frame and reused for the
            # whole file.
            box = frames[0]['box']
            box_normals = frames[0]['box_normals']

            for start in window_starts:
                if self.random_rotation:
                    angle = self.rng.uniform(0, 2 * np.pi)
                    sin_a = np.sin(angle)
                    cos_a = np.cos(angle)
                    rotation = np.array(
                        [[cos_a, 0, sin_a],
                         [0, 1, 0],
                         [-sin_a, 0, cos_a]],
                        dtype=np.float32)
                    sample = {
                        'box': np.matmul(box, rotation),
                        'box_normals': np.matmul(box_normals, rotation),
                    }
                else:
                    rotation = None
                    sample = {'box': box, 'box_normals': box_normals}

                for offset in range(self.window):
                    frame = frames[start + offset]
                    suffix = str(offset)

                    # Vector quantities follow the sample's rotation.
                    for key in ('pos', 'vel'):
                        if rotation is None:
                            sample[key + suffix] = frame[key]
                        else:
                            sample[key + suffix] = np.matmul(frame[key],
                                                             rotation)

                    # The remaining entries are copied through unchanged.
                    for key in ('m', 'viscosity', 'frame_id', 'scene_id'):
                        sample[key + suffix] = frame[key]

                yield sample
    
    
def read_data_val(files, **kwargs):
    """Create a validation pipeline via ``read_data``.

    The pipeline uses a batch size of 1, a single worker, no repetition and
    no shuffle buffer.  Any further keyword arguments are handed to
    ``read_data`` unchanged.
    """
    validation_settings = dict(
        batch_size=1,
        repeat=False,
        shuffle_buffer=None,
        num_workers=1,
    )
    return read_data(files=files, **validation_settings, **kwargs)


def read_data_train(files, batch_size, random_rotation=True, **kwargs):
    """Create a training pipeline via ``read_data``.

    The pipeline repeats and uses a shuffle buffer of 512 samples;
    ``random_rotation`` defaults to enabled.  Any further keyword arguments
    are handed to ``read_data`` unchanged.
    """
    training_settings = {
        'batch_size': batch_size,
        'random_rotation': random_rotation,
        'repeat': True,
        'shuffle_buffer': 512,
    }
    return read_data(files=files, **training_settings, **kwargs)


def read_data(files=None,
              batch_size=1,
              window=2,
              random_rotation=False,
              repeat=False,
              shuffle_buffer=None,
              num_workers=1,
              cache_data=False):
    """Assemble the tensorpack pipeline around ``PhysicsSimDataFlow``.

    The stages are applied in this order: optional endless repetition,
    optional local shuffling, optional multi-process running, batching
    (batches are lists) and optional caching.  ``reset_state()`` is called
    on the finished pipeline before it is returned.

    Args:
        files: Non-empty sequence of archive paths.
        batch_size: Number of samples per batch.
        window: Number of consecutive frames per sample.
        random_rotation: Forwarded to ``PhysicsSimDataFlow``.
        repeat: If truthy, the data is repeated endlessly.
        shuffle_buffer: Size of the local shuffle buffer.  A truthy value
            also turns on shuffling inside ``PhysicsSimDataFlow``.
        num_workers: More than one worker enables the multi-process runner.
        cache_data: If truthy, the batched pipeline is wrapped in
            ``CacheData``.

    Returns:
        The assembled dataflow.

    Raises:
        ValueError: If ``files`` is None or empty, or if ``cache_data`` is
            combined with ``repeat``, ``random_rotation`` or more than one
            worker.  ``ValueError`` is a subclass of ``Exception``, so
            handlers written for the earlier generic exceptions still work.
    """
    # Fail early with a clear message instead of a TypeError from slicing
    # None further down.
    if files is None or len(files) == 0:
        raise ValueError("List of files must not be empty")

    print(files[0:20], '...' if len(files) > 20 else '')

    # caching makes only sense if the data is finite
    if cache_data:
        # Use plain truthiness here: the pipeline below does the same, so a
        # truthy value that is not literally True must not slip through.
        if repeat:
            raise ValueError("repeat must be False if cache_data==True")
        if random_rotation:
            raise ValueError(
                "random_rotation must be False if cache_data==True")
        if num_workers != 1:
            raise ValueError("num_workers must be 1 if cache_data==True")

    df = PhysicsSimDataFlow(
        files=files,
        random_rotation=random_rotation,
        shuffle=bool(shuffle_buffer),
        window=window,
    )

    if repeat:
        df = dataflow.RepeatedData(df, -1)

    if shuffle_buffer:
        df = dataflow.LocallyShuffleData(df, shuffle_buffer)

    if num_workers > 1:
        df = dataflow.MultiProcessRunnerZMQ(df, num_proc=num_workers)

    df = dataflow.BatchData(df, batch_size=batch_size, use_list=True)

    if cache_data:
        df = dataflow.CacheData(df)

    df.reset_state()
    return df

In [5]:
os.path.exists('../datasets/data/dpi_dam_break/train/0_00.msgpack.zst')

True

In [6]:
# Build a flow over every training archive of the dpi_dam_break dataset, e.g.
# datasets/data/dpi_dam_break/train/0_00.msgpack.zst
# NOTE: the glob() result is passed on unsorted, and shuffle=False keeps that
# order, so the files are read in whatever order glob() returned them.
flow = PhysicsSimDataFlow(files=glob('../datasets/data/dpi_dam_break/train/*.msgpack.zst'), random_rotation=False, shuffle=False, window=2)


In [7]:
flow.files

['../datasets/data/dpi_dam_break/train/0_06.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_02.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_03.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_09.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_07.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_10.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_08.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_00.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_04.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_01.msgpack.zst',
 '../datasets/data/dpi_dam_break/train/0_05.msgpack.zst']

In [8]:
# Materialise every sample (one dict per window, over all files) in memory.
# NOTE(review): kept as a comprehension on purpose; list(flow) would probe
# len(flow) first -- confirm the flow's __len__ is safe before changing this.
x = [item for item in flow]

In [9]:
import numpy as np

# For the first two samples, print every key together with the array shape,
# or with the value itself when the entry is not an array.
for sample in x[:2]:
    print('---')
    for k, v in sample.items():
        if isinstance(v, np.ndarray):
            print(k, v.shape)
        else:
            print(k, v)

---
box (10683, 3)
box_normals (10683, 3)
pos0 (3456, 3)
vel0 (3456, 3)
m0 (3456,)
viscosity0 (3456,)
frame_id0 60
scene_id0 0
pos1 (3456, 3)
vel1 (3456, 3)
m1 (3456,)
viscosity1 (3456,)
frame_id1 61
scene_id1 0
---
box (10683, 3)
box_normals (10683, 3)
pos0 (3456, 3)
vel0 (3456, 3)
m0 (3456,)
viscosity0 (3456,)
frame_id0 61
scene_id0 0
pos1 (3456, 3)
vel1 (3456, 3)
m1 (3456,)
viscosity1 (3456,)
frame_id1 62
scene_id1 0


In [10]:
# NOTE: the samples are overlapping sliding windows (window=2): the second
# frame of sample i is the first frame of sample i+1 (frame_id1 of one sample
# equals frame_id0 of the next).  The "positions are the same" line below
# therefore compares one frame with itself across two windows.  It prints
# True for that reason; it does NOT mean that particles stay in place from
# one frame to the next.

# check if masses in x[0]['m0'] are all the same
# (a single vectorised comparison against the first entry; the one-element
# slice also keeps this valid for an empty mass array)
first_masses = np.asarray(x[0]['m0'])
assert (first_masses == first_masses[:1]).all()

# Walk over neighbouring samples.  np.array_equal yields False on a shape
# mismatch instead of failing, which matters where two neighbouring samples
# come from different files.
for current, following in zip(x, x[1:]):
    print("All positions are the same between consecutive frames:", np.array_equal(current['pos1'], following['pos0']))
    print("Masses stay constant between frames:", np.array_equal(current['m0'], following['m0']))
    # Compares the first frame of this sample with the second frame of the
    # next sample, i.e. frames that are two steps apart.
    print("Masses stay constant in frame:", np.array_equal(current['m0'], following['m1']))



All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between consecutive frames: True
Masses stay constant between frames: True
Masses stay constant in frame: True
All positions are the same between

In [11]:
import plotly
import plotly.graph_objs as go

def plot_particles(positions):
    """
    Shows the given positions as markers in an interactive 3D scatter plot.

    Column 0 of `positions` is drawn on the plot's x axis, column 2 on its
    y axis and column 1 on its z axis.
    """
    # No `color` array is supplied; only size, colorscale and opacity are set.
    marker_style = dict(
        size=5,
        colorscale='Viridis',
        opacity=0.8,
    )

    cloud = go.Scatter3d(
        x=positions[:, 0],
        y=positions[:, 2],
        z=positions[:, 1],
        mode='markers',
        marker=marker_style,
    )

    figure = go.Figure(data=[cloud])
    figure.show()

plot_particles(x[0]['pos0'])  # Particle positions of the first loaded sample
plot_particles(x[0]['box'])  # Box (border) particles of that sample
plot_particles(x[0]['box_normals'])  # Box normals, drawn as points
plot_particles(x[9]['pos0'])  # Frame when box appears out of nowhere

The datasets don't contain density. But they contain border (= Box) particles!
Box normals give, for every box particle, the direction in which the border is facing.
Masses for some reason are 0 everywhere.

In [12]:
x[50]['m1']

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [15]:
# Same loading steps for the ours_default_data training set: build the flow
# (files in glob() order, no shuffling, windows of two frames) and
# materialise every sample in memory.
default_flow = PhysicsSimDataFlow(files=glob('../datasets/data/ours_default_data/train/*.msgpack.zst'), random_rotation=False, shuffle=False, window=2)
default_x = [item for item in default_flow]

In [14]:
default_x[0].keys()

dict_keys(['box', 'box_normals', 'pos0', 'vel0', 'm0', 'viscosity0', 'frame_id0', 'scene_id0', 'pos1', 'vel1', 'm1', 'viscosity1', 'frame_id1', 'scene_id1'])