In [1]:
import matplotlib.pyplot as plt
import numpy as np
import sys, os
import json
sys.path.append('../src/')

from datasets import PDEBenchDataset
from utils import get_ceph_dir

In [2]:
# Name of the PDEBench dataset to summarize; also used as the prefix of the
# JSON stats file written in the last cell.
dataset_name = 'compNS_turb'
# Physical fields to load. Order matters: the statistics cells address the
# i-th field of each sample as data[i], following this list's order.
fields = ['Vx', 'Vy', 'density', 'pressure']

In [3]:
# Loader options for the statistics pass: request the 'train' split with a
# (1.0, 0.0, 0.0) ratio (presumably train/val/test, i.e. every sample lands in
# 'train' -- confirm against PDEBenchDataset), 21 timesteps per sample, and a
# deterministic traversal (no random start, no shuffling).
loader_options = dict(
    set_type='train',
    split_ratios=(1.0, 0.0, 0.0),
    fields=fields,
    timesteps=21,
    random_start=False,
    shuffle=False,
)
dataset = PDEBenchDataset(dataset_name, **loader_options)

# Number of samples the statistics below are averaged over.
print(len(dataset))

Raw dataset compNS_turb has 1000 samples of shape (512, 512) and 21 timesteps.
1000


In [4]:
# Running per-field statistics, one slot per entry of `fields`.
# Extremes start at -inf/+inf so the first sample always replaces them.
n_fields = len(fields)
max_per_field = np.full(n_fields, -np.inf)
min_per_field = np.full(n_fields, np.inf)
mean_per_field = np.zeros(n_fields)
mean_std_per_field = np.zeros(n_fields)

for idx, data in enumerate(dataset):
    # Lightweight progress indicator: one line every 100 samples.
    if not idx % 100:
        print(idx)
    for i in range(n_fields):
        # Assumes the leading axis of a sample indexes fields in the same
        # order as `fields` -- TODO confirm against PDEBenchDataset.
        field_data = data[i]
        max_per_field[i] = max(max_per_field[i], field_data.max())
        min_per_field[i] = min(min_per_field[i], field_data.min())
        # Accumulate per-sample mean and std; both are averaged below.
        mean_per_field[i] += field_data.mean()
        mean_std_per_field[i] += field_data.std()

# Turn the accumulated sums into averages over samples. Note that
# mean_std_per_field is the average of per-sample standard deviations,
# not the standard deviation of the pooled data.
n_samples = len(dataset)
mean_per_field /= n_samples
mean_std_per_field /= n_samples

0
100
200
300
400
500
600
700
800
900


In [5]:
# Print a short human-readable report, one paragraph per field.
# 'Std' is the per-sample standard deviation averaged over the dataset.
stat_columns = zip(fields,
                   max_per_field,
                   min_per_field,
                   mean_per_field,
                   mean_std_per_field)
for field, field_max, field_min, field_mean, field_std in stat_columns:
    print('Field: {}'.format(field))
    print('Max: {}'.format(field_max))
    print('Min: {}'.format(field_min))
    print('Mean: {}'.format(field_mean))
    print('Std: {}'.format(field_std))
    print('')

Field: Vx
Max: 4.206423759460449
Min: -4.465677261352539
Mean: 3.985787741839886e-06
Std: 0.7780172729492187

Field: Vy
Max: 4.17794942855835
Min: -4.403666973114014
Mean: -9.103563614189625e-06
Std: 0.7804127807617187

Field: density
Max: 1.2293452024459839
Min: 0.884306788444519
Mean: 1.0
Std: 0.02079853057861328

Field: pressure
Max: 84.73867797851562
Min: 48.89590072631836
Mean: 60.02755078125
Std: 2.0841865234375



In [6]:
# Bundle the per-field statistics into a JSON-serializable mapping
# {field name: {'max', 'min', 'mean', 'std'}}.
# NOTE: 'std' holds mean_std_per_field, i.e. the average of per-sample
# standard deviations, not the standard deviation of the pooled dataset.
d = {
    field: {'max': max_per_field[i],
            'min': min_per_field[i],
            'mean': mean_per_field[i],
            'std': mean_std_per_field[i]}
    for i, field in enumerate(fields)
}

# Make sure the target directory exists before writing; without this, open()
# raises FileNotFoundError on a fresh storage location.
stats_dir = os.path.join(get_ceph_dir(), 'pdebench_misc')
os.makedirs(stats_dir, exist_ok=True)

with open(os.path.join(stats_dir, f'{dataset_name}_basic_stats.json'), 'w') as f:
    json.dump(d, f, indent=4)