In [2]:
import random

import h5py
import numpy as np
import torch
from torch_geometric.loader import DataLoader

from utils.qm7x_data import QM7X_Dataset
from utils.rotation_utils import rotate_positions, superfib_augment_batch
from utils.learned_variance_utils import get_orbit_variance

In [None]:
# SET PARAMETERS: edit these values before running the rest of the notebook.

# Index of the column of `dataset.data.y` to keep as the prediction target.
# TODO: placeholder value. The original cell left TARGET unset, so pick the
# column that matches the property you actually want to train on.
TARGET = 0
torch.manual_seed(42)

# NOTE(review): the original line was missing the constructor name. It is
# reconstructed here from the `QM7X_Dataset` import and the dangling
# `(root=...)` call; confirm the expected arguments in utils/qm7x_data.py.
dataset = QM7X_Dataset(root='data/qm7x_processed.h5')

# Keep only the selected target column. Indexing with [:, TARGET] drops the
# second axis, and unsqueeze(-1) restores a trailing axis of size 1.
dataset.data.y = dataset.data.y[:, TARGET].unsqueeze(-1)

In [3]:
# Report the length of the first axis of the "start" dataset as the number of
# conformers. The file is only needed for the read, so it is closed before
# printing.
with h5py.File("data/qm7x_processed.h5", mode="r") as f:
    N = len(f["start"])
print("Number of conformers:", N)

Number of conformers: 4027779


In [5]:
# Count the distinct values stored in the "mol_index" dataset.
with h5py.File("data/qm7x_processed.h5", "r") as f:
    # Read the dataset into memory once and deduplicate it in NumPy instead of
    # hashing every element individually into a Python set.
    mols = int(np.unique(f["mol_index"][...]).size)
print("Number of molecules:", mols)

Number of molecules: 6950


In [6]:
path = "data/qm7x_processed.h5"

# Fixed seed so the "random" conformer inspected below is the same on every
# Restart & Run All.
INSPECT_SEED = 42


def print_conformer(f, idx, preview=False):
    """Print the atoms and per-conformer properties stored for one conformer.

    The atoms of conformer ``idx`` are the rows ``start[idx]`` up to (but not
    including) ``start[idx] + length[idx]`` of the per-atom datasets "z" and
    "pos".

    Args:
        f: Open h5py file providing the datasets "start", "length", "z",
            "pos", "dipole", "quadrupole", "polarizability" and "e_pbe0".
        idx: Index of the conformer to print.
        preview: If False, print every atomic number and the shape of the
            position slice. If True, print only the first 10 atomic numbers
            and the first 2 position rows.
    """
    s = int(f["start"][idx])
    n = int(f["length"][idx])
    e = s + n

    print(f"Conformer {idx}: atoms {n}, indices [{s}:{e}]")
    if preview:
        print("Z snippet:", f["z"][s:e][:10])
        print("pos snippet:", f["pos"][s:e][:2])
    else:
        print("Z:", f["z"][s:e])
        print("pos shape:", f["pos"][s:e].shape)
    print("dipole:", f["dipole"][idx])
    print("quadrupole:", f["quadrupole"][idx])
    print("polarizability:", f["polarizability"][idx])
    print("e_pbe0:", f["e_pbe0"][idx])


with h5py.File(path, "r") as f:
    print("=== Keys in file ===")
    print(list(f.keys()))
    print()

    # Basic properties
    n_conformers = len(f["start"])
    print("Total atoms stored:", len(f["z"]))
    print("Total conformers:", n_conformers)
    print()

    # Inspect first entry
    print_conformer(f, 0)
    print()

    # Inspect a random conformer. A private, seeded generator keeps the choice
    # reproducible without touching the global `random` state.
    j = random.Random(INSPECT_SEED).randrange(n_conformers)
    print_conformer(f, j, preview=True)


=== Keys in file ===
['conf_index', 'dipole', 'e_pbe0', 'length', 'mol_index', 'polarizability', 'pos', 'quadrupole', 'start', 'z']

Total atoms stored: 67942296
Total conformers: 4027779

Conformer 0: atoms 5, indices [0:5]
Z: [6 1 1 1 1]
pos shape: (5, 3)
dipole: [-0.00094   0.012355  0.006423]
quadrupole: [-0.012005  0.002731  0.009274]
polarizability: [17.148888  0.175311  0.138676  0.175311 17.275997 -0.153899  0.138676
 -0.153899 16.734692]
e_pbe0: -1101.2457

Conformer 3294891: atoms 18, indices [56569221:56569239]
Z snippet: [6 7 6 6 6 6 8 1 1 1]
pos snippet: [[ 0.365099 -1.014224  2.757824]
 [-0.035853 -0.164821  1.683814]]
dipole: [-0.151082  0.176937 -0.01394 ]
quadrupole: [-3.69292   0.758154  2.934766]
polarizability: [ 96.553696 -16.690695  10.472252 -16.690695  82.897385  -9.299498
  10.472252  -9.299498 110.75571 ]
e_pbe0: -8892.068
