In [1]:
import numpy as np
from netCDF4 import Dataset
import h5py
import time
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt
import multiprocessing as mp
import time

In [5]:
    '''
    Transforming seismogram nc files and harmonics pkl files to a single HDF5 file
    '''
    path = '../DataSet/10000LatinSphericalHarmonicsElasticBall'
    model_name = "LatinSphericalHarmonicsElasticBall"
    hdf5_file_path = f"{path}/all_data.h5"
    num_models = 10  # Total number of models

    disp_dims = (num_models, 37, 37, 3, 150)
    station_coords_cartesian_dims = (num_models, 37, 37, 3)
    station_coords_spherical_dims = (num_models, 37, 37, 3)
    time_dims = (num_models, 150)
    harmonics_dims = (num_models, 1215)

    with h5py.File(hdf5_file_path, "w") as data_hdf5:
        # Preallocate datasets for each variable
        disp_data = data_hdf5.create_dataset('disp', disp_dims, dtype='float32')
        station_coords_cartesian_data = data_hdf5.create_dataset("station_coords_cartesian", station_coords_cartesian_dims, dtype='float32')
        station_coords_spherical_data = data_hdf5.create_dataset("station_coords_spherical", station_coords_spherical_dims, dtype='float32')
        time_data = data_hdf5.create_dataset("time", time_dims, dtype='float32')
        harmonics_data = data_hdf5.create_dataset("harmonics", harmonics_dims, dtype='float32')
        
        for model_id in range(0, num_models):
            print(f"Processing model {model_id}")
            
            # Define paths for seismogram and harmonics
            seis_nc_path = f"{path}/{model_name}{model_id:0>4d}/seismogram_displacement_SYN.nc"
            harmonics_path = f"{path}/{model_name}{model_id:0>4d}/Spherical_Harmonics.pkl"

            # Attempt to read files
            try:
                seismogram_nc = Dataset(seis_nc_path, "r")
                harmonics_pkl = pickle.load(open(harmonics_path, "rb"))
            except FileNotFoundError as e:
                print(f"File not found: {e.filename}")
                continue

            disp_data[model_id, :, :, :, :] = np.array(seismogram_nc['disp'])
            station_coords_cartesian_data[model_id, :, :, :] = np.array(seismogram_nc['station_coords_cartesian'])
            station_coords_spherical_data[model_id, :, :, :] = np.array(seismogram_nc['station_coords_spherical'])
            time_data[model_id, :] = np.array(seismogram_nc['time'])
            harmonics_data[model_id, :] = np.array(harmonics_pkl['Value'])
            
            # Close the NetCDF file
            seismogram_nc.close()
            
        print("Finished processing all models.")

Processing model 0
Processing model 1
Processing model 2
Processing model 3
Processing model 4
Processing model 5
Processing model 6
Processing model 7
Processing model 8
Processing model 9
Finished processing all models.


In [28]:
    '''
    Transforming wavefield nc files and harmonics pkl files to a single HDF5 file
    '''
    path = '../DataSet/10000LatinSphericalHarmonicsElasticBall'
    model_name = "LatinSphericalHarmonicsElasticBall"
    hdf5_file_path = f"{path}/wf_data.h5"
    num_models = 10  # Total number of models

    element_coords_sz_dims = (3648, 2)
    disp_coef_dims = (num_models, 15, 3648, 16, 3)
    harmonics_dims = (num_models, 1215)

    with h5py.File(hdf5_file_path, "w") as data_hdf5:
        # Preallocate datasets for each variable
        dtype = 'float32'
        element_coords_sz_data = data_hdf5.create_dataset("element_coords_sz", element_coords_sz_dims, dtype='float64')
        disp_coef_data = data_hdf5.create_dataset("disp_coef", disp_coef_dims, dtype=dtype)
        harmonics_data = data_hdf5.create_dataset("harmonics", harmonics_dims, dtype=dtype)
        
        for model_id in range(0, num_models):
            print(f"Processing model {model_id}")
            for snapshot_id in range(0, 15):
                # Define paths for seismogram and harmonics
                wf_nc_path = f"{path}/{model_name}{model_id:0>4d}/snapshot_coeff/disp_coef_time{snapshot_id}.nc"
                # Attempt to read files
                try:
                    wf_nc = Dataset(wf_nc_path, "r")
                except FileNotFoundError as e:
                    print(f"File not found: {e.filename}")
                    continue
                coef = np.array(wf_nc['disp_coef']).transpose(1,0,2)
                coords = np.array(wf_nc['element_coords_sz'])
                if model_id == 0 and snapshot_id == 0:
                    coords0 = coords
                    dist = np.sqrt(np.sum(coords0**2, axis=1))
                    idx = np.argsort(dist)
                    coords0 = coords0[idx]
                    coef = coef[idx,:,:]
                    element_coords_sz_data[:, :] = coords0
                    # if snapshot_id == 10:
                    #     colors = plt.cm.viridis(coef[:, 0]*1e5)
                    #     plt.figure()
                    #     plt.scatter(x=coords0[:,0], y=coords0[:,1], c=colors, cmap='viridis')
                    #     plt.savefig(f'wf_{model_id}_{snapshot_id}.png')
                else:
                    mapping = {tuple(point): index for index, point in enumerate(coords)}
                    indices = [mapping[tuple(point)] for point in coords0]
                    coords = coords[indices]
                    assert (coords == coords0).all()
                    coef = coef[indices,:,:]
                    # if snapshot_id == 10:
                    #     colors = plt.cm.viridis(coef[:, 0]*1e5)
                    #     plt.figure()
                    #     plt.scatter(x=coords[:,0], y=coords[:,1], c=colors, cmap='viridis')
                    #     plt.savefig(f'wf_{model_id}_{snapshot_id}.png')

                disp_coef_data[model_id, snapshot_id, :, :, :] = coef

                # Close the NetCDF file
                wf_nc.close()

            harmonics_path = f"{path}/{model_name}{model_id:0>4d}/Spherical_Harmonics.pkl"
            try:
                harmonics_pkl = pickle.load(open(harmonics_path, "rb"))
            except FileNotFoundError as e:
                print(f"File not found: {e.filename}")
                continue
            harmonics_data[model_id, :] = np.array(harmonics_pkl['Value'])
        print("Finished processing all models.")

Processing model 0
Processing model 1
Processing model 2
Processing model 3
Processing model 4
Processing model 5
Processing model 6
Processing model 7
Processing model 8
Processing model 9
Finished processing all models.


In [31]:

    '''
    Transforming wavefield nc files and harmonics pkl files to a single HDF5 file
    '''
    path = '../DataSet/10000LatinSphericalHarmonicsElasticBall'
    model_name = "LatinSphericalHarmonicsElasticBall"
    hdf5_file_path = f"{path}/wf_slice_data.h5"
    num_models = 10  # Total number osf models

    element_coords_cartesian_dims = (16, 3648, 3)
    disp_dims = (num_models, 15, 16, 3648, 3) # model_id, snapshot_id, slice_id, element_id, component_id
    harmonics_dims = (num_models, 1215)

    with h5py.File(hdf5_file_path, "w") as data_hdf5:
        # Preallocate datasets for each variable
        dtype = 'float32'
        element_coords_cartesian_data = data_hdf5.create_dataset("element_coords_cartesian", element_coords_cartesian_dims, dtype='float64')
        disp_data = data_hdf5.create_dataset("disp", disp_dims, dtype=dtype)
        harmonics_data = data_hdf5.create_dataset("harmonics", harmonics_dims, dtype=dtype)
        for model_id in range(0, num_models):
            print(f"Processing model {model_id}")
            for snapshot_id in range(0, 15):
                for slice_id in range(0, 16):
                    # Define paths for seismogram and harmonics
                    wf_nc_path = f"{path}/{model_name}{model_id:0>4d}/snapshot/time{snapshot_id}/disp_slice{slice_id}.nc"
                    # Attempt to read files
                    try:
                        wf_nc = Dataset(wf_nc_path, "r")
                    except FileNotFoundError as e:
                        print(f"File not found: {e.filename}")
                        continue
                    disp = np.array(wf_nc['disp'])
                    coords = np.array(wf_nc['element_coords_cartesian'])
                    if model_id == 0 and snapshot_id == 0:
                        dist = np.sqrt(np.sum(coords**2, axis=1))
                        idx = np.argsort(dist)
                        coords = coords[idx]
                        element_coords_cartesian_data[slice_id, :, :] = coords
                        disp = disp[idx]
                    else:
                        # print(element_coords_cartesian_data[slice_id, :, :], coords)
                        mapping = {tuple(point): index for index, point in enumerate(coords)}
                        indices = [mapping[tuple(point)] for point in element_coords_cartesian_data[slice_id, :, :]]
                        assert (coords[indices] == element_coords_cartesian_data[slice_id, :, :]).all()
                        disp = disp[indices]

                    disp_data[model_id, snapshot_id, slice_id, :] = disp

                    # Close the NetCDF file
                    wf_nc.close()
                    # import pdb; pdb.set_trace()
                    
            harmonics_path = f"{path}/{model_name}{model_id:0>4d}/Spherical_Harmonics.pkl"
            try:
                harmonics_pkl = pickle.load(open(harmonics_path, "rb"))
            except FileNotFoundError as e:
                print(f"File not found: {e.filename}")
                continue
            harmonics_data[model_id, :] = np.array(harmonics_pkl['Value'])
        print("Finished processing all models.")

Processing model 0
Processing model 1
Processing model 2
Processing model 3
Processing model 4
Processing model 5
Processing model 6
Processing model 7
Processing model 8
Processing model 9
Finished processing all models.


In [17]:
coef.shape()

TypeError: 'tuple' object is not callable

In [27]:
np.shape(np.array(wf_nc['disp_coef']).transpose(1,0,2))

(3648, 16, 3)