# Exploring Provided Waveform Inversion Data

This notebook loads and visualizes the seismic data (`.npy` files) and velocity model data (`.npy` files) provided in the `waveform_inversion_data` directory.

In [4]:
import numpy as np
import os

data_dir = os.path.join("..", "waveform_inversion_data")
seismic_file_train = os.path.join(data_dir, "seis2_1_0.npy")
velocity_file_train = os.path.join(data_dir, "vel2_1_0.npy")
seismic_file_test = os.path.join(data_dir, "000039dca2.npy")

In [11]:
seismic_data_train = np.load(seismic_file_train, mmap_mode='r')
velocity_data_train = np.load(velocity_file_train, mmap_mode='r') 
seismic_data_test = np.load(seismic_file_test, mmap_mode='r')

In [10]:
print(velocity_data_train)

[[[[1760.     1760.     1760.     ... 1760.     1760.     1760.    ]
   [1760.     1760.     1760.     ... 1760.     1760.     1760.    ]
   [1760.     1760.     1760.     ... 1760.     1760.     1760.    ]
   ...
   [3455.     3455.     3455.     ... 3455.     3455.     3455.    ]
   [3455.     3455.     3455.     ... 3455.     3455.     3455.    ]
   [3455.     3455.     3455.     ... 3455.     3455.     3455.    ]]]


 [[[2988.     2988.     2988.     ... 2988.     2988.     2988.    ]
   [2988.     2988.     2988.     ... 2988.     2988.     2988.    ]
   [2988.     2988.     2988.     ... 2988.     2988.     2988.    ]
   ...
   [4404.     4404.     4404.     ... 4404.     4404.     4404.    ]
   [4404.     4404.     4404.     ... 4404.     4404.     4404.    ]
   [4404.     4404.     4404.     ... 4404.     4404.     4404.    ]]]


 [[[1676.     1676.     1676.     ... 1676.     1676.     1676.    ]
   [1676.     1676.     1676.     ... 1676.     1676.     1676.    ]
   [1676.   

In [12]:
print(seismic_data_test)

[[[-4.3723936e-04  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  [-1.4025457e-03 -1.5501239e-05  9.6809958e-07 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  [-2.9656747e-03 -7.8596757e-05  4.3638470e-06 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  ...
  [-2.1746773e-03 -2.2501172e-03 -2.3082583e-03 ...  4.8055989e-03
    5.8574509e-03  6.0836617e-03]
  [-2.1236797e-03 -2.2126497e-03 -2.2827697e-03 ...  5.3145797e-03
    6.0396837e-03  5.9555816e-03]
  [-2.0684560e-03 -2.1702910e-03 -2.2520840e-03 ...  5.7014483e-03
    6.1006825e-03  5.7227770e-03]]

 [[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  [ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  [ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00
    0.0000000e+00  0.0000000e+00]
  ...
  [-2.2970506e-03 -2.3217648e-03 -2.3477431e-03 ... -6.2175114e-03
   -7.0539

In [16]:
# Ensure the data (seismic_data_train, velocity_data_train, seismic_data_test) 
# is loaded from the previous cell before running this.
# If you get a NameError, re-run the cell that loads the .npy files.

import numpy as np # Make sure numpy is imported in this cell or a previous one

print("="*20 + " Inspecting Training Seismic Data (seis2_1_0.npy) " + "="*20)
if 'seismic_data_train' in locals() and hasattr(seismic_data_train, 'shape') and seismic_data_train.shape[0] > 0:
    # First sample, first source, first geophone, first 10 time steps
    print("\\n--- First sample, first source, first geophone, first 10 time steps ---")
    sample_waveform_start = seismic_data_train[0, 0, 0, :10]
    print(sample_waveform_start)
    print(f"Shape of this slice: {sample_waveform_start.shape}")

    # First sample, first geophone, 500th time step, across all 5 sources
    print("\\n--- First sample, first geophone, 500th time step, for all 5 sources ---")
    sample_timestep_allsources = seismic_data_train[0, :, 499, 0] # 499 is index for 500th step
    print(sample_timestep_allsources)
    print(f"Shape of this slice: {sample_timestep_allsources.shape}")
    
    print("\\n--- Overall statistics for seismic_data_train ---")
    print(f"Mean: {np.mean(seismic_data_train):.4e}") 
    print(f"Std Dev: {np.std(seismic_data_train):.4e}")
    print(f"Min: {np.min(seismic_data_train):.4e}") 
    print(f"Max: {np.max(seismic_data_train):.4e}")

else:
    print("seismic_data_train not found, is not a numpy array, or is empty. Please ensure it's loaded correctly in a previous cell.")

print("\\n" + "="*20 + " Inspecting Training Velocity Data (vel2_1_0.npy) " + "="*20)
if 'velocity_data_train' in locals() and hasattr(velocity_data_train, 'shape') and velocity_data_train.shape[0] > 0:
    # First sample, first channel (only one), top-left 5x5 patch
    print("\\n--- First sample, top-left 5x5 patch of the velocity map ---")
    sample_velocity_patch = velocity_data_train[0, 0, :5, :5]
    print(sample_velocity_patch)
    print(f"Shape of this slice: {sample_velocity_patch.shape}")

    print("\\n--- Overall statistics for velocity_data_train ---")
    print(f"Mean: {np.mean(velocity_data_train):.4f}") 
    print(f"Std Dev: {np.std(velocity_data_train):.4f}")
    print(f"Min: {np.min(velocity_data_train):.4f}")
    print(f"Max: {np.max(velocity_data_train):.4f}")
else:
    print("velocity_data_train not found, is not a numpy array, or is empty. Please ensure it's loaded correctly in a previous cell.")

print("\\n" + "="*20 + " Inspecting Test Seismic Data (000039dca2.npy) " + "="*20)
if 'seismic_data_test' in locals() and hasattr(seismic_data_test, 'shape'):
    print(f"Actual shape of seismic_data_test: {seismic_data_test.shape}") # IMPORTANT: Check this output

    # Standardize to 4D (add batch dimension if it's 3D) for consistent processing logic later if needed
    # This is a common preprocessing step.
    current_test_data = seismic_data_test
    if seismic_data_test.ndim == 3:
        # Assuming 3D is (C, T, G), add a batch dimension at the start
        current_test_data = np.expand_dims(seismic_data_test, axis=0)
        print(f"Expanded seismic_data_test to 4D with shape: {current_test_data.shape}")
    
    if current_test_data.ndim == 4 and current_test_data.shape[0] > 0 :
        # Now current_test_data should be 4D: (S, C, T, G), where S is likely 1
        print("\\n--- First sample, first source, first geophone, first 10 time steps (from potentially expanded 4D array) ---")
        # Accessing as [sample_idx, source_idx, geophone_idx, time_idx_slice]
        # Note: The previous error indicated (C,T,G), so if original was (5,1000,70), expanded is (1,5,1000,70)
        # Accessing 1st sample (index 0), 1st source (index 0), 1st geophone (index 0), first 10 time steps.
        sample_test_waveform_start = current_test_data[0, 0, 0, :10] 
        print(sample_test_waveform_start)
        print(f"Shape of this slice: {sample_test_waveform_start.shape}")

        print("\\n--- First sample, first geophone, 500th time step, for all 5 sources (from potentially expanded 4D array) ---")
        sample_test_timestep_allsources = current_test_data[0, :, 499, 0] # 0th sample, all sources, 500th time, 0th geophone
        print(sample_test_timestep_allsources)
        print(f"Shape of this slice: {sample_test_timestep_allsources.shape}")

    else:
        print(f"Test data (current_test_data) has an unexpected shape or dimension after potential expansion: {current_test_data.shape}. Cannot proceed with slicing as defined.")

    print("\\n--- Overall statistics for seismic_data_test (original or expanded) ---")
    print(f"Mean: {np.mean(current_test_data):.4e}")
    print(f"Std Dev: {np.std(current_test_data):.4e}")
    print(f"Min: {np.min(current_test_data):.4e}")
    print(f"Max: {np.max(current_test_data):.4e}")
else:
    print("seismic_data_test not found or is not a numpy array. Please ensure it's loaded correctly in a previous cell.")


\n--- First sample, first source, first geophone, first 10 time steps ---
[-0.00050937  0.          0.          0.          0.          0.
  0.          0.          0.          0.        ]
Shape of this slice: (10,)
\n--- First sample, first geophone, 500th time step, for all 5 sources ---
[-0.1284725  -0.40384248 -1.4559536   0.912251   -0.5993699 ]
Shape of this slice: (5,)
\n--- Overall statistics for seismic_data_train ---
Mean: -4.3087e-04
Std Dev: 1.5608e+00
Min: -2.6059e+01
Max: 5.2216e+01
\n--- First sample, top-left 5x5 patch of the velocity map ---
[[1760. 1760. 1760. 1760. 1760.]
 [1760. 1760. 1760. 1760. 1760.]
 [1760. 1760. 1760. 1760. 1760.]
 [1760. 1760. 1760. 1760. 1760.]
 [1760. 1760. 1760. 1760. 1760.]]
Shape of this slice: (5, 5)
\n--- Overall statistics for velocity_data_train ---
Mean: 3020.6394
Std Dev: 851.3628
Min: 1501.0000
Max: 4500.0000
Actual shape of seismic_data_test: (5, 1000, 70)
Expanded seismic_data_test to 4D with shape: (1, 5, 1000, 70)
\n--- First s