In [1]:
import numpy as np

# Read the combined E-field records from disk.
# allow_pickle=True is needed for object-dtype rows; only use it on trusted files.
loaded_data = np.load('E_field_data_combined_extended.npz', allow_pickle=True)
E_data_combined = loaded_data['data']

# Every angle value the splits are built from
all_angles = {0, 30, 60, 90}

# Leave-one-out folds: each entry lists the training angles, and the single
# angle missing from it becomes the held-out test angle for that fold.
filter_sets = [
    {30, 60, 90},  # held out: 0
    {0, 60, 90},   # held out: 30
    {0, 30, 90},   # held out: 60
    {0, 30, 60}    # held out: 90
]

# Build and save one train/test pair per fold (files are numbered from 1)
for fold_no, train_angles in enumerate(filter_sets, start=1):
    # Whatever is not used for training is tested on (exactly one angle here)
    test_angles = all_angles - train_angles

    # The first value of each record is compared against the angle sets;
    # records whose first value is in neither set are dropped.
    train_data = np.array([row for row in E_data_combined if row[0] in train_angles])
    test_data = np.array([row for row in E_data_combined if row[0] in test_angles])

    # Output names for this fold
    filename_train = f"E_field_data_filtered_{fold_no}.npz"
    filename_test = f"E_field_data_filtered_test_{fold_no}.npz"

    # Persist both subsets under the 'data' key
    np.savez(filename_train, data=train_data)
    np.savez(filename_test, data=test_data)

    print(f"Saved {filename_train} with {len(train_data)} records (Train angles: {train_angles})")
    print(f"Saved {filename_test} with {len(test_data)} records (Test angle: {test_angles})")


Saved E_field_data_filtered_1.npz with 6708 records (Train angles: {90, 60, 30})
Saved E_field_data_filtered_test_1.npz with 2236 records (Test angle: {0})
Saved E_field_data_filtered_2.npz with 6708 records (Train angles: {0, 90, 60})
Saved E_field_data_filtered_test_2.npz with 2236 records (Test angle: {30})
Saved E_field_data_filtered_3.npz with 6708 records (Train angles: {0, 90, 30})
Saved E_field_data_filtered_test_3.npz with 2236 records (Test angle: {60})
Saved E_field_data_filtered_4.npz with 6708 records (Train angles: {0, 60, 30})
Saved E_field_data_filtered_test_4.npz with 2236 records (Test angle: {90})


In [2]:
import numpy as np

# How many leave-one-out splits exist on disk (files are numbered 1..num_datasets)
num_datasets = 4  

# Reload every split and report array shapes as a sanity check
for idx in range(1, num_datasets + 1):
    train_file = f"E_field_data_filtered_{idx}.npz"
    test_file = f"E_field_data_filtered_test_{idx}.npz"

    # Each archive stores its records under the 'data' key
    train_data = np.load(train_file, allow_pickle=True)['data']
    test_data = np.load(test_file, allow_pickle=True)['data']

    # Features: every column except the last, cast to float32
    X_train, X_test = (
        split[:, :-1].astype(np.float32) for split in (train_data, test_data)
    )

    # Targets: the last column holds one matrix per record; stack them and
    # keep only rows 2..98 and columns 2..178 of each matrix.
    y_train, y_test = (
        np.array([np.array(field, dtype=np.float32) for field in split[:, -1]])[:, 2:99, 2:179]
        for split in (train_data, test_data)
    )

    # Shape report for this split
    print(f"Dataset {idx}:")
    print(f"  X_train Shape: {X_train.shape}, y_train Shape: {y_train.shape}")
    print(f"  X_test Shape: {X_test.shape}, y_test Shape: {y_test.shape}")
    print("-" * 40)


Dataset 1:
  X_train Shape: (6708, 4), y_train Shape: (6708, 97, 177)
  X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
----------------------------------------
Dataset 2:
  X_train Shape: (6708, 4), y_train Shape: (6708, 97, 177)
  X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
----------------------------------------
Dataset 3:
  X_train Shape: (6708, 4), y_train Shape: (6708, 97, 177)
  X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
----------------------------------------
Dataset 4:
  X_train Shape: (6708, 4), y_train Shape: (6708, 97, 177)
  X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
----------------------------------------


This is the updated version with the solution.

In [3]:
import numpy as np

def load_and_preprocess_data(train_file, test_file, samples_per_range=86, train_ratio=0.9, seed=None):
    """
    Load a train/test pair of .npz files, split the training records into
    train and validation sets by whole ranges, and min-max scale the targets.

    The training records are treated as consecutive groups ("ranges") of
    `samples_per_range` rows. Whole ranges are shuffled and assigned to either
    the training or the validation set, so rows of one range never end up on
    both sides. Only the targets (y) are scaled, using the min and max of the
    training targets; the features (X) are returned as float32, unscaled.

    Parameters:
    - train_file (str): Path to the training data file (.npz, key 'data').
    - test_file (str): Path to the test data file (.npz, key 'data').
    - samples_per_range (int): Number of consecutive rows forming one range.
    - train_ratio (float): Proportion of ranges used for training (default 90%).
    - seed (int or None): Seed for the split. None (default) draws from the
      global `np.random` state, exactly as before; an integer makes the split
      reproducible without touching the global state.

    Returns:
    - X_train, y_train (y scaled with the training min/max)
    - X_val, y_val (y scaled with the training min/max)
    - X_test, y_test (y scaled with the training min/max)

    Raises:
    - ValueError: if `samples_per_range` is not positive, or if the training
      split would be empty.
    """
    if samples_per_range <= 0:
        raise ValueError("samples_per_range must be a positive integer")

    # NOTE(security): allow_pickle=True unpickles object arrays, which can run
    # arbitrary code. Only load files produced by a trusted source.
    # The context managers close the underlying archive handles.
    with np.load(train_file, allow_pickle=True) as train_npz:
        train_data = train_npz['data']
    with np.load(test_file, allow_pickle=True) as test_npz:
        test_data = test_npz['data']

    total_samples = train_data.shape[0]
    num_ranges = total_samples // samples_per_range

    # Rows beyond the last complete range are never selected; say so explicitly.
    leftover = total_samples - num_ranges * samples_per_range
    if leftover:
        print(f"Warning: {leftover} trailing samples do not fill a complete range and are ignored")

    # Legacy global RNG when unseeded (unchanged behavior), private RNG otherwise
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Shuffle and split training-validation data at range granularity
    all_ranges = np.arange(num_ranges)
    rng.shuffle(all_ranges)

    split_at = int(train_ratio * num_ranges)
    train_ranges = all_ranges[:split_at]
    val_ranges = all_ranges[split_at:]  # Remaining for validation
    print("train_ranges :" , train_ranges)
    print("val_ranges :", val_ranges)

    if train_ranges.size == 0:
        raise ValueError(
            "Training split is empty: need at least one complete range "
            f"(total_samples={total_samples}, samples_per_range={samples_per_range}, "
            f"train_ratio={train_ratio})"
        )

    def get_indices_from_ranges(ranges, samples_per_range):
        """Expand range numbers into the row indices they cover (works for empty input)."""
        starts = np.asarray(ranges, dtype=np.int64)[:, None] * samples_per_range
        return (starts + np.arange(samples_per_range)).ravel()

    def split_features_targets(records):
        """Return (features, cropped target matrices) as float32 arrays."""
        features = records[:, :-1].astype(np.float32)
        targets = np.array([np.array(matrix, dtype=np.float32) for matrix in records[:, -1]])
        return features, targets[:, 2:99, 2:179]  # Extract submatrix

    # Get indices for train and validation sets, then shuffle within each set
    train_indices = get_indices_from_ranges(train_ranges, samples_per_range)
    val_indices = get_indices_from_ranges(val_ranges, samples_per_range)
    rng.shuffle(train_indices)
    rng.shuffle(val_indices)

    # Extract features (X) and target (y)
    X_full, y_full = split_features_targets(train_data)

    # Train-Validation Split
    X_train, X_val = X_full[train_indices], X_full[val_indices]
    y_train, y_val = y_full[train_indices], y_full[val_indices]

    # Load test data
    X_test, y_test = split_features_targets(test_data)

    # **Normalization Using Min-Max Scaling** (statistics from y_train only)
    y_min = y_train.min()
    y_max = y_train.max()
    y_range = y_max - y_min
    if y_range == 0:
        # Constant training targets: avoid 0/0 -> NaN, map everything to 0 offset
        y_range = np.float32(1.0)

    y_train = (y_train - y_min) / y_range
    y_val = (y_val - y_min) / y_range
    y_test = (y_test - y_min) / y_range  # Normalize using y_train's min-max

    # Print shapes and min-max values for verification
    print(f"X_train Shape: {X_train.shape}, y_train Shape: {y_train.shape}")
    print(f"X_val Shape: {X_val.shape}, y_val Shape: {y_val.shape}")
    print(f"X_test Shape: {X_test.shape}, y_test Shape: {y_test.shape}")
    for label, values in (("y_train", y_train), ("y_val", y_val), ("y_test", y_test)):
        if values.size:
            print(f"{label} min: {values.min()}, max: {values.max()}")
        else:
            # min()/max() raise on empty arrays (e.g. train_ratio=1.0)
            print(f"{label} is empty")

    return X_train, y_train, X_val, y_val, X_test, y_test

# Demo call: preprocess the first train/test file pair with the default settings
train_file, test_file = "E_field_data_filtered_1.npz", "E_field_data_filtered_test_1.npz"

(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = load_and_preprocess_data(train_file, test_file)


train_ranges : [39 60  0 76 65 25 52 10 57 22 61 63 17  1 45 34 44 59 46 66 28 68 73  3
 29 53 30 56 55 47 75 18 77 20 51 38  5 26  7 41  4 49 33 40 74  9 12 13
 50 11 70 14 42 67 71 23 27 21 48 36  6  2 16  8 62 24 43 72 15 19]
val_ranges : [64 35 58 31 37 69 32 54]
X_train Shape: (6020, 4), y_train Shape: (6020, 97, 177)
X_val Shape: (688, 4), y_val Shape: (688, 97, 177)
X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
y_train min: 0.0, max: 1.0
y_val min: 0.01858055405318737, max: 0.873414933681488
y_test min: 0.16905874013900757, max: 0.9943581223487854


## Now we update the code to use model checkpointing, dropout, and early stopping

Plot the best_model.h5

In [4]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LeakyReLU, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import matplotlib.pyplot as plt
import os

# Model Architecture
def model_architecture():
    """
    Build the fully connected network used for the E-field regression.

    The model takes a 4-value input vector (input layer named 'data'), passes
    it through six Dense + LeakyReLU stages of growing width, and ends in a
    sigmoid Dense layer of 97 * 177 units reshaped to (97, 177, 1).

    Returns:
    - An uncompiled Keras `Model` named "fcn_conv".
    """
    # Negative-side slope handed to every LeakyReLU; with 0.0 the activation
    # passes nothing through for negative inputs (i.e. it acts like plain ReLU).
    negative_slope = 0.0

    inputs = Input(shape=(4,), name='data')

    # Widths grow by a factor of 4 per stage
    hidden = inputs
    for width in (8, 32, 128, 512, 2048, 8192):
        hidden = Dense(width)(hidden)
        hidden = LeakyReLU(alpha=negative_slope)(hidden)

    # One sigmoid unit per output pixel, then fold back into an image-like tensor
    flat_field = Dense(97 * 177, activation='sigmoid')(hidden)
    outputs = Reshape((97, 177, 1))(flat_field)

    return Model(inputs, outputs, name="fcn_conv")

# Train model with batch size
def train_with_batch_size(batch_size, X_train, X_val, y_train, y_val, file_name):
    """
    Build a fresh model and train it with the given batch size.

    The model is compiled with Adam (learning rate 0.001) and MAE loss, and
    trained for up to 1000 epochs. Two callbacks monitor `val_loss`: a
    checkpoint that keeps only the best model on disk, and early stopping with
    a patience of 50 epochs that restores the best weights.

    Parameters:
    - batch_size (int): Mini-batch size passed to `fit`.
    - X_train, X_val: Feature arrays for training and validation.
    - y_train, y_val: Target arrays; a trailing channel axis is appended here.
    - file_name (str): Data file name; its extension is stripped and the rest
      is embedded in the checkpoint file name.

    Returns:
    - The Keras `History` object returned by `fit` (the model itself is not
      returned; the best one is written to
      'best_model_<file base>_batch_<batch_size>.keras').
    """
    print(f"\nTraining on {file_name} with batch size: {batch_size}")

    # Checkpoint path: data file name without its extension + batch size
    file_stem, _extension = os.path.splitext(file_name)
    checkpoint_filename = f'best_model_{file_stem}_batch_{batch_size}.keras'

    # Both callbacks watch the validation loss
    training_callbacks = [
        ModelCheckpoint(checkpoint_filename, monitor='val_loss', save_best_only=True, verbose=1),
        EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, verbose=1),
    ]

    # Fresh network for every run
    model = model_architecture()
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mae')

    # Targets get a trailing axis of size 1 to match the model's output shape
    return model.fit(
        X_train, np.expand_dims(y_train, axis=-1),
        validation_data=(X_val, np.expand_dims(y_val, axis=-1)),
        epochs=1000, batch_size=batch_size, verbose=1,
        callbacks=training_callbacks
    )

# Example usage:
# file_name = "E_field_data_filtered_1.npz"  # Example file
# history = train_with_batch_size(32, X_train, X_val, y_train, y_val, file_name)


2025-03-19 04:43:06.476866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-19 04:43:06.549543: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
from tensorflow.keras.models import load_model

# Training and test files, paired by position (one pair per leave-one-out split)
train_files = [
    "E_field_data_filtered_1.npz",
    "E_field_data_filtered_2.npz",
    "E_field_data_filtered_3.npz",
    "E_field_data_filtered_4.npz"
]

test_files = [
    "E_field_data_filtered_test_1.npz",
    "E_field_data_filtered_test_2.npz",
    "E_field_data_filtered_test_3.npz",
    "E_field_data_filtered_test_4.npz"
]

# Different batch sizes to experiment with
batch_sizes = [16]

# Train on each file
for train_file, test_file in zip(train_files, test_files):
    print(f"\nProcessing: {train_file} with Test File: {test_file}")
    
    # Load and preprocess data
    X_train, y_train, X_val, y_val, X_test, y_test = load_and_preprocess_data(train_file, test_file)
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    
    # Remove .npz extension from filenames
    train_file_base = os.path.splitext(train_file)[0]  # Removes the .npz extension

    # Test targets with the trailing channel axis the model outputs
    # (does not depend on the batch size, so build it once per file)
    y_test_reshaped = np.expand_dims(y_test, axis=-1)

    for batch_size in batch_sizes:
        # train_with_batch_size returns only the History object; unpacking it
        # into (history, model) would raise a TypeError after training ends.
        history = train_with_batch_size(batch_size, X_train, X_val, y_train, y_val, train_file)

        # The ModelCheckpoint callback inside train_with_batch_size writes the
        # best-val_loss model to exactly this name, so load it from there
        # instead of re-saving (and overwriting) it.
        model_filename = f"best_model_{train_file_base}_batch_{batch_size}.keras"
        loaded_model = load_model(model_filename)
        print(f"Model saved as: {model_filename}")

        # Plot the loss curves
        plt.figure(figsize=(8, 5))
        plt.plot(history.history['loss'], label='Train Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'Loss Curve - {train_file_base} (Batch {batch_size})')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid()
        plt.show()

        # Evaluate the best checkpoint on test data
        test_loss = loaded_model.evaluate(X_test, y_test_reshaped, verbose=1)
        print(f"Test Loss on {test_file} (Batch {batch_size}): {test_loss:.4f}")

        # Make predictions
        y_pred_best_model = loaded_model.predict(X_test)

        # Drop only the trailing channel axis; a bare squeeze would also drop
        # the batch axis when there is a single test sample.
        y_pred_best_model_squeezed = np.squeeze(y_pred_best_model, axis=-1)

        # Per-sample MAE (mean over both spatial axes) and its variance
        err = np.mean(np.abs(y_test - y_pred_best_model_squeezed), axis=(1, 2))
        test_mae = np.mean(err)
        test_variance = np.var(err)

        # Print the results
        print(f"Test MAE on {test_file}: {test_mae:.4f}")
        print(f"Variance of MAE across samples: {test_variance:.6f}")



Processing: E_field_data_filtered_1.npz with Test File: E_field_data_filtered_test_1.npz
train_ranges : [ 9 46 66 57 12 72 60 43  5 33  8 24 19 28 21 29  1 31 53 63 13 34 45 11
  2 30 56  4 20 74 17 58  3 70 65 64 49 68 55 59 75 51 22 26 41 47  7 48
 37 67 40 32 76 54 62  6 44 15 23 36 61 25 77 69 39 16  0 10 42 52]
val_ranges : [18 38 27 14 71 35 50 73]
X_train Shape: (6020, 4), y_train Shape: (6020, 97, 177)
X_val Shape: (688, 4), y_val Shape: (688, 97, 177)
X_test Shape: (2236, 4), y_test Shape: (2236, 97, 177)
y_train min: 0.0, max: 1.0
y_val min: 0.007142702117562294, max: 0.9295064210891724
y_test min: 0.16905874013900757, max: 0.9943581223487854
X_train shape: (6020, 4), y_train shape: (6020, 97, 177)

Training on E_field_data_filtered_1.npz with batch size: 16


2025-03-19 04:43:09.643858: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-19 04:43:09.792773: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 44197 MB memory:  -> device: 0, name: NVIDIA L40S, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/1000


2025-03-19 04:43:11.464342: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2025-03-19 04:43:11.521905: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7eff927d0f50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-03-19 04:43:11.521940: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA L40S, Compute Capability 8.9
2025-03-19 04:43:11.532143: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-03-19 04:43:11.615481: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:234] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.9
2025-03-19 04:43:11.615508: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:237] Used ptxas at ptxas
202

Epoch 1: val_loss improved from inf to 0.03050, saving model to best_model_E_field_data_filtered_1_batch_16.keras
Epoch 2/1000
Epoch 2: val_loss did not improve from 0.03050
Epoch 3/1000
Epoch 3: val_loss improved from 0.03050 to 0.02349, saving model to best_model_E_field_data_filtered_1_batch_16.keras
Epoch 4/1000
Epoch 4: val_loss did not improve from 0.02349
Epoch 5/1000
Epoch 5: val_loss improved from 0.02349 to 0.01916, saving model to best_model_E_field_data_filtered_1_batch_16.keras
Epoch 6/1000
Epoch 6: val_loss did not improve from 0.01916
Epoch 7/1000
Epoch 7: val_loss did not improve from 0.01916
Epoch 8/1000
Epoch 8: val_loss improved from 0.01916 to 0.01574, saving model to best_model_E_field_data_filtered_1_batch_16.keras
Epoch 9/1000
Epoch 9: val_loss did not improve from 0.01574
Epoch 10/1000
Epoch 10: val_loss improved from 0.01574 to 0.01440, saving model to best_model_E_field_data_filtered_1_batch_16.keras
Epoch 11/1000
Epoch 11: val_loss did not improve from 0.0144

KeyboardInterrupt: 

: 