In [4]:
import json
import random
import sys
from pathlib import Path
import torch
import matplotlib.pyplot as plt
import numpy as np

# Add project source to path
project_root = Path.cwd().parent
src_path = project_root / 'src'
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))
from normalizer import DataNormalizer
from utils import load_config

plt.style.use('science.mplstyle')

# %%
# 2. LOAD ARTIFACTS
# --- Paths ---
CONFIG_FILE = project_root / "inputs/model_input_params.jsonc"
DATA_ROOT = project_root / "data"
config = load_config(CONFIG_FILE)
model_folder = DATA_ROOT / config["output_paths_config"]["fixed_model_foldername"]
normalized_data_folder = DATA_ROOT / config["data_paths_config"]["normalized_profiles_foldername"]
raw_data_folder = DATA_ROOT / config["data_paths_config"]["raw_profiles_foldername"]

# --- Load Model, Metadata, and Test Set list ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.jit.load(model_folder / "best_model_jit.pt", map_location=device)
model.eval()
with (normalized_data_folder / "normalization_metadata.json").open("r") as f:
    norm_metadata = json.load(f)
with (model_folder / "test_set_info.json").open("r") as f:
    test_filenames = json.load(f)["test_filenames"]

# --- Get variable lists from config ---
species_vars = sorted(config["species_variables"])
global_vars = sorted(config["global_variables"])
species_labels = [s.replace('_evolution', '') for s in species_vars]

print(f"✅ Setup complete. Model loaded on {device}.")

# %%
# 3. PREDICT AND PLOT FOR A SINGLE TEST PROFILE

# --- Select a random test file and load both versions ---
test_filename = random.choice(test_filenames)
with (normalized_data_folder / test_filename).open("r") as f:
    norm_profile = json.load(f)
with (raw_data_folder / test_filename).open("r") as f:
    raw_profile = json.load(f)

# --- Prepare the constant part of the input vector ---
initial_species = [norm_profile[key][0] for key in species_vars]
global_conds = [norm_profile[key] for key in global_vars]
base_input = torch.tensor(initial_species + global_conds, dtype=torch.float32)

# --- Predict for every time step in the profile ---
predicted_evolutions_norm = [] # Store the normalized predictions first
for norm_time_step in norm_profile["t_time"]:
    # Create the full input vector for this time step
    input_vector = torch.cat([base_input, torch.tensor([norm_time_step])]).unsqueeze(0).to(device)
    with torch.no_grad():
        norm_pred = model(input_vector).cpu().squeeze(0)
        predicted_evolutions_norm.append(norm_pred)

# --- Stack and Denormalize All Predictions at Once ---
# 1. Stack the list of 1D tensors into a 2D tensor of shape (num_timesteps, num_species)
predicted_evolutions_norm = torch.stack(predicted_evolutions_norm)

# 2. Transpose it to get shape (num_species, num_timesteps) for easier iteration
predicted_evolutions_norm = predicted_evolutions_norm.T

# 3. Denormalize each species' full time-series vector in one go. This is the fix.
predicted_evolutions_denorm = []
for i, key in enumerate(species_vars):
    # Pass the entire time-series tensor for one species to denormalize
    series_tensor = predicted_evolutions_norm[i]
    denorm_series = DataNormalizer.denormalize(series_tensor, norm_metadata, key)
    predicted_evolutions_denorm.append(denorm_series.numpy()) # Convert to numpy for plotting

# predicted_evolutions_denorm is now a list of numpy arrays, ready for plotting.
raw_times = raw_profile['t_time']

# --- Plot the results ---
fig, ax = plt.subplots(figsize=(14, 8))
colors = plt.cm.viridis(np.linspace(0, 1, len(species_vars)))

for i, key in enumerate(species_vars):
    # Plot true evolution from raw data
    ax.plot(raw_times, raw_profile[key], color=colors[i], label=species_labels[i] + " (True)")
    # Plot predicted evolution (denormalized)
    ax.scatter(raw_times, predicted_evolutions_denorm[i], color=colors[i], marker='x', s=30) # Changed marker for visibility

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("Time (s)", fontsize=14)
ax.set_ylabel("Species Abundance", fontsize=14)
ax.set_title(f"Predicted vs. True Species Evolution for {test_filename}", fontsize=16)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_ylim(1e-9, 2)
plt.tight_layout()
plt.show()

ValueError: The provided filename /Users/imalsky/Desktop/Chemulator/data/trained_model_final_mlp/best_model_jit.pt does not exist

In [None]:
import json
import random
import sys
import time
from pathlib import Path
import torch
import numpy as np
import pandas as pd

# --- Setup: Add project source to the Python path ---
project_root = Path.cwd().parent
src_path = project_root / 'src'
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))
    
from utils import load_config
from normalizer import DataNormalizer

def print_header(title: str):
    """Prints a formatted, centered header to the console."""
    width = 80
    padding = (width - len(title) - 2) // 2
    print("\n" + "=" * width)
    print(" " * padding, title, " " * padding)
    print("=" * width)

def prepare_batch_for_benchmark(batch_size: int, test_filenames: list, norm_data_folder: Path, species_vars: list, global_vars: list, device: torch.device):
    """
    Creates a batch of input tensors for benchmarking.
    Each item corresponds to the final time step of a randomly chosen test profile.
    """
    batch_inputs = []
    for _ in range(batch_size):
        test_filename = random.choice(test_filenames)
        with (norm_data_folder / test_filename).open("r") as f:
            norm_profile = json.load(f)
        initial_species = [norm_profile[key][0] for key in species_vars]
        global_conds = [norm_profile[key] for key in global_vars]
        final_norm_time = norm_profile["t_time"][-1]
        input_vector = torch.tensor(initial_species + global_conds + [final_norm_time], dtype=torch.float32)
        batch_inputs.append(input_vector)
    return torch.stack(batch_inputs).to(device)


def run_benchmark_and_validate():
    """
    Main function to load artifacts, run a performance benchmark, and then
    validate a single prediction with detailed output.
    """
    # 1. LOAD ARTIFACTS
    # --------------------------------------------------------------------------
    print_header("Initialization")
    print("Loading model and configuration artifacts...")
    CONFIG_FILE = project_root / "inputs/model_input_params.jsonc"
    DATA_ROOT = project_root / "data"
    
    config = load_config(CONFIG_FILE)
    if not config: print("Error: Could not load configuration. Exiting."); return
        
    model_folder = DATA_ROOT / config["output_paths_config"]["fixed_model_foldername"]
    normalized_data_folder = DATA_ROOT / config["data_paths_config"]["normalized_profiles_foldername"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = model_folder / "best_model_jit.pt"
    if not model_path.exists(): print(f"Error: Model file not found at {model_path}. Exiting."); return

    model = torch.jit.load(model_path, map_location=device)
    model.eval()
    
    norm_meta_path = normalized_data_folder / "normalization_metadata.json"
    if not norm_meta_path.exists(): print(f"Error: Normalization metadata not found at {norm_meta_path}. Exiting."); return
    with norm_meta_path.open("r") as f:
        norm_metadata = json.load(f)

    test_info_path = model_folder / "test_set_info.json"
    if not test_info_path.exists(): print(f"Error: Test set info file not found at {test_info_path}. Exiting."); return
    with test_info_path.open("r") as f:
        test_filenames = json.load(f)["test_filenames"]

    species_vars = sorted(config["species_variables"])
    global_vars = sorted(config["global_variables"])
    print(f"Setup complete. Model loaded on device: {device.type.upper()}")
    
    # 2. RUN PERFORMANCE BENCHMARK
    # --------------------------------------------------------------------------
    print_header("Performance Benchmark")
    BATCH_SIZE = 512
    NUM_WARMUP_RUNS = 5
    NUM_TIMING_RUNS = 20

    batch_tensor = prepare_batch_for_benchmark(BATCH_SIZE, test_filenames, normalized_data_folder, species_vars, global_vars, device)
    
    # --- GPU Warmup ---
    with torch.no_grad():
        for _ in range(NUM_WARMUP_RUNS):
            _ = model(batch_tensor)
    if device.type == 'cuda': torch.cuda.synchronize()

    # --- Timed Runs ---
    timings = []
    with torch.no_grad():
        for _ in range(NUM_TIMING_RUNS):
            start_time = time.perf_counter()
            _ = model(batch_tensor)
            if device.type == 'cuda': torch.cuda.synchronize()
            end_time = time.perf_counter()
            timings.append(end_time - start_time)

    # 3. REPORT BENCHMARK RESULTS
    # --------------------------------------------------------------------------
    total_time = sum(timings)
    avg_batch_time_ms = (total_time / NUM_TIMING_RUNS) * 1000
    avg_prediction_time_us = (avg_batch_time_ms / BATCH_SIZE) * 1000

    print(f"{'Batch Size:':<28} {BATCH_SIZE}")
    print(f"{'Device:':<28} {device.type.upper()}")
    print(f"{'Average time per batch:':<28} {avg_batch_time_ms:.4f} ms")
    print(f"{'Average time per single pred:':<28} {avg_prediction_time_us:.4f} µs (microseconds)")

    # 4. RUN SINGLE PREDICTION VALIDATION
    # --------------------------------------------------------------------------
    print_header("Single Prediction Validation")
    test_filename = random.choice(test_filenames)
    print(f"  - Using profile: {test_filename}")
    with (normalized_data_folder / test_filename).open("r") as f:
        norm_profile = json.load(f)
        
    query_time_idx = random.randint(1, len(norm_profile["t_time"]) - 1)
    norm_time_to_predict = norm_profile["t_time"][query_time_idx]
    real_time_to_predict = DataNormalizer.denormalize(norm_time_to_predict, norm_metadata, "t_time")
    print(f"  - Predicting at time index {query_time_idx} (t ≈ {real_time_to_predict:.4e} s)")
    
    initial_species = [norm_profile[key][0] for key in species_vars]
    global_conds = [norm_profile[key] for key in global_vars]
    input_vector = torch.tensor(
        initial_species + global_conds + [norm_time_to_predict], dtype=torch.float32
    ).unsqueeze(0).to(device)
    
    with torch.no_grad():
        norm_prediction_tensor = model(input_vector).squeeze(0).cpu()

    norm_true_values_tensor = torch.tensor([norm_profile[key][query_time_idx] for key in species_vars])
    
    # 5. DENORMALIZE AND REPORT VALIDATION RESULTS
    # --------------------------------------------------------------------------
    results = []
    for i, key in enumerate(species_vars):
        # Use .item() to extract the scalar value from the 0-dim tensor
        predicted_val = DataNormalizer.denormalize(norm_prediction_tensor[i], norm_metadata, key).item()
        true_val = DataNormalizer.denormalize(norm_true_values_tensor[i], norm_metadata, key).item()
        
        results.append({
            "Species": key.replace('_evolution', ''),
            "Predicted Value": predicted_val,
            "True Value": true_val,
            "Abs. Error": abs(predicted_val - true_val),
            "Rel. Error (%)": abs(predicted_val - true_val) / (true_val + 1e-20) * 100
        })

    df = pd.DataFrame(results)
    
    # Configure pandas for clean, aligned output
    pd.options.display.float_format = '{:,.4e}'.format
    
    print("\n" + df.to_string(index=False))
    print("-" * 80 + "\n")


if __name__ == "__main__":
    run_benchmark_and_validate()


                                 Initialization                                 
Loading model and configuration artifacts...
Setup complete. Model loaded on device: CPU

                             Performance Benchmark                             
Batch Size:                  512
Device:                      CPU
Average time per batch:      1.2045 ms
Average time per single pred: 2.3526 µs (microseconds)

                          Single Prediction Validation                          
  - Using profile: prof_71020.json
  - Predicting at time index 22 (t ≈ 8.8862e+00 s)

Species  Predicted Value  True Value  Abs. Error  Rel. Error (%)
   C2H2       5.0318e-03  4.8384e-03  1.9345e-04      3.9983e+00
    CH4       6.2143e-04  5.4206e-04  7.9377e-05      1.4644e+01
    CO2       5.6723e-05  5.1188e-05  5.5355e-06      1.0814e+01
     CO       1.0783e-05  1.0757e-05  2.6513e-08      2.4647e-01
    H2O       1.3105e-02  1.3226e-02  1.2047e-04      9.1086e-01
     H2       9.7524e-01  9.7