# Demo: Using the SimulationRun Class in nat-zacros


This notebook demonstrates the recommended workflow for Zacros KMC analysis using the `SimulationRun` class from the nat-zacros package.


**Setup Instructions:**
- Update the `user_paths` dictionary in the first code cell to include your username and the path to your local nat-zacros package.
- Ensure your Zacros output data is available in the expected directory (default: a `zacros_calculations` folder inside the current working directory).
- Activate your Python environment with all required dependencies (see the main README).

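If you encounter a `FileNotFoundError`, check that your paths are set correctly and that the required data files exist. A `ValueError` reporting an unknown user means your username is missing from the `user_paths` dictionary.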


In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import pickle
from pathlib import Path
import platform
import time

# Make the local nat_zacros checkout importable.
# Each key is a username (taken from the name of the home directory) and each
# value is the directory that gets prepended to sys.path for that user.
user_paths = {
    'a-DJA'  : Path.home() / 'GIT' / 'nat_zacros',
    'akandra': Path.home() / 'git' / 'nat_zacros',
}
username = Path.home().name
print(f"Operating System: {platform.system()}")
print(f"User            : {username}")

# Fail early with an actionable message when the user has no entry.
if username not in user_paths:
    raise ValueError(f"Unknown user: {username}. Add your path to user_paths dictionary.")
nat_zacros_pkg_dir = user_paths[username]

if not nat_zacros_pkg_dir.exists():
    # Do not abort here: nat_zacros may still be importable from another
    # location. The warning explains a subsequent ImportError, which would
    # otherwise give no hint that the configured path is wrong.
    print(f"WARNING: configured nat_zacros path does not exist: {nat_zacros_pkg_dir}")
elif str(nat_zacros_pkg_dir) not in sys.path:
    sys.path.insert(0, str(nat_zacros_pkg_dir))
    print(f"Added to path: {nat_zacros_pkg_dir}")

# Must come after the sys.path manipulation above.
from nat_zacros import lattice, state, trajectory, load_trajectories_parallel, SimulationRun

# Zacros output is expected in a folder next to where the notebook is run.
dir_with_calculations = Path.cwd() / 'zacros_calculations'
print(f"nat_zacros is at: {nat_zacros_pkg_dir.as_posix()}")
print(f"Data directory  : {dir_with_calculations.as_posix()}")

### 2. Specify directories for a group of simulations (Demo Version)

In [None]:
# Directory layout for this group of simulations
log_dir = dir_with_calculations / 'fn_3leed'
data_dir = log_dir / 'jobs'
results_dir = log_dir / 'results'
log_file = data_dir / 'jobs.log'

# Validate the input before creating any output directory, so a wrong path
# is reported as a missing log file rather than as a failed mkdir.
if not log_file.exists():
    raise FileNotFoundError(f"Log file not found: {log_file}")
results_dir.mkdir(exist_ok=True)

# The first line of the log is a whitespace-separated header; every following
# non-blank line is parsed as one JSON value describing a run.
with open(log_file, 'r') as f:
    log_header = f.readline().split()  # Read header
    log_entries = [json.loads(line) for line in f if line.strip()]

# Fields are accessed by position. Judging by the variable names: entry[0] is
# the run directory name, entry[2] holds two lattice dimensions, entry[3][0]
# the adsorbate count, entry[4] the temperature and entry[5][1:] the
# interaction labels -- TODO confirm against the writer of jobs.log.
run_dirs = [data_dir / str(entry[0]) for entry in log_entries]
temperatures = [entry[4] for entry in log_entries]
lat_size = [entry[2][0]*entry[2][1] for entry in log_entries]
n_ads = [entry[3][0] for entry in log_entries]
interactions = ['-'.join(entry[5][1:]) for entry in log_entries]
# Coverage = adsorbates per lattice site, paired run by run.
coverages = [n / size for n, size in zip(n_ads, lat_size)]
print(f"Found {len(run_dirs)} simulation runs")

### 3. Using the SimulationRun Class (New Simplified Workflow)

In [None]:
# Demonstrates the SimulationRun workflow on a single job: inspect metadata,
# load trajectories, then compute the ensemble-averaged RDF and energy.
# SimulationRun, time and np come from the import cell at the top of the
# notebook; run_dirs comes from the directory-setup cell.
print("="*70)
print("Demonstrating SimulationRun Class")
print("="*70)

# Initialize with run directory and equilibration fraction.
# NOTE(review): the original comment attributed fraction=0.5 to an
# equilibration analysis in another cell that is not part of this demo.
run_number = 0
run = SimulationRun(run_dirs[run_number], fraction=0.5)

# Clear cached data to force fresh loading (optional - comment out to use cache)
print("\nClearing cached data...")
run.clear_cache(trajectories=True, gref=False)
print()

# Display run information
print(f"\n{run}")
print("\nMetadata:")
print(f"  Temperature: {run.metadata['temperature']} K")
print(f"  Coverage: {run.metadata['coverage']:.3f} ML")
print(f"  Interactions: {run.metadata['interactions']}")
print(f"  Lattice size: {run.metadata['lattice_size']} sites")
print(f"  Number of trajectories: {len(run.traj_dirs)}")
print(f"  Results directory: {run.results_dir}")

# Load trajectories with caching
print("\n" + "="*70)
print("Loading Trajectories")
print("="*70)
# perf_counter is monotonic, so the elapsed time cannot be distorted by
# system clock adjustments the way time.time() differences can.
t_start = time.perf_counter()
run.load_trajectories(use_cache=True, parallel=True)
t_load = time.perf_counter() - t_start
print(f"\nLoading completed in {t_load:.2f} seconds")

# Compute ensemble-averaged RDF
print("\n" + "="*70)
print("Computing Ensemble-Averaged RDF")
print("="*70)

t_start = time.perf_counter()
r, g_avg, g_std = run.get_ensemble_rdf(r_max=40.0, dr=0.1)
t_rdf = time.perf_counter() - t_start

print(f"\nRDF computation completed in {t_rdf:.2f} seconds")
print(f"  Distance bins: {len(r)}")
print(f"  Peak position: {r[np.argmax(g_avg)]:.2f} Å")
print(f"  Peak height: {np.max(g_avg):.3f}")

# Compute ensemble-averaged energy vs time
print("\n" + "="*70)
print("Computing Ensemble-Averaged Energy vs Time")
print("="*70)

times, energy_avg, energy_std = run.get_ensemble_energy_vs_time(n_bins=100)

# nan-aware reductions are used for all three energy statistics.
print("Energy statistics (equilibrated region):")
print(f"  Mean: {np.nanmean(energy_avg):.4f} eV")
print(f"  Std (temporal): {np.nanstd(energy_avg):.4f} eV")
print(f"  Avg std (across trajectories): {np.nanmean(energy_std):.4f} eV")

# Closing banner. No manual-workflow timing is measured in this cell; only
# the summary line below is printed.
print("\n" + "="*70)
print("COMPARISON: SimulationRun vs Manual Workflow")
print("="*70)

print("\n✅ SimulationRun provides the same functionality with cleaner API")
print("="*70)

In [None]:
# Bar chart of the ensemble-averaged RDF (r, g_avg, g_std) computed by the
# SimulationRun cell, with the distance axis rescaled by lattice_constant.
fig, ax = plt.subplots(figsize=(10, 6))

# Rescale distances; the bar width is 1.5x the 0.1 bin spacing in the same units
lattice_constant = 2.821135  # Pt(111) 1nn distance
r_a0 = r / lattice_constant
bar_width = (0.1 / lattice_constant) * 1.5

# Bars show the mean g(r); the error bars show g_std on top of them
traj_label = f'SimulationRun ({len(run)} traj)'
ax.bar(
    r_a0,
    g_avg,
    width=bar_width,
    edgecolor='blue',
    facecolor='lightblue',
    alpha=0.7,
    label=traj_label,
    align='center',
)
ax.errorbar(
    r_a0,
    g_avg,
    yerr=g_std,
    fmt='none',
    ecolor='black',
    capsize=0,
    linewidth=1,
    alpha=0.6,
    zorder=10,
)

# Horizontal guide at g = 1
ax.axhline(
    1.0,
    color='k',
    linestyle='--',
    linewidth=1.5,
    alpha=0.5,
    label='Ideal gas (g=1)',
)

# Labels, title and legend
ax.set_xlabel(r'Distance ($r / a_0$)', fontsize=12)
ax.set_ylabel('g(r)', fontsize=12)
plot_title = f'RDF using SimulationRun Class\n{run.metadata["interactions"]}, T={run.metadata["temperature"]} K, θ={run.metadata["coverage"]:.3f}'
ax.set_title(plot_title, fontsize=13, fontweight='bold')
ax.legend(fontsize=10, loc='upper right')
ax.grid(True, alpha=0.3, axis='y')

# Axis ranges: x up to 40.0 expressed in lattice units, y with 10% headroom
x_upper = 40.0/lattice_constant
y_upper = max(g_avg) * 1.1
ax.set_xlim(0, x_upper)
ax.set_ylim(0, y_upper)

plt.tight_layout()
plt.show()