# Parallel MD Generation using Multiprocessing

This notebook generates molecular dynamics trajectories (`.xyz` files) for graphene nanoribbons.
It uses `multiprocessing` to run simulations for different parameters (Temperature, Length) in parallel, utilizing multiple CPU cores.

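**Note:** The core simulation logic lives in `md_worker.py` rather than in this notebook. On Windows, `multiprocessing` starts worker processes with the `spawn` method, so each worker must be able to import the target function from a module; a function defined in a notebook cell cannot be imported this way.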


In [None]:
import os
import multiprocessing
import numpy as np
import time

# Import the worker function from the external file
# This is required for multiprocessing on Windows
try:
    import md_worker
    from md_worker import worker_wrapper
except ImportError as err:
    # Fail fast: without worker_wrapper the execution cell cannot run, and
    # continuing would only surface later as an unrelated NameError.
    # The original exception is chained and echoed because the failure may
    # also come from a dependency imported inside md_worker.py.
    raise ImportError(
        "Error: md_worker.py not found. Please ensure it is in the same directory."
        f" (original error: {err})"
    ) from err


In [None]:
# Scratch preview: 10 evenly spaced values from 5 to 50, cast to int.
# The result is only displayed; it is not assigned or used by the cells below.
np.linspace(5,50,10).astype(int)

In [None]:
# --- CONFIGURATION ---
# Defines the parameter grid and builds `tasks`, the list of argument tuples
# that the execution cell hands to worker_wrapper.

# 1. Define Parameters
temperatures = [300] # Kelvin
lengths = np.linspace(5, 20, 16).astype(int) # Unit cells

# 2. Simulation Steps
nsteps = 1000       # Total MD steps
dump_interval = 1   # Write to file every N steps

# 3. Output Directory
output_dir = "MD_files"
# exist_ok=True makes creation idempotent and avoids the race between a
# separate existence check and the makedirs call.
os.makedirs(output_dir, exist_ok=True)

# 4. Prepare Task List
# One tuple per (temperature, length) combination, in the argument order
# expected by worker_wrapper:
#   (temp_K, ntile_length, filename, nsteps, dump_interval)
tasks = []

print("Preparing simulation tasks:")
for T in temperatures:
    for L in lengths:
        # NOTE(review): the path is relative to the current working directory;
        # confirm the worker processes run with the same working directory.
        filename = os.path.join(output_dir, f"md_T{T}_L{L}.xyz")

        task_args = (T, L, filename, nsteps, dump_interval)
        tasks.append(task_args)
        print(f"  - T={T}K, L={L} -> {filename}")

print(f"\nTotal tasks: {len(tasks)}")


In [None]:
# --- EXECUTION ---

def run_parameter_sweep_parallel():
    """Run every entry of the module-level ``tasks`` list in a process pool.

    Each task tuple is passed as a single argument to ``worker_wrapper``.
    The call blocks until all tasks have finished.

    Returns:
        list: One ``worker_wrapper`` result per task, in task order
        (presumably the path of the written trajectory file -- confirm
        against md_worker.py).
    """
    # Keep two cores free for the OS/notebook, never start more workers than
    # there are tasks (each extra process would only sit idle), and always
    # use at least one process so Pool() accepts the value.
    num_processes = max(1, min(multiprocessing.cpu_count() - 2, len(tasks)))
    print(f"Starting pool with {num_processes} processes...")

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during a long sweep.
    start_time = time.perf_counter()

    # The context manager tears the pool down once map() has returned.
    with multiprocessing.Pool(processes=num_processes) as pool:
        # map blocks until every task is done and preserves input order.
        results = pool.map(worker_wrapper, tasks)

    duration = time.perf_counter() - start_time

    print(f"\nAll simulations completed in {duration:.2f} seconds.")
    return results

if __name__ == "__main__":
    # The guard has no effect when run as a notebook cell, but it keeps the
    # sweep from starting on import if this code is exported to a script.
    completed_files = run_parameter_sweep_parallel()

    # List whatever each worker returned, one entry per line.
    print("\nGenerated Files:")
    for path in completed_files:
        print(f"  {path}")


In [None]:
# --- VERIFICATION ---
# Sanity-check the output of the first task by reading it back with ASE and
# reporting its frame and atom counts.
from ase.io import read

# The check always targets the first task, so the message says so.
print("Verifying first generated file...")
if not tasks:
    print("No tasks defined.")
else:
    test_file = tasks[0][2] # Filename is the third element of each task tuple
    if not os.path.exists(test_file):
        print(f"File {test_file} was not created.")
    else:
        try:
            # index=':' requests all frames rather than only the last one.
            atoms = read(test_file, index=':')
        except Exception as e:
            # Report any reader failure instead of aborting the cell.
            print(f"Failed to read {test_file}: {e}")
        else:
            print(f"File: {test_file}")
            print(f"Number of frames: {len(atoms)}")
            if atoms:
                print(f"Number of atoms: {len(atoms[0])}")
                print("Status: OK")
            else:
                # An empty trajectory is a failed run, not a read error.
                print("Status: FAILED (file contains no frames)")
