In [None]:
from dotenv import load_dotenv

load_dotenv()
from mlip_arena.tasks.thermal_conductivity.input import save_to_db

from ase.io import read

# Input structure file (extended XYZ) and the database file handed to save_to_db.
EXTXYZ_PATH = "./phononDB-PBE-structures.extxyz"
DB_PATH = "phononDB-PBE.db"

# index=":" asks ASE for every frame in the file, so this is a list of Atoms.
atoms_list = read(EXTXYZ_PATH, index=":", format="extxyz")

# Hand all structures to the project helper together with the target file name.
save_to_db(atoms_list, DB_PATH)

## Demo: Run thermal conductivity task locally

This section demonstrates how to use the predefined Prefect task to run a single thermal conductivity calculation locally. [Prefect](https://www.prefect.io/) offers [nice cache and idempotency features](https://www.prefect.io/blog/the-importance-of-idempotent-data-pipelines-for-resilience) that avoid data duplication and ensure consistent results. 

As you can see in the following demo, the task directly retrieves the persisted result instead of rerunning the entire task from scratch. 

In [None]:
from mlip_arena.tasks.thermal_conductivity.task import get_thermal_conductivity
from mlip_arena.tasks.thermal_conductivity.input import get_atoms_from_extxyz
from mlip_arena.models import MLIPEnum

# Take the first item yielded by the extxyz reader as the demo input.
atoms = next(get_atoms_from_extxyz("phononDB-PBE-structures.extxyz"))

# The demo needs a single calculator only, so pick the first member of
# MLIPEnum explicitly instead of starting a loop and breaking out of it.
# next() raises StopIteration if the enum is empty, rather than silently
# leaving `result` undefined.
model = next(iter(MLIPEnum))

# Run the thermal conductivity task for that one model on the one structure.
result = get_thermal_conductivity(
    atoms=atoms,
    calculator_name=model.name,
)

[32m2024-12-23 13:05:45.253[0m | [1mINFO    [0m | [36mmlip_arena.tasks.thermal_conductivity[0m:[36m<module>[0m:[36m6[0m - [1m
The module 'mlip_arena.tasks.thermal_conductivity' is adapted from the repository: https://github.com/MPA2suite/k_SRME. 
By using this module, you agree to the terms and conditions specified in the following license: 

https://github.com/MPA2suite/k_SRME/blob/main/LICENSE

Additionally, please ensure proper attribution by citing the following reference: 

- Póta, B., Ahlawat, P., Csányi, G., & Simoncelli, M. (2024). Thermal Conductivity Predictions with Foundation Atomistic Models. arXiv preprint arXiv:2408.00755.

A local copy of the LICENSE file can be found at: /pscratch/sd/c/cyrusyc/mlip-arena/mlip_arena/tasks/thermal_conductivity/LICENSE.
[0m


No module named 'deepmd'


In [None]:
# Display `result` from the task call in the previous cell: the result is
# still there in the notebook namespace.
result

{'force': {'fc2_set': array([[[-5.37725165e-02, -1.98128669e-09, -1.09896064e-07],
          [ 1.30096143e-02,  2.37876330e-10,  0.00000000e+00],
          [-7.03117257e-05, -2.31597874e-09, -1.86264515e-09],
          ...,
          [ 2.67593423e-05,  3.38032842e-05,  3.80352139e-06],
          [-4.57148781e-05,  1.17830932e-05, -3.76254320e-07],
          [-5.19048845e-05, -9.46447253e-05, -6.86142594e-05]],
  
         [[ 1.13546950e-04, -1.12909055e-03, -1.12909451e-03],
          [ 2.28802848e-04,  9.35941120e-04,  9.35949385e-04],
          [-1.60758485e-04,  5.72525751e-05,  5.72502613e-05],
          ...,
          [-1.27753742e-06, -3.78973782e-05, -1.15111470e-06],
          [-3.55979341e-06,  3.13669443e-06,  4.61004674e-06],
          [ 4.16665458e-09, -1.86264515e-09,  3.72529030e-09]]],
        dtype=float32),
  'fc3_set': array([[[-5.38121052e-02,  1.42709689e-09,  1.86264515e-09],
          [ 2.57739089e-02, -1.68878511e-09, -1.86264515e-09],
          [ 1.42474935e-04,

## Production: Orchestrate HPC workers to scale up the computation

Now we will scale up the calculations by running parallel jobs in an HPC environment. This can be achieved with adaptive scaling on a [dask cluster](https://docs.dask.org/en/stable/deploying.html#high-performance-computing). If you use a resource manager other than SLURM, feel free to modify the following code to suit your purpose.

In [None]:
from dotenv import load_dotenv

load_dotenv()

from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.tasks.thermal_conductivity.flow import run as TC


# Orchestrate your awesome dask workflow runner

# Resources requested per SLURM allocation; the first two are interpolated
# into the extra directives below.
nodes_per_alloc = 1
gpus_per_alloc = 4
ntasks = 1

# Shell lines placed in the job script prologue (environment setup).
job_script_prologue = [
    "source ~/.bashrc",
    "module load python",
    "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
]

# Additional scheduler directives: job name, queue, node count, GPU
# constraint and GPU count.
job_extra_directives = [
    "-J tc",
    "-q regular",
    f"-N {nodes_per_alloc}",
    "-C gpu",
    f"-G {gpus_per_alloc}",
]

cluster_kwargs = {
    "cores": 1,
    "memory": "64 GB",
    "shebang": "#!/bin/bash",
    "account": "matgen",
    "walltime": "00:30:00",
    "job_mem": "0",
    "job_script_prologue": job_script_prologue,
    # Directives listed here are skipped, presumably so that the "-J tc"
    # given in the extra directives is the only job name -- verify with the
    # printed job script.
    "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
    "job_extra_directives": job_extra_directives,
}

cluster = SLURMCluster(**cluster_kwargs)

# Show the generated submission script so it can be inspected.
print(cluster.job_script())

# Adaptive scaling, bounded to between 10 and 20 jobs.
cluster.adapt(minimum_jobs=10, maximum_jobs=20)
client = Client(cluster)

# Run the workflow on HPC cluster in parallel

# Point the Prefect task runner at the scheduler of the cluster created above.
# (Pass log_prints=True to with_options as well to forward print output.)
dask_runner = DaskTaskRunner(address=client.scheduler.address)
tc_flow = TC.with_options(task_runner=dask_runner)
results = tc_flow()