# Running Instructions

The instructions below were tested on Ubuntu 22.04.3 (Windows Subsystem for Linux 2, WSL2). This notebook will likely also work on macOS.
1. Download the GSEE benchmark from Github: https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip
2. Using a terminal, clone the benchmark repository: `git clone https://github.com/isi-usc-edu/qb-gsee-benchmark.git`
3. Enter the benchmark folder: `cd qb-gsee-benchmark`
4. Create a clean Python 3.10 virtual environment ([Python Website](https://www.python.org/downloads/)) with this command: `python -m venv env_qb_benchmark`
5. Activate environment: `source env_qb_benchmark/bin/activate`
6. Install benchmark software: `python -m pip install -e .`
7. Enter examples folder: `cd examples`
8. Open this notebook: `jupyter notebook run_dmrg.ipynb` 
9. Copy and paste the provided URL to a web browser. The URL will look like `http://localhost:8888/tree?token=62cc0dcd33bf946ec260fed02b19b6a453f9b89107dd2d70`
10. Open `run_dmrg.ipynb`
11. Update relevant parameters as needed; see [General Parameters](#general-parameters) and [DMRG Parameters](#dmrg-parameters).
12. Run all cells of the notebook. The whole notebook should run in less than 15 min on a laptop.



In [None]:
%reset -f 
# Install pip packages into the interpreter that backs the current Jupyter kernel
# (approach from https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/)
import sys
# Install dmrghandler from GitHub; the extra index URL points at the block2-preview package index
!{sys.executable} -m pip install --extra-index-url=https://block-hczhai.github.io/block2-preview/pypi/ git+https://github.com/jtcantin/dmrghandler

# Install additional pip packages
!{sys.executable} -m pip install paramiko h5py numpy 

# This cell took about 8 minutes on the author's machine the first time, but only about 7 seconds after that.
# For the chosen problem instances, DMRG should take no more than 15 min to run on a laptop.

In [2]:
import os
from pathlib import Path
import json
import dmrghandler.config_io as config_io
import dmrghandler.slurm_scripts as slurm_scripts
import h5py
import run_support
import qb_gsee_benchmark.utils

# General Parameters

In [3]:
# ---------------------------------------------------------------------------
# Housekeeping
# ---------------------------------------------------------------------------
# IMPORTANT: If you want to keep the DMRG output files, not just the jsons,
# set this to False.
remove_temp_files = True

# ---------------------------------------------------------------------------
# Local paths and SFTP credentials
# ---------------------------------------------------------------------------
environment_path = Path("../env_qb_benchmark")
local_store_path = Path("dmrg_calculation_storage")

# Update this to the path of your private key
ppk_path = "/mnt/c/darpa-qb-key.ppk"
# Update this to your username
sftp_username = "darpa-qb"

repository_url = "https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip"

# ---------------------------------------------------------------------------
# Problem instances (input) and solution files (output)
# ---------------------------------------------------------------------------
solution_save_location = "solution_jsons"
problem_instance_files_repository_path = "problem_instances"

# Move an entry out of the commented section to include it in the run.
chosen_problem_instance_files = [
    # "problem_instance.mn_mono.cb40f3f7-ffe8-40e8-4544-f26aad5a8bd8.json",
    # "problem_instance.planted_solution_0007.3aaf0bb4-b412-4746-922a-c380b4024d00.json",
    # "problem_instance.planted_solution_0010.bf3fb654-72c7-4fd4-bfd1-9ffef5aaebd8.json",
    "problem_instance.planted_solution_0001.3d3b9e8a-7842-4ca1-bd82-eddb9804972d.json",
    "problem_instance.planted_solution_0008.b57eb979-5c09-4974-97b4-5862e109a1ae.json",
]

json_solution_schema_url = "https://raw.githubusercontent.com/isi-usc-edu/qb-gsee-benchmark/main/schemas/solution.schema.0.0.1.json"
# Same URL under the name that is handed to the solution-file writer later on.
json_solution_schema_url_file = json_solution_schema_url

# ---------------------------------------------------------------------------
# Metadata recorded with the solutions (example values -- replace with yours)
# ---------------------------------------------------------------------------
contact_info = [
    {
        "name": "Example E. Example",
        "email": "example@example.ca",
        "institution": "University of Example",
    },
]

compute_details = {
    "computing_environment_name": "Example Computer",
    "cpu_description": "Intel i5-1135G7 @ 2.40GHz",
    "ram_available_gb": "16GB",
    "clock_speed": "2.4 GHz",
    "total_num_cores": 4,
}

# compute_details is embedded by reference under "classical_hardware_details".
solver_details = {
    "solver_uuid": "13474cee-e648-48d3-9526-0314533ae30d",
    "solver_short_name": "DMRG_surface_lowest_energy",
    "compute_hardware_type": "classical_computer",
    "classical_hardware_details": compute_details,
    "algorithm_details": "DMRG with the lowest variational energy obtained so far.",
    "software_details": "Block2 v0.5.3rc16 with dmrghandler, commit version d603fdc6409fc194a416aa3a519362d5d91790d9 or later.",
}

### Obtain Problem Instance and FCIDUMP Files

In [4]:
# Locate the problem instance JSON files inside the repository checkout,
# which sits one directory above this examples folder.
repository_path = Path("../")
problem_instance_files_path = repository_path.joinpath(
    problem_instance_files_repository_path
)
problem_instance_files = [*problem_instance_files_path.glob("*.json")]

In [None]:
# Download and save FCIDUMPs as listed in chosen_problem_instance_files.
# The private key path and SFTP username come from the General Parameters cell.
# NOTE(review): presumably they authenticate the SFTP download -- confirm in run_support.
data_file_list = run_support.download_task_fcidump_files(
    chosen_problem_instance_files,
    problem_instance_files_path,
    local_store_path,
    ppk_path,
    sftp_username,
)
# Show the returned list; later cells iterate over it as the FCIDUMP data files.
print(data_file_list)

# DMRG Parameters 

In [None]:
# DMRG parameters for dmrghandler
# dmrghandler is a wrapper for Block2: https://block2.readthedocs.io/en/latest/#

# To simply run the script, you only need to update the following:
#       num_threads_list              # Ensure this is not more than the number of cores (or threads) on your machine
#       n_mkl_threads_list            # Ensure this is not more than the number of cores (or threads) on your machine
#       stack_mem                     # (set in dmrg_advanced_config) Ensure this is less than the total memory available on your machine; not a hard memory limit on Block2, though
#       python_environment_location   # (set in submit_dict) Update this to the path of your python environment where this notebook is running; if the running instructions are followed, this need not be changed

# For more control of the calculation, consider also changing:
#       max_time_limit_sec_list
#       starting_bond_dimension_list
#       max_num_sweeps_list
#       sweep_schedule_bond_dims_parameters
#       sweep_schedule_noise_list
#       sweep_schedule_davidson_threshold_list
#       init_state_seed_list
#       symmetry_type_list
#       reordering_method_list
#       config_file_prefix            # (argument of config_io.gen_config_files)
#       job_name                      # (set in submit_dict)

# For all list parameters, if a list with more than one value is given, the length of the list
# must be the same as the length of data_file_list.
# If a single value is given, it will be used for all FCIDUMP files.
config_dict = {
    # One prefix per FCIDUMP file: the file name without its final suffix
    "plot_filename_prefix_list": [
        Path(fcidump_file).stem for fcidump_file in data_file_list
    ],
    "main_storage_folder_path_prefix": str(local_store_path),
    "max_bond_dimension_list": [10],
    "max_time_limit_sec_list": [5 * 60],  # Max time limit per FCIDUMP file in seconds
    "min_energy_change_hartree_list": [
        1e-4
    ],  # Convergence threshold as the bond dimension is increased
    "extrapolation_type_list": ["discard_weights"],  # Auto extrapolation, unreliable
    "starting_bond_dimension_list": [4],
    "max_num_sweeps_list": [20],
    "energy_convergence_threshold_list": [1e-8],
    "sweep_schedule_bond_dims_parameters": [
        [(2, 4), (1, 5)]
    ],  # (division_factor, count),
    # e.g. [(2, 4), (1, 5)] and bond dimension of 3 -> [1, 1, 1, 1, 3, 3, 3, 3, 3]
    # The noise and Davidson-threshold schedules below each have 9 entries,
    # the same length as the example sweep schedule above.
    "sweep_schedule_noise_list": [[1e-4] * 4 + [1e-5] * 4 + [0]],
    "sweep_schedule_davidson_threshold_list": [[1e-10] * 9],
    "init_state_bond_dimension_division_factor_list": [2],
    "init_state_seed_list": [
        658724
    ],  # Random number generator seed for choosing the initial MPS state
    "initial_mps_method_list": ["random"],
    "factor_half_convention_list": [True],  # True for standard FCIDUMP files
    "symmetry_type_list": ["SU(2)"],  # "SZ" or "SU(2)"
    "num_threads_list": [4],
    "n_mkl_threads_list": [4],
    "track_mem": [False],
    "reordering_method_list": ["fiedler, interaction matrix"],
    "calc_v_score_bool_list": [True],
}

# Advanced settings handed to config_io.gen_config_files together with
# config_dict. The defaults quoted in the comments are the ones the original
# authors noted.
dmrg_advanced_config = dict(
    occupancy_hint=None,
    full_fci_space_bool=True,
    init_state_direct_two_site_construction_bool=False,
    # Default is None, for "Normal"
    davidson_type=None,
    # Cutoff of eigenvalues, default is 1e-20
    eigenvalue_cutoff=1e-20,
    # Default is 4000
    davidson_max_iterations=4000,
    # Default is 50
    davidson_max_krylov_subspace_size=50,
    # Whether to use a lower memory version of the noise, default is False
    lowmem_noise_bool=False,
    # Default is 0, where to start sweep
    sweep_start=0,
    # Default is None, True means forward sweep (left-to-right)
    initial_sweep_direction=None,
    # In bytes (10 GiB); ensure that this value is less than the total memory available
    stack_mem=10 * 1024**3,
    stack_mem_ratio=0.9,
    # "do_single_calc": False,
    # Number of states to calculate, default is 1, the ground state
    num_states=1,
)

# Generate configuration files
# A single call covers every entry of data_file_list. The returned per-file
# configuration dicts are reused below by the SLURM script helpers and by the
# loop that runs DMRG.
config_files_list, config_dict_single_file_list = config_io.gen_config_files(
    data_file_list=data_file_list,
    config_dict=config_dict,
    dmrg_advanced_config=dmrg_advanced_config,
    config_file_prefix="dmrg_example_run_",
)
print(f"config_files_list: {config_files_list}")
# Uncomment to inspect the per-file configuration dicts as well:
# print(f"config_dict_single_file_list: {config_dict_single_file_list}")

# Parameters for when using SLURM on a cluster.
# The values are placeholders; replace the email, account and partition with
# your own before submitting anything.
submit_dict = dict(
    time_cap_string="00-23:59:00",
    job_name="dmrg_example_run_",
    email="eample_email@example.com",
    account_name="example",
    tasks_per_node="1",
    cpus_per_task="40",
    partition="debug",
    python_environment_location="../env_qb_benchmark",
)

# Generate python and SLURM submission scripts
# NOTE(review): presumably this writes the per-file Python run script that the
# "Run DMRG" cell executes via data_config["python_run_file"] -- confirm in dmrghandler.
slurm_scripts.gen_run_files(submit_dict, config_dict_single_file_list)


# submit_commands only for use on a cluster with SLURM installed.
# Keep this line even for local runs: submit_commands is also passed to
# run_support.get_uuid_fcidump_mapping in the "Collect Solution Data" section.
submit_commands = slurm_scripts.gen_submit_commands(config_dict_single_file_list)
print(f"submit_commands: \n{submit_commands}")

### Run DMRG

In [None]:
# Run DMRG on the FCIDUMPs
#
# For every per-file configuration this cell
#   1. executes the generated Python run script with the interpreter of the
#      configured virtual environment,
#   2. records the location of the resulting dmrg_results.hdf5 file, and
#   3. reads a handful of result datasets from that file and prints them.
#
# Raises subprocess.CalledProcessError if a run script exits with a non-zero
# status, so that a failed calculation is reported immediately instead of
# surfacing later as a missing or stale HDF5 file.
import subprocess  # cell-local import: launch the run scripts without a shell

scratch_sim_path = Path(local_store_path) / Path("scratch_sim")
scratch_sim_path.mkdir(parents=True, exist_ok=True)
scratch_sim_path_absolute = scratch_sim_path.resolve()

# The child processes inherit this environment variable. Its value does not
# depend on the loop, so it is set once up front.
os.environ["SCRATCH"] = str(scratch_sim_path_absolute)

# Interpreter of the virtual environment created in the running instructions.
python_executable = str(Path(environment_path) / "bin" / "python")

dmrg_hdf5_files = []
# The loop variable is deliberately not called config_dict, so the
# config_dict defined in the DMRG Parameters cell is not overwritten.
for single_file_config in config_dict_single_file_list:
    data_config = single_file_config["data_config"]
    python_run_file_name = data_config["python_run_file"]

    # Run DMRG. An argument list (no shell) keeps paths containing spaces
    # intact; check=True turns a non-zero exit status into an exception.
    subprocess.run([python_executable, str(python_run_file_name)], check=True)
    print("DMRG NOW EXITED")

    # Get results
    main_storage_folder_path = data_config["main_storage_folder_path"]
    hdf5_file_path = Path(main_storage_folder_path) / Path("dmrg_results.hdf5")
    dmrg_hdf5_files.append(hdf5_file_path)

    with h5py.File(hdf5_file_path, "r") as f:
        final_results = f["/final_dmrg_results"]
        dmrg_energies = final_results["past_energies_dmrg"][:]
        dmrg_bond_dimensions = final_results["bond_dims_used"][:]
        discarded_weights = final_results["past_discarded_weights"][:]

        preloop_results = f["/first_preloop_calc/dmrg_results"]
        h_min_e_optket_norm = float(preloop_results["h_min_e_optket_norm"][()])
        variance = float(preloop_results["optket_variance"][()])
        v_score_numerator = float(preloop_results["v_score_numerator"][()])
        hf_energy = float(preloop_results["hf_energy"][()])
        initial_ket_energy = float(preloop_results["initial_ket_energy"][()])

        # For the datasets below the first element of the stored value is
        # taken, exactly as before.
        # NOTE(review): presumably they are stored as length-1 arrays -- confirm.
        deviation_init_ket = float(preloop_results["deviation_init_ket"][()][0])
        v_score_init_ket = float(preloop_results["v_score_init_ket"][()][0])
        deviation_hf = float(preloop_results["deviation_hf"][()][0])
        v_score_hartree_fock = float(preloop_results["v_score_hartree_fock"][()][0])

    # Print the values in the same order and format as before.
    for label, value in (
        ("dmrg_energies", dmrg_energies),
        ("dmrg_bond_dimensions", dmrg_bond_dimensions),
        ("discarded_weights", discarded_weights),
        ("h_min_e_optket_norm", h_min_e_optket_norm),
        ("variance", variance),
        ("v_score_numerator", v_score_numerator),
        ("deviation_init_ket", deviation_init_ket),
        ("v_score_init_ket", v_score_init_ket),
        ("hf_energy", hf_energy),
        ("deviation_hf", deviation_hf),
        ("v_score_hartree_fock", v_score_hartree_fock),
        ("initial_ket_energy", initial_ket_energy),
    ):
        print(f"{label}: {value}")

### Collect Solution Data

In [None]:
# Derive uuid_fcidump_mapping_dict from the SLURM submit commands and the
# local storage folder; run_support.collect_dmrg_data needs it in the next cell.
uuid_fcidump_mapping_dict = run_support.get_uuid_fcidump_mapping(submit_commands, local_store_path)
print(uuid_fcidump_mapping_dict)


In [None]:
# Gather the data from the DMRG HDF5 result files and pass it straight through
# run_support.filter_lowest_energy_data; only the filtered result is kept.
all_dmrgh5_data = run_support.filter_lowest_energy_data(
    run_support.collect_dmrg_data(dmrg_hdf5_files, uuid_fcidump_mapping_dict)
)

In [None]:
# Associate the collected DMRG data with the chosen problem instance files;
# the resulting dict is the input of run_support.create_solution_files below.
prob_inst_data_sol_dict = run_support.map_fcidump_to_problem_instances(
    chosen_problem_instance_files, problem_instance_files_path, all_dmrgh5_data
)
print(prob_inst_data_sol_dict)


### Save solution json files

In [None]:
# Create the solution files; the helper returns the files it saved.
saved_files = run_support.create_solution_files(
    prob_inst_data_sol_dict,
    solution_save_location,
    json_solution_schema_url_file,
    contact_info,
    solver_details,
)

# Read every saved file back and hand its content to the benchmark validator.
# NOTE(review): the "is valid" message assumes validate_json raises on an
# invalid document rather than returning a status -- confirm.
for solution_path in saved_files:
    with open(solution_path, "r") as solution_handle:
        solution_payload = json.load(solution_handle)
    qb_gsee_benchmark.utils.validate_json(json_dict=solution_payload)
    print(f"File {solution_path} is valid.")

### Remove Temporary Files

In [12]:
# Remove the temporary files and folders produced while running this notebook.
# Cleanup is best-effort: anything that is already gone is skipped silently,
# so the cell can be re-run safely.
if remove_temp_files:
    import shutil  # cell-local import: only needed for this cleanup step

    # missing_ok avoids a FileNotFoundError (which would abort the rest of the
    # cleanup) when the log was never written or has already been removed.
    Path("dmrghandler.log").unlink(missing_ok=True)

    # scratch_sim_path lives inside local_store_path; both are listed to keep
    # the original removal order. Removing directories in-process (no shell)
    # also handles paths containing spaces.
    for temp_dir in (scratch_sim_path, local_store_path, "config_store", "tmp_dir"):
        shutil.rmtree(temp_dir, ignore_errors=True)
    