In [None]:
import djlib.djlib as dj
import numpy as np
import matplotlib.pyplot as plt
import os
import json 
import thermocore.io.casm as cio
import thermocore.geometry.hull as thull
import djlib.mc.mc as mc
from glob import glob
import djlib.clex.clex as cl
from djlib.plotting.hull_plotting import plot_stable_chemical_potential_windows_for_binary

In [None]:
#Load the CASM query data and regroup it so that each queried property can be looked up by name.
with open('ZrN_FCC_1.2.0_8_body_10-5-2022.json') as f:
    query = json.load(f)
data = cio.regroup_query_by_config_property(query)

#Convert the properties used in this notebook to numpy arrays.
corr, comp, formation_energy, name = (
    np.array(data[key]) for key in ('corr', 'comp', 'formation_energy', 'name')
)
#Independent copy of the full correlation matrix, kept under its own name for the un-pruned hull check later on.
un_pruned_corr = np.array(data['corr'])



In [None]:
#Load the posterior distribution from file.
#A separate example in the djlib examples directory shows how to obtain quality fits and posterior distributions.
with open('posterior_mean_weighted_LS_with_hullcorr.json') as f:
    posterior = json.load(f)
#Show which entries the posterior file contains.
#This notebook reads 'posterior_mean', 'posterior_covariance' and 'upscaling_vector'.
print(posterior.keys())
#Posterior mean and covariance of the ECI. The "pruned" prefix follows the notebook's convention that these
#cover only the basis functions that were kept (see the upscaling vector cell).
pruned_posterior_mean = np.array(posterior['posterior_mean'])
posterior_covariance = np.array(posterior['posterior_covariance'])


In [None]:
#A fit usually starts from a large set of basis functions, some of which are later removed.
#CASM, however, expects an ECI vector with one entry per basis function of the original set.
#The upscaling vector records which basis functions were kept, so the full-length ECI vector
#can be reconstructed when needed.
upscaling_vector = np.array(posterior['upscaling_vector'])
print(upscaling_vector.shape)

#Keep only the correlation columns whose upscaling entry equals 1.
retained_basis_functions = upscaling_vector == 1
pruned_corr = corr[:, retained_basis_functions]
print(pruned_corr.shape)

In [None]:
#Visualize the convex hull predicted by the posterior mean, after pruning zeroed ECI and their corresponding basis functions.
from djlib.plotting.hull_plotting import general_binary_convex_hull_plotter

#Predicted formation energies from the pruned correlations and the pruned ECI.
pruned_predicted_energies = np.matmul(pruned_corr, pruned_posterior_mean)
fig = general_binary_convex_hull_plotter(
    true_energies=formation_energy,
    composition=comp,
    predicted_energies=pruned_predicted_energies,
)
plt.show()

In [None]:
#Repeat the hull plot with the full-length (upscaled) ECI vector and the un-pruned correlations.
#The result should match the pruned plot; this checks that the pruning and upscaling are consistent.
full_length_posterior_mean = cl.upscale_eci_vector(pruned_posterior_mean, upscaling_vector)
un_pruned_predicted_energies = np.matmul(un_pruned_corr, full_length_posterior_mean)
fig = general_binary_convex_hull_plotter(
    true_energies=formation_energy,
    composition=comp,
    predicted_energies=un_pruned_predicted_energies,
)
plt.show()

In [None]:
#Visualize ECI uncertainty: the posterior mean of each ECI with 1 and 2 standard deviation error bars.
eci_indices = list(range(len(pruned_posterior_mean)))
#Standard deviation of each ECI, taken from the diagonal of the posterior covariance.
eci_standard_deviation = np.sqrt(np.diag(posterior_covariance))

for sigma_multiple, bar_color, bar_label in (
    (1, 'r', '1 standard deviation'),
    (2, 'k', '2 standard deviations'),
):
    plt.errorbar(
        eci_indices,
        pruned_posterior_mean,
        yerr=sigma_multiple*eci_standard_deviation,
        fmt='.',
        capsize=2,
        color=bar_color,
        label=bar_label,
    )

plt.xlabel('ECI index', fontsize=21)
plt.ylabel('ECI value (eV)', fontsize=21)
plt.xticks(fontsize=21)
plt.yticks(fontsize=21)
plt.legend(fontsize=21)
fig = plt.gcf()
fig.set_size_inches(15, 12)

In [None]:
#The propagation code was written to propagate many ECI vectors,
#but it is just as convenient for a single propagation. Only the minimal change shown below is needed.

#Number of ECI vectors to propagate. To draw random samples from the posterior instead, use:
number_of_propagations = 1
#eci = np.random.multivariate_normal(pruned_posterior_mean, posterior_covariance, number_of_propagations)
#Here the posterior mean itself is used as the only "sample" (one row per ECI vector).
eci = np.reshape(pruned_posterior_mean, (1, -1))

propagation_directory = "/media/derick/big_storage/research_backup/DeoResearch/experiments/ZrN_FCC_1.2.0_ediffg_-0.02/posterior_phase_diagram" 
template_project_root_path = '/media/derick/big_storage/research_backup/DeoResearch/experiments/ZrN_FCC_1.2.0_propagation/template_project'

#One dictionary per ECI vector. The keys are the ones read by the
#sgcmc_casm_project_creator function defined below; the ECI is upscaled to full length for CASM.
propagation_info_dicts = [
    {
        "sample_index": int(row_index),
        "template_project_root_path": template_project_root_path,
        "eci": cl.upscale_eci_vector(eci_row, upscaling_vector),
        "propagation_directory": propagation_directory,
    }
    for row_index, eci_row in enumerate(eci)
]

In [None]:
#Create a linearly spaced array of mu values.
#For each mu value, an LTE, a heating and a cooling run will be performed.
 
#Alternative ranges used previously (kept for reference):
#mu_list = np.linspace(-1.5,1.5,31).tolist()
#A mu list that runs from 0.31 to 0.49, incrementing by 0.01
#mu_list = np.linspace(0.31,0.49,19).tolist()
#mu_list = np.linspace(0.01,0.29,29).tolist()
#Active range: -0.29 to -0.21 in 9 evenly spaced points.
mu_list = np.linspace(-0.29,-0.21,9).tolist()

print(mu_list)

#Create a list of temperature values.
#For each temperature value, two constant temperature runs will be performed, one from low to high chemical potential, and one from high to low chemical potential.
#Often, it is only necessary to do one high temperature pair.
#NOTE: sgcmc_setup (defined below) hard-codes 1700 for the cooling and heating runs, and for selecting the
#constant temperature run that cooling runs start from, so 1700 should stay in this list.
T_list = [1700]
print(T_list)


In [None]:
#Define a setup function that the gridspace manager will use to set up heating, cooling, LTE and constant temperature runs.
#Specific details such as supercell shape, chemical potential ranges, temperature ranges, walltime, etc. are defined here. 


def sgcmc_setup(casm_root_path: str):
    """Write the run directories for all grand canonical Monte Carlo runs of one casm project.

    Four groups of runs are formatted below ``<casm_root_path>/grand_canonical_monte_carlo``:
        -MC_LTE: Low Temperature Expansion (LTE) runs at 40 K, one per value in ``mu_list``
        -MC_t_const: constant temperature runs, two per value in ``T_list``
         (chemical potential swept from 1.5 to -1.5 and from -1.5 to 1.5)
        -MC_cooling: cooling runs from 1700 K to 40 K, one per value in ``mu_list``
        -MC_heating: heating runs from 40 K to 1700 K, one per value in ``mu_list``

    After the cooling run directories exist, each cooling run's mc_settings.json is edited so
    that the run starts from the final state of a 1700 K constant temperature run, taken at the
    chemical potential closest to the cooling run's chemical potential. This is only done once that
    final state file exists, i.e. after the constant temperature runs have produced it.

    This function reads the notebook-level variables ``mu_list`` and ``T_list``.

    Parameters
    ----------
    casm_root_path: str
        Path to the casm project root

    Returns
    -------
    None
    """
    low_temperature = 40.0
    high_temperature = 1700.0

    def run_params(mu_start, mu_stop, mu_increment, T_start, T_stop, T_increment, hours):
        # Build one grid parameter dictionary. A fresh supercell list is created on every call so
        # that no two runs share the same list object.
        return {
            "mu_start": mu_start,
            "mu_stop": mu_stop,
            "mu_increment": mu_increment,
            "T_start": T_start,
            "T_stop": T_stop,
            "T_increment": T_increment,
            "supercell": [[24, 0, 0], [0, 24, 0], [0, 0, 24]],
            "hours": hours,
        }

    def format_runs(origin_dir, run_creator, grid_params):
        # Create a dj.gridspace_manager for one group of runs and format its run directories.
        gridspace = dj.gridspace_manager(
            origin_dir=origin_dir,
            namer=mc.mc_run_namer,
            run_creator=run_creator,
            status_updater=mc.mc_status_updater,
            run_submitter=mc.mc_run_submitter,
            grid_params=grid_params,
        )
        gridspace.format_run_dirs()

    # LTE runs: fixed chemical potential, fixed low temperature.
    lte_dir = os.path.join(casm_root_path, "grand_canonical_monte_carlo/MC_LTE")
    format_runs(
        lte_dir,
        mc.mc_lte_run_creator,
        [
            run_params(mu, mu, 0.0, low_temperature, low_temperature, 0.0, 24)
            for mu in mu_list
        ],
    )

    # Constant temperature runs: for every temperature, one sweep from high to low chemical
    # potential followed by one sweep from low to high chemical potential.
    t_const_dir = os.path.join(casm_root_path, "grand_canonical_monte_carlo/MC_t_const")
    t_const_param_list_of_dicts = [
        run_params(1.5, -1.5, -0.01, T, T, 0.0, 50) for T in T_list
    ]
    t_const_param_list_of_dicts += [
        run_params(-1.5, 1.5, 0.01, T, T, 0.0, 50) for T in T_list
    ]
    format_runs(t_const_dir, mc.mc_run_creator, t_const_param_list_of_dicts)

    # Cooling runs: fixed chemical potential, temperature stepped down in 10 K increments.
    cooling_dir = os.path.join(casm_root_path, "grand_canonical_monte_carlo/MC_cooling")
    format_runs(
        cooling_dir,
        mc.mc_run_creator,
        [
            run_params(mu, mu, 0.0, high_temperature, low_temperature, -10.0, 36)
            for mu in mu_list
        ],
    )

    # Make each cooling run initialize from a high temperature constant temperature run.
    # Assumption: all constant temperature runs at the high temperature cover the same chemical
    # potential values (possibly in opposite orders), so any one of them can serve as the source.
    t_const_runs = np.array(glob(os.path.join(t_const_dir, "mu_*")))
    t_const_mu = []
    t_const_temperatures = []
    for t_const_run in t_const_runs:
        # Read the settings once per run: entry 0 holds the chemical potentials, entry 1 the temperatures.
        t_const_settings = mc.read_mc_settings(os.path.join(t_const_run, "mc_settings.json"))
        t_const_mu.append(t_const_settings[0])
        t_const_temperatures.append(t_const_settings[1][0])
    t_const_mu = np.array(t_const_mu)
    t_const_temperatures = np.array(t_const_temperatures)

    # Indices of the constant temperature runs performed at the cooling start temperature.
    t_const_1700_indices = np.where(t_const_temperatures == high_temperature)[0]

    if len(t_const_1700_indices) == 0:
        # Without a matching run there is nothing to initialize from. Report it and carry on so the
        # heating runs below are still set up.
        print(
            "No constant temperature run at %d K was found. Cooling runs were not linked to an initial state."
            % high_temperature
        )
    else:
        # Use the first matching run as the source. The run directory and its chemical potentials
        # are selected with the same index, so the file that is checked below is also the file
        # that is written into the cooling settings.
        source_run_path = str(t_const_runs[t_const_1700_indices][0])
        source_mu_values = t_const_mu[t_const_1700_indices][0]

        for cooling_run_path in glob(os.path.join(cooling_dir, "mu_*")):
            cooling_settings_path = os.path.join(cooling_run_path, "mc_settings.json")

            # Chemical potential of this cooling run (constant over the run, so take the first value).
            cooling_mu = mc.read_mc_settings(cooling_settings_path)[0][0]

            # The position of the closest chemical potential in the source run is the number of the
            # conditions directory to initialize from.
            closest_conditions_index = np.argmin(np.abs(source_mu_values - cooling_mu))
            initial_state_path = os.path.join(
                source_run_path, "conditions.%d/final_state.json" % closest_conditions_index
            )

            if not os.path.exists(initial_state_path):
                print("The closest conditions file does not exist. Check that the constant temperature runs have been run.")
                continue

            # Point the cooling run at the final state and remove the other motif entries.
            with open(cooling_settings_path, "r") as f:
                cooling_settings = json.load(f)
            motif = cooling_settings["driver"]["motif"]
            motif["configdof"] = initial_state_path
            motif.pop("configname", None)
            motif.pop("_configname", None)
            motif.pop("_configdof", None)
            with open(cooling_settings_path, "w") as f:
                json.dump(cooling_settings, f, indent=4)

    # Heating runs: fixed chemical potential, temperature stepped up in 10 K increments.
    heating_dir = os.path.join(casm_root_path, "grand_canonical_monte_carlo/MC_heating")
    format_runs(
        heating_dir,
        mc.mc_run_creator,
        [
            run_params(mu, mu, 0.0, low_temperature, high_temperature, 10.0, 36)
            for mu in mu_list
        ],
    )

def sgcmc_casm_project_creator(
    propagation_info_dict, propagation_project_root_path
):
    """Copies a pre-templated casm project, writes a specific eci vector to
    project_root/cluster_expansions/clex.formation_energy/calctype.default/ref.default/bset.default/eci.default/eci.json
    and creates all standard directories for typical grand canonical monte carlo simulations.

    Parameters
    ----------
    propagation_info_dict : dict
        Dictionary containing the following keys:
            'template_project_root_path' : str
                Path to the root of the template casm project that is copied.
            'sample_index' : int
                Index of the eci sample. Not read by this function; it is stored in run_info.json with
                the rest of the dictionary.
            'eci' : np.ndarray
                ECI vector to write to the casm project
            'propagation_directory' : str
                Path to the directory which will contain all the propagation directories.
    propagation_project_root_path : str
        Path of the new casm project. The template's contents are copied into this directory.

    Returns
    -------
    None

    Raises
    ------
    subprocess.CalledProcessError
        If copying the template project fails.
    """
    # Local import: subprocess is only needed here and is not part of the notebook's import cell.
    import subprocess

    # Copy the contents of the template project into the new project root. The trailing "/." makes cp
    # copy the directory contents rather than nesting the template directory inside the target.
    # Arguments are passed as a list (no shell), so paths containing spaces are handled correctly.
    template_project_root_path = propagation_info_dict["template_project_root_path"]
    subprocess.run(
        ["cp", "-r", template_project_root_path + "/.", propagation_project_root_path],
        check=True,
    )

    # Load basis.json
    basis_json_path = os.path.join(
        template_project_root_path, "basis_sets/bset.default/basis.json"
    )
    with open(basis_json_path, "r") as f:
        basis_dict = json.load(f)

    # Append eci to the basis dictionary
    eci = propagation_info_dict["eci"]
    eci_dict = cio.append_ECIs_to_basis_data(ecis=eci, basis_data=basis_dict)

    # Write the dictionary to eci.json within the new project, unless the file is already there.
    eci_json_path = os.path.join(
        propagation_project_root_path,
        "cluster_expansions/clex.formation_energy/calctype.default/ref.default/bset.default/eci.default/eci.json",
    )
    if not os.path.isfile(eci_json_path):
        with open(eci_json_path, "w") as f:
            json.dump(eci_dict, f)
    else:
        print("ECI file already exists. Skipping.")

    # Create a grand canonical monte carlo directory within the new project.
    gcmc_dir = os.path.join(propagation_project_root_path, "grand_canonical_monte_carlo")
    os.makedirs(gcmc_dir, exist_ok=True)

    # Write the propagation_info_dict to run_info.json within the grand_canonical_monte_carlo directory.
    # The eci entry is converted to a plain list because numpy arrays are not JSON serializable;
    # the caller's dictionary is left untouched.
    run_info = dict(propagation_info_dict)
    run_info["eci"] = np.asarray(eci).tolist()
    with open(os.path.join(gcmc_dir, "run_info.json"), "w") as f:
        json.dump(run_info, f)

    # If it doesn't exist, create a status.json file in the grand canonical monte carlo directory to keep track of all monte carlo run statuses.
    status_json_path = os.path.join(gcmc_dir, "status.json")
    if not os.path.exists(status_json_path):
        with open(status_json_path, "w") as f:
            json.dump({}, f)

    # Create the MC_cooling, MC_heating, MC_LTE, and MC_t_const directories within the new grand canonical monte carlo directory.
    for run_group in ("MC_cooling", "MC_heating", "MC_LTE", "MC_t_const"):
        os.makedirs(os.path.join(gcmc_dir, run_group), exist_ok=True)

    # Set up MC runs for my specific project
    sgcmc_setup(propagation_project_root_path)


In [None]:
import djlib.propagation.propagate_gcmc as pg
import djlib.djlib as dj

# Create a gridspace manager to manage the propagation of the grand canonical monte carlo simulations.
#A gridspace manager is an abstracted class, made to handle many repetitive calculations across gridspaces. 
#Provided with the necessary user-made functions, it will handle directory creation, status updating, job submission, and result parsing. 
#The namer, status updater, submitter and parser are already written and generally applicable. However, the run creator and grid parameters vary from project to project.
#Please see the examples above to get a better idea of how these work. 
#For more information, please see the documentation for the gridspace manager class.

#Here the run creator is the notebook's own sgcmc_casm_project_creator, and the grid parameters are the
#propagation_info_dicts built above (one dictionary per ECI vector).
propagation_grid_space_manager = dj.gridspace_manager(origin_dir=propagation_directory,
    namer=pg.propagation_project_namer,
    status_updater=pg.propagation_casm_project_status_updater, 
    run_creator=sgcmc_casm_project_creator, 
    grid_params=propagation_info_dicts,
    run_submitter=pg.propagation_casm_project_submitter,
    run_parser=pg.propagation_project_parser,
    )

In [None]:
#Create and format all directories and files necessary for the calculations.
#NOTE: If this function runs while jobs are active in slurm, the jobs will be cancelled and must be restarted.
propagation_grid_space_manager.format_run_dirs()

In [None]:
#Update the status of all runs. 
#NOTE: This function does not distinguish between jobs that are still running but unfinished, and jobs that are not running and incomplete. Both are considered incomplete.
#NOTE: If the status of a heating run or LTE run is still "not_submitted" and you are sure that you submitted it, there was likely a segmentation fault in CASM. 
#This is a supercell shape issue. For a heating or LTE run at a given chemical potential, CASM will try to find a supercell that has the lowest formation energy. 
#If multiples of this supercell cannot fit inside the supercell specified in the mc_settings.json file, CASM will crash. 
propagation_grid_space_manager.update_status()

In [None]:
#Submit all jobs that have not been submitted yet.
#Cooling runs must initialize from the high temperature constant temperature runs,
#so the cooling runs will not submit until the constant temperature runs are complete.
#Workflow: first submit the constant temperature runs. Once they are complete, run the format_run_dirs() and
#update_status() methods again, and then run this method again to submit the cooling runs.
propagation_grid_space_manager.run_valid_calculations()

In [None]:
#Collect project data and examine relevant "diagnostic" plots for the propagation project.


from djlib.plotting.mc_plotting import sgcmc_full_project_diagnostic_plots
#Select which propagated sample to inspect; the project directory is named 'sample_index_<index>'.
sample_index = 0  
sample_name = 'sample_index_' + str(sample_index)
project_path = os.path.join(propagation_directory, sample_name)
print(project_path)

#Collect data. By default, the parser will wait until all jobs are complete before parsing.
#You should NOT make a phase diagram with incomplete data. 
#If you only want to visualize cooling plots for a project, you can set incomplete_override=True to parse incomplete data.
#NOTE: incomplete_override=True is set below, so incomplete data WILL be parsed here.
#NOTE: this rebinds the name `data`, which earlier in the notebook holds the regrouped CASM query.
data = pg.propagation_project_parser(project_path, incomplete_override=True)
print(data.keys())
integrated_data = mc.full_project_integration(data)

#Plot diagnostic plots.
fig = sgcmc_full_project_diagnostic_plots(integrated_data,show_legends=False)
plt.show()



In [None]:
#Write the integrated data to a pickle file inside the project directory. This contains all thermodynamic data from all runs in the project.
#NOTE: pickle files can execute arbitrary code when loaded; only load pickle files that you created yourself.
import pickle
with open(os.path.join(project_path, 'ZrN_FCC_integrated_data.pkl'), 'wb') as f:
    pickle.dump(integrated_data, f)
    