In [None]:
import os
import csv
import subprocess

In [None]:
# Base directories shared by the cells below.
run_location = "/vera/u/jerbo/TNG-arepo/run/L25n128_suite"
output_location = "/vera/u/jerbo/my_ptmp/L25n128_suite"
template_location = f"{run_location}/template"

# Smoke test of the output area: create a scratch directory, move into the
# output location, list it, then remove the scratch directory and list again.
scratch_dir = f"{output_location}/test"
os.mkdir(scratch_dir)
os.chdir(output_location)
os.getcwd()  # value is discarded; kept from the original cell
print(os.listdir("."))
os.rmdir(scratch_dir)
print(os.listdir("."))

In [None]:
# Safety stop for "Run All": execution of the notebook halts here.
# An explicit raise is used instead of `assert False` because assert
# statements are removed when Python runs with -O / PYTHONOPTIMIZE, which
# would silently disable the guard.
raise AssertionError("This cell is here to stop an accidental run all")

In [None]:
# Safety stop: this cell creates directories, edits parameter files and submits
# one Slurm job per grid point. An explicit raise is used instead of `assert`
# because assert statements are removed under -O / PYTHONOPTIMIZE, which would
# silently disable the guard.
raise AssertionError("Don't run this again")


def _replace_marked_rows(path, replacements, encoding=None):
    """Rewrite the text file at `path` in place.

    `replacements` is a sequence of (marker, new_row) pairs. Every row of the
    file is tested against the pairs in order; whenever `marker` occurs in the
    current row text, the row becomes `new_row`. The same `encoding` is used
    for reading and writing so the file is not re-encoded by accident.
    """
    with open(path, "r", encoding=encoding) as handle:
        rows = handle.readlines()
    for index, row in enumerate(rows):
        for marker, new_row in replacements:
            if marker in row:
                row = new_row
        rows[index] = row
    with open(path, "w", encoding=encoding) as handle:
        handle.writelines(rows)


def _settings_are_applied(path, expectations):
    """Check that the file at `path` carries the expected values.

    `expectations` is a sequence of (marker, label, expected) triples. For
    every row containing `marker`, the last whitespace-separated token must
    equal `expected`. A marker that never occurs in the file is reported as
    well, because in that case the edit step changed nothing.

    Prints one error line per problem and returns True only if none occurred.
    """
    all_good = True
    seen_markers = set()
    with open(path, "r") as handle:
        for row in handle:
            for marker, label, expected in expectations:
                if marker not in row:
                    continue
                seen_markers.add(marker)
                if row.split()[-1] != expected:
                    print(f"Error! -> {label} not correctly set in {path}")
                    all_good = False
    for marker, label, _expected in expectations:
        if marker not in seen_markers:
            print(f"Error! -> {label} not found in {path}")
            all_good = False
    return all_good


######################## read grid file ###########################
# One list of floats per data row; the first row of the CSV is a header.
with open('grid_lhs_constrained.csv', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    next(reader, None)  # skip the header row
    cosmo_parameters = [[float(value) for value in row] for row in reader]

# cosmo_parameters = cosmo_parameters[:10] # ATTENTION: ONLY FOR TESTING!! remove this for finished script

##################### loop over grid points ######################

for counter, (Omega_m, Omega_b, Omega_L, hubble_par) in enumerate(cosmo_parameters):
    print("----------------------------------")
    print("Grid Point Nr.", counter)
    print(f"Omega_m = {Omega_m:.3f}, Omega_b = {Omega_b:.3f}, Omega_L = {Omega_L:.3f}, h = {hubble_par:.3f}")

    # create output directory
    output_gridpoint = output_location+f"/gridpoint{counter}"
    os.mkdir(output_gridpoint)

    # create run directory
    run_gridpoint = run_location+f"/gridpoint{counter}"
    os.mkdir(run_gridpoint)

    # copy template to run directory (the shell glob skips hidden files)
    cmd = f"cp -r {template_location}/* {run_gridpoint}"
    os.system(cmd)

    # check if all expected files are there; os.listdir returns names in
    # arbitrary order, so the two listings are sorted before comparing
    visible_files_template = sorted(name for name in os.listdir(template_location) if not name.startswith('.'))
    visible_files_run = sorted(name for name in os.listdir(run_gridpoint) if not name.startswith('.'))
    if visible_files_template != visible_files_run:
        print("Error! -> Copying template failed")
        print("Skipping this gridpoint ...")
        continue

    # edit param.txt to match gridpoint values
    # entries: (marker searched in the row, label for messages, new row, expected last token)
    path_to_param_file = run_gridpoint + "/param_L25n128.txt"
    arepo_settings = [
        ("Omega0", "Omega0", f"Omega0\t              {Omega_m:.4f}\n", f"{Omega_m:.4f}"),
        ("OmegaLambda", "OmegaL", f"OmegaLambda           {Omega_L:.4f}\n", f"{Omega_L:.4f}"),
        ("OmegaBaryon", "OmegaB", f"OmegaBaryon           {Omega_b:.4f}\n", f"{Omega_b:.4f}"),
        ("HubbleParam", "HubblePar", f"HubbleParam           {hubble_par:.4f}\n", f"{hubble_par:.4f}"),
        ("OutputDir", "OutputDir", f"OutputDir           {output_gridpoint}\n", output_gridpoint),
    ]
    _replace_marked_rows(
        path_to_param_file,
        [(marker, new_row) for marker, _label, new_row, _expected in arepo_settings],
    )

    # check if the edits have worked
    if not _settings_are_applied(
        path_to_param_file,
        [(marker, label, expected) for marker, label, _new_row, expected in arepo_settings],
    ):
        print("Skipping this gridpoint ...")
        continue
    print(f"{path_to_param_file} was edited successfully!")

    # edit param_music.txt to match gridpoint values
    path_to_param_music_file = run_gridpoint + "/param_music.txt"
    music_settings = [
        ("Omega_m", "Omega0", f"Omega_m           = {Omega_m:.4f}\n", f"{Omega_m:.4f}"),
        ("Omega_L", "OmegaL", f"Omega_L           = {Omega_L:.4f}\n", f"{Omega_L:.4f}"),
        ("Omega_b", "OmegaB", f"Omega_b           = {Omega_b:.4f}\n", f"{Omega_b:.4f}"),
        ("H0", "HubblePar", f"H0                = {100*hubble_par:.2f}\n", f"{100*hubble_par:.2f}"),
    ]
    _replace_marked_rows(
        path_to_param_music_file,
        [(marker, new_row) for marker, _label, new_row, _expected in music_settings],
    )

    # check if the edits have worked
    if not _settings_are_applied(
        path_to_param_music_file,
        [(marker, label, expected) for marker, label, _new_row, expected in music_settings],
    ):
        print("Skipping this gridpoint ...")
        continue
    print(f"{path_to_param_music_file} was edited successfully!")

    # run custom_create.py to make the initial conditions
    os.chdir(run_gridpoint)
    cmd = f"python3 custom_create.py {run_gridpoint}"
    os.system(cmd)

    # check if ics.hdf5 file exists
    if "ics.hdf5" in os.listdir(run_gridpoint):
        print("ICs were sucessfully created!")
    else:
        print("Error! -> IC creation failed")
        print("Skipping this gridpoint ...")
        continue

    # edit name in script.slurm (read and written as UTF-8)
    path_to_slurm_script = run_gridpoint + "/script.slurm"
    _replace_marked_rows(
        path_to_slurm_script,
        [("SBATCH -J", f"#SBATCH -J CLIMB-GP-{counter}\n")],
        encoding="utf-8",
    )

    # submit the job script to slurm
    slurm_script = "script.slurm"
    result = subprocess.run(
        ["sbatch", slurm_script],
        cwd=run_gridpoint,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    sbatch_output = result.stdout.strip()

    # Do not log a submission that failed: an entry without a job id would be
    # misread by the cells that parse slurm_job_ids.txt.
    if result.returncode != 0 or not sbatch_output:
        print("Error! -> sbatch submission failed")
        print(result.stderr.strip())
        print("Skipping this gridpoint ...")
        continue

    with open(run_location+"/slurm_job_ids.txt", "a") as myfile:
        myfile.write(f"{counter}: {sbatch_output}\n")

In [None]:
# Collect the [gridpoint, job id] pairs whose sacct report mentions PENDING.
restart_job_ids = []
job_id_log = run_location + "/slurm_job_ids.txt"
with open(job_id_log, "r") as job_log:
    for entry in job_log:
        fields = entry.split()
        # the first token carries one trailing character after the gridpoint
        # number; the last token of the line is used as the job id
        count = int(fields[0][:-1])
        job_id = fields[-1]
        print("GridPoint", count)
        sacct_call = subprocess.run(
            ["sacct", "-j", job_id],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        sacct_report = sacct_call.stdout.strip()
        if "PENDING" in sacct_report:
            print(sacct_report)
            restart_job_ids.append([count, job_id])
        print("---------------------------------")

In [None]:
# Show the list of jobs selected for restart (filled by the pending-jobs check).
print(restart_job_ids)

In [None]:
# cancel the pending jobs
for pending_entry in restart_job_ids:
    gp, job_id = pending_entry
    print(gp, job_id)
    cancel_cmd = f"scancel {job_id}"
    os.system(cancel_cmd)

In [None]:
# Safety stop: the loop below rewrites script.slurm and param_L25n128.txt in
# place. An explicit raise is used instead of `assert` because assert
# statements are removed under -O / PYTHONOPTIMIZE.
raise AssertionError("don't run this")
# change the number of nodes and allowed time for the previously cancelled jobs
for gp, _job_id in restart_job_ids:
    run_gridpoint_path = run_location + f"/gridpoint{gp}"

    # (file, marker searched in each row, replacement row, text encoding)
    # script.slurm is read and written as UTF-8; the parameter file keeps the
    # platform default encoding for both directions.
    pending_edits = [
        (run_gridpoint_path + "/script.slurm", "SBATCH --nodes=", "#SBATCH --nodes=1\n", "utf-8"),
        (run_gridpoint_path + "/param_L25n128.txt", "TimeLimitCPU", "TimeLimitCPU           36000\n", None),
    ]
    for target_path, marker, new_row, encoding in pending_edits:
        with open(target_path, "r", encoding=encoding) as handle:
            rows = [new_row if marker in row else row for row in handle]
        with open(target_path, "w", encoding=encoding) as handle:
            handle.writelines(rows)

In [None]:
# Safety stop: the loop below submits Slurm jobs. An explicit raise is used
# instead of `assert` because assert statements are removed under
# -O / PYTHONOPTIMIZE.
raise AssertionError("Don't run this again either")
# resubmit the jobs
for gp, _job_id in restart_job_ids:
    run_gridpoint_path = run_location + f"/gridpoint{gp}"
    os.chdir(run_gridpoint_path)

    slurm_script = "script.slurm"
    result = subprocess.run(
        ["sbatch", slurm_script],
        cwd=run_gridpoint_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    sbatch_output = result.stdout.strip()

    # Do not log a failed submission: an entry without a job id would be
    # misread by the cells that parse slurm_job_ids.txt.
    if result.returncode != 0 or not sbatch_output:
        print(f"Error! -> sbatch submission failed for gridpoint {gp}")
        print(result.stderr.strip())
        continue

    with open(run_location+"/slurm_job_ids.txt", "a") as myfile:
        myfile.write(f"{gp}: {sbatch_output}\n")

In [None]:
# check up on status
# A run counts as successful only if sacct reports COMPLETED and none of the
# states below appear anywhere in the report. Requiring COMPLETED keeps empty
# sacct output (unknown job id) from being counted as a success.
# "OUT_OF_ME" is a prefix so that a truncated state column still matches
# (assumes sacct's default column width; verify on this cluster).
not_successful_states = (
    "FAILED", "PENDING", "CANCELLED", "RUNNING",
    "TIMEOUT", "OUT_OF_ME", "NODE_FAIL", "BOOT_FAIL",
    "DEADLINE", "PREEMPTED", "SUSPENDED",
)

successfull_runs = []
with open(run_location+"/slurm_job_ids.txt", "r") as file:
    for row in file:
        tokens = row.split()
        if len(tokens) < 2:
            # blank line, or a submission that was logged without a job id
            continue
        count = int(tokens[0][:-1])
        job_id = tokens[-1]
        print("GridPoint", count)
        result = subprocess.run(["sacct", "-j", job_id], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
        sbatch_output = result.stdout.strip()
        finished_cleanly = "COMPLETED" in sbatch_output and not any(
            state in sbatch_output for state in not_successful_states
        )
        if finished_cleanly:
            print(sbatch_output)
            successfull_runs.append(count)
        print("---------------------------------")
print(successfull_runs)

In [None]:
# check status of all runs:
# Every gridpoint ends up in exactly one of the lists below, based on the
# sacct report of its most recently logged job.
completed_runs = []
failed_runs = []
cancelled_runs = []
still_running = []
pending_runs = []
other_runs = []  # anything that is neither clearly finished nor in a list above

# States that must not be counted as success even though the report may also
# contain COMPLETED lines. "OUT_OF_ME" is a prefix so that a truncated state
# column still matches (assumes sacct's default column width; verify).
abnormal_states = (
    "TIMEOUT", "OUT_OF_ME", "NODE_FAIL", "BOOT_FAIL",
    "DEADLINE", "PREEMPTED", "SUSPENDED",
)

# slurm_job_ids.txt is appended to on every (re)submission, so a gridpoint can
# occur several times. Later lines overwrite earlier ones here, which keeps
# only the newest job per gridpoint and avoids classifying a resubmitted
# gridpoint by its old, superseded job.
latest_job_ids = {}
with open(run_location+"/slurm_job_ids.txt", "r") as file:
    for row in file:
        tokens = row.split()
        if not tokens:
            continue  # blank line
        count = int(tokens[0].rstrip(":"))
        # a line without a second token was logged without a job id
        latest_job_ids[count] = tokens[-1] if len(tokens) > 1 else None

for count, job_id in latest_job_ids.items():
    print("GridPoint", count)

    if job_id is None:
        print("no job id recorded")
        other_runs.append(count)
        print("---------------------------------")
        continue

    result = subprocess.run(["sacct", "-j", job_id], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    sbatch_output = result.stdout.strip()

    if "FAILED" in sbatch_output:
        print("Failed")
        print(sbatch_output)
        failed_runs.append(count)
    elif "CANCELLED" in sbatch_output:
        print("Cancelled")
        print(sbatch_output)
        cancelled_runs.append(count)
    elif "RUNNING" in sbatch_output:
        print("running")
        print(sbatch_output)
        still_running.append(count)
    elif "PENDING" in sbatch_output:
        print("pending")
        print(sbatch_output)
        pending_runs.append(count)
    elif "COMPLETED" not in sbatch_output or any(state in sbatch_output for state in abnormal_states):
        print("unclassified")
        print(sbatch_output)
        other_runs.append(count)
    else:
        print("success")
        print(sbatch_output)
        completed_runs.append(count)
    print("---------------------------------")

if other_runs:
    print(f"Unclassified runs: {other_runs}")

In [None]:
# One summary line per status list: "<label>: <number of gridpoints>".
run_summary = (
    ("Completed runs", completed_runs),
    ("Failed runs", failed_runs),
    ("Cancelled runs", cancelled_runs),
    ("Still running", still_running),
    ("Pending runs", pending_runs),
)
for summary_label, gridpoints in run_summary:
    print(f"{summary_label}: {len(gridpoints)}")

In [None]:
import numpy as np

# Grid file and the row indices (gridpoint numbers) of the failed runs.
grid_csv_file_path = "/vera/u/jerbo/code/TNG-arepo-scripts/grid_lhs_constrained.csv"
grid_point_indices = list(failed_runs)

# Read the header, then keep only the data rows whose index belongs to a
# failed run, converting every field to float.
with open(grid_csv_file_path, newline='') as f:
    reader = csv.reader(f, delimiter=',')
    header = next(reader, None)
    interestingrows = [
        [float(field) for field in row]
        for idx, row in enumerate(reader)
        if idx in grid_point_indices
    ]

# Transpose so that iterating yields one array per CSV column.
interestingrows = np.array(interestingrows).T
print("-------------- Analysis of failed runs --------------")
for column, values in enumerate(interestingrows):
    print("Parameter:", header[column])
    print(f"Mean = {values.mean():.5f}")
    print(f"std  = {values.std():.5f}")
    print("")
    print("List of values:")
    print(values)
    print("-----------------------------")

In [None]:
# Safety stop: the loop below rewrites script.slurm and param_L25n128.txt of
# every failed run in place. An explicit raise is used instead of `assert`
# because assert statements are removed under -O / PYTHONOPTIMIZE.
raise AssertionError("Dont run this again")
for gp in failed_runs:
    run_gridpoint_path = run_location + f"/gridpoint{gp}"

    # (file, marker searched in each row, replacement row, text encoding)
    # script.slurm is read and written as UTF-8; the parameter file keeps the
    # platform default encoding for both directions.
    pending_edits = [
        (run_gridpoint_path + "/script.slurm", "SBATCH --nodes=", "#SBATCH --nodes=1\n", "utf-8"),
        (run_gridpoint_path + "/param_L25n128.txt", "TimeLimitCPU", "TimeLimitCPU           36000\n", None),
    ]
    for target_path, marker, new_row, encoding in pending_edits:
        with open(target_path, "r", encoding=encoding) as handle:
            rows = [new_row if marker in row else row for row in handle]
        with open(target_path, "w", encoding=encoding) as handle:
            handle.writelines(rows)

In [None]:
# Safety stop: the command below recursively deletes the contents of the test
# playground folder. An explicit raise is used instead of `assert` because
# assert statements are removed under -O / PYTHONOPTIMIZE, which would let the
# deletion run.
raise AssertionError("Guard: this cell deletes the contents of the test playground folder")
#os.chdir("/vera/ptmp/gc/jerbo/babysitter_script_test_playground")
#assert os.getcwd() == "/vera/ptmp/gc/jerbo/babysitter_script_test_playground"
os.system("rm -rf /vera/ptmp/gc/jerbo/babysitter_script_test_playground/test_folder/*")

In [None]:
# Safety stop: the loop below recursively deletes the output of every
# gridpoint listed in failed_runs. An explicit raise is used instead of
# `assert` because assert statements are removed under -O / PYTHONOPTIMIZE,
# which would let the deletion run.
raise AssertionError("Guard: this cell deletes the output of all failed gridpoints")
for gp in failed_runs:
    out_gridpoint_path = output_location + f"/gridpoint{gp}"
    # the command is printed before it is executed so the deleted path is visible
    cmd = f"rm -rf {out_gridpoint_path}/*"
    print(cmd)
    os.system(cmd)

In [None]:
# Safety stop: the loop below submits one Slurm job per failed gridpoint.
# An explicit raise is used instead of `assert` because assert statements are
# removed under -O / PYTHONOPTIMIZE.
raise AssertionError("Dont run this again")
for gp in failed_runs:
    run_gridpoint_path = run_location + f"/gridpoint{gp}"
    os.chdir(run_gridpoint_path)

    slurm_script = "script.slurm"
    result = subprocess.run(
        ["sbatch", slurm_script],
        cwd=run_gridpoint_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    sbatch_output = result.stdout.strip()

    # Do not log a failed submission: an entry without a job id would be
    # misread by the cells that parse slurm_job_ids.txt.
    if result.returncode != 0 or not sbatch_output:
        print(f"Error! -> sbatch submission failed for gridpoint {gp}")
        print(result.stderr.strip())
        continue

    with open(run_location+"/slurm_job_ids.txt", "a") as myfile:
        myfile.write(f"{gp}: {sbatch_output}\n")