Skip to content

Commit

Permalink
Merge branch 'aistairc:main' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
aramoto99 committed Jan 29, 2024
2 parents 361d042 + 353a4e0 commit 2b91899
Show file tree
Hide file tree
Showing 110 changed files with 1,186 additions and 1,150 deletions.
2 changes: 0 additions & 2 deletions aiaccel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
tensorboard,
util,
workspace,
wrapper_tools,
)
from aiaccel.util.aiaccel import Run

Expand All @@ -21,7 +20,6 @@
"module",
"parameter",
"workspace",
"wrapper_tools",
"abci",
"cli",
"optimizer",
Expand Down
2 changes: 0 additions & 2 deletions aiaccel/abci/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from aiaccel.abci.batch import create_abci_batch_file
from aiaccel.abci.qstat import parse_job_list, parse_qstat
from aiaccel.abci.qsub import create_qsub_command

__all__ = [
"create_abci_batch_file",
"parse_job_list",
"parse_qstat",
"create_qsub_command",
Expand Down
136 changes: 0 additions & 136 deletions aiaccel/abci/batch.py

This file was deleted.

13 changes: 11 additions & 2 deletions aiaccel/abci/qsub.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from omegaconf.dictconfig import DictConfig
from omegaconf.listconfig import ListConfig

from aiaccel.common import dict_output
from aiaccel.common import dict_stderr, dict_stdout

""" Example of stat
stat = {
Expand Down Expand Up @@ -36,7 +36,16 @@ def create_qsub_command(config: DictConfig, runner_file: Path) -> list[str]:
path = Path(config.generic.workspace).resolve()
job_execution_options = config.ABCI.job_execution_options

command = ["qsub", "-g", f"{config.ABCI.group}", "-j", "y", "-o", f"{path / dict_output}", str(runner_file)]
command = [
"qsub",
"-g",
f"{config.ABCI.group}",
"-o",
f"{path / dict_stdout}",
"-e",
f"{path / dict_stderr}",
str(runner_file),
]

#
# additional option
Expand Down
11 changes: 9 additions & 2 deletions aiaccel/cli/csv_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class CsvWriter:
def __init__(self, config: DictConfig):
self.config = config
self.workspace = Workspace(self.config.generic.workspace)
self.fp = self.workspace.retults_csv_file
self.fp = self.workspace.result_csv_file
self.trialid = TrialId(self.config)
self.storage = Storage(self.workspace.storage_file_path)
self.lock_file = {"result_txt": str(self.workspace.lock / "result_txt")}
Expand All @@ -49,7 +49,14 @@ def _get_zero_padding_trial_id(self, trial_id: int) -> str:
return self.trialid.zero_padding_any_trial_id(trial_id)

def create(self) -> None:
"""Creates report."""
"""Creates report.
Args:
None
Returns:
None
"""
data = []
header = []

Expand Down
1 change: 0 additions & 1 deletion aiaccel/cli/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def __init__(self, config: DictConfig):
self.workspace = Workspace(config.generic.workspace)
self.storage = Storage(self.workspace.storage_file_path)
self.goals = [item.value for item in config.optimize.goal]

self.cplt = EasyVisualizer()

def plot(self) -> None:
Expand Down
20 changes: 0 additions & 20 deletions aiaccel/cli/set_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def write_results_to_database(
storage_file_path: str | Path,
trial_id: int,
objective: list[str | float | int] | None,
error: str,
returncode: int | None,
start_time: str | None = None,
end_time: str | None = None,
Expand All @@ -27,8 +26,6 @@ def write_results_to_database(
storage.result.set_any_trial_objective(trial_id, objective)
if returncode is not None:
storage.returncode.set_any_trial_returncode(trial_id, returncode)
if error != "":
storage.error.set_any_trial_error(trial_id, error)
if start_time is not None:
storage.timestamp.set_any_trial_start_time(trial_id, start_time)
if end_time is not None:
Expand All @@ -42,7 +39,6 @@ def main() -> None:
parser.add_argument("--storage_file_path", type=str, required=True)
parser.add_argument("--trial_id", type=int, required=True)
parser.add_argument("--objective", nargs="+", type=str_or_float_or_int, default=None)
parser.add_argument("--error", type=str, default="")
parser.add_argument("--returncode", type=int, default=None)

args = parser.parse_known_args()[0]
Expand All @@ -61,33 +57,17 @@ def main() -> None:
"trial_id",
"config",
"objective",
"error",
"returncode",
]

for key in delete_keys:
if key in xs.keys():
del xs[key]

contents = {
"trial_id": args.trial_id,
"result": args.objective,
"parameters": xs,
"returncode": args.returncode,
"error": args.error,
}

if args.error == "":
del contents["error"]

# print(contents)

# create_yaml(args.file, contents)
write_results_to_database(
storage_file_path=args.storage_file_path,
trial_id=args.trial_id,
objective=args.objective,
error=args.error,
returncode=args.returncode,
)

Expand Down
4 changes: 4 additions & 0 deletions aiaccel/cli/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ def main() -> None: # pragma: no cover
if not manager.is_error_free():
break
if int((time.time() - time_s)) % 10 == 0:
returncodes = storage.returncode.get_all_trial_returncode()
if any(item != 0 for item in returncodes):
logger.error("Some trials are failed.")
break
num_ready, num_running, num_finished = storage.get_num_running_ready_finished()
available_pool_size = manager.get_available_pool_size(num_ready, num_running, num_finished)
now = datetime.now()
Expand Down
10 changes: 5 additions & 5 deletions aiaccel/cli/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class Viewer:
config (Config): Config object.
Attributes:
config_path (Path): Path to the config file.
workspace (Workspace): Workspace object.
storage (Storage): Storage object.
"""
Expand All @@ -45,10 +44,11 @@ def view(self) -> None:
job = self.storage.jobstate.get_any_trial_jobstate(trial_id)
result = self.storage.result.get_any_trial_objective(trial_id)

error = str(self.storage.error.get_any_trial_error(trial_id))
if error == "None":
error = ""
else:
error_file = self.workspace.get_error_output_file(trial_id)
error = ""
if error_file.exists():
with open(error_file, "r") as f:
error = f.read()
if len(error) > 4:
error = error[0:3] + "..."

Expand Down
45 changes: 7 additions & 38 deletions aiaccel/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,24 @@
Example: ::
from aiaccel.common import alive_optimizer
from aiaccel.common import dict_lock
"""

alive_optimizer = "optimizer.yml"
alive_manager = "manager.yml"

class_optimizer = "Optimizer"
class_manager = "Manager"

dict_work = "work_aiaccel"
dict_alive = "alive"
dict_pid = "pid"
dict_ready = "ready"
dict_running = "running"
dict_finished = "finished"
dict_hp = "hp"
dict_hp_ready = "hp/ready"
dict_hp_running = "hp/running"
dict_hp_finished = "hp/finished"
dict_srialize = "serialize"
dict_lock = "lock"
dict_log = "log"
dict_error = "error"
dict_output = "abci_output"
dict_jobstate = "jobstate"
dict_result = "result"
dict_stdout = "abci_stdout"
dict_stderr = "abci_stderr"
dict_runner = "runner"
dict_timestamp = "timestamp"
dict_storage = "storage"
dict_tensorboard = "tensorboard"
dict_mpi = "mpi"
dict_rank_log = "rank_log"

extension_hp = "hp"
extension_pickle = "pickle"
extension_resource = "res"
extension_result = "result"

file_configspace = "configspace"
file_final_result = "final_result.result"
file_hyperparameter = "hyperparameter.json"
file_numpy_random = "numpy_random"
file_numpy_random_extension = "npy"
file_final_result = "final_result.yaml"
file_best_result = "best_result.yaml"
file_result_csv = "result.csv"
file_storage = "storage.db"

file_hp_count = "count.txt"
file_hp_count_lock = "count.lock"
Expand All @@ -57,11 +31,6 @@
goal_maximize = "maximize"
goal_minimize = "minimize"

key_module_type = "module_type"
key_path = "path"
key_pid = "pid"
key_project_name = "project_name"

resource_type_local = "local"
resource_type_abci = "abci"
resource_type_mpi = "mpi"
Expand Down
Loading

0 comments on commit 2b91899

Please sign in to comment.