## HyperPod Training SDK Experience

In [None]:
import sys
import warnings

warnings.filterwarnings("ignore")
sys.path.insert(0, '/Users/pintaoz/workspace/private-sagemaker-hyperpod-cli-staging/sagemaker-hyperpod/src/sagemaker')
sys.path

### Create a HyperPodPytorchJob with a full spec

In [None]:
from sagemaker.hyperpod.training import (
    HyperPodPytorchJob,
    Containers,
    ReplicaSpec,
    Resources,
    RunPolicy,
    Spec,
    Template,
)
from sagemaker.hyperpod.common.config import Metadata


# Number of processes per node, passed to the job as a string.
# Defined once here and reused below so the two values cannot drift apart.
nproc_per_node = "1"

# A single replica group named "pod" running one container.
replica_specs = [
    ReplicaSpec(
        name="pod",
        template=Template(
            spec=Spec(
                containers=[
                    Containers(
                        name="container-name",
                        image="448049793756.dkr.ecr.us-west-2.amazonaws.com/ptjob:mnist",
                        # Always re-pull the image rather than using a cached copy.
                        image_pull_policy="Always",
                        # Request and limit zero GPUs for this container.
                        resources=Resources(
                            requests={"nvidia.com/gpu": "0"},
                            limits={"nvidia.com/gpu": "0"},
                        ),
                        # Left commented out in the original; presumably
                        # overrides the image's default command — confirm
                        # against the SDK before enabling.
                        # command=[]
                    )
                ]
            )
        ),
    )
]

# NOTE(review): "None" is a string policy name, not Python's None —
# presumably it means pods are left in place after the job ends; confirm.
run_policy = RunPolicy(clean_pod_policy="None")

pytorch_job = HyperPodPytorchJob(
    metadata=Metadata(name="demo"),
    nproc_per_node=nproc_per_node,
    replica_specs=replica_specs,
    run_policy=run_policy,
)

# Create the job from the spec assembled above.
pytorch_job.create()

### Get the status of created jobs

In [None]:
import yaml

# Dump whatever HyperPodPytorchJob.list() returns as YAML.
print("List all jobs:")
all_jobs_yaml = yaml.dump(HyperPodPytorchJob.list())
print(all_jobs_yaml)

# Refresh the local job object first, then dump its status as YAML.
print("Refresh job demo and check the status:")
pytorch_job.refresh()
job_status_yaml = yaml.dump(pytorch_job.status)
print(job_status_yaml)

### Get model training logs from the pod

In [None]:
# Show the pods that belong to this job.
print("List all pods created for this job:")
job_pods = pytorch_job.list_pods()
print(job_pods)

# Fetch and print the logs of the pod named "demo-pod-0".
# NOTE(review): the pod name is hard-coded — presumably it follows
# "<job name>-<replica name>-<index>"; verify against the list printed above.
print("Check the logs from pod0:")
pod0_logs = pytorch_job.get_logs_from_pod("demo-pod-0")
print(pod0_logs)

### Get training operator logs

In [None]:
# Fetch the training operator's logs and print them.
# NOTE(review): since_hours=0.1 presumably restricts output to roughly the
# last six minutes — confirm the unit against the SDK.
operator_logs = pytorch_job.get_operator_logs(since_hours=0.1)
print(operator_logs)

### Delete the job

In [None]:
# Delete the "demo" job created earlier in this notebook.
pytorch_job.delete()