Skip to content

Commit

Permalink
Revert "fix launch when elastic run (PaddlePaddle#61847) (PaddlePaddle#61878)"
Browse files Browse the repository at this point in the history

This reverts commit f09d9d8.
  • Loading branch information
hanhaowen-mt committed May 13, 2024
1 parent ee178fe commit f6964b4
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions python/paddle/distributed/launch/controllers/collective.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,7 @@ def build_pod(self):
):
return self._build_pod_with_args()
else:
if self.ctx.args.auto_parallel_config is None:
skip_run = True
# only when skip_run is False should the pod not be reset
return self._build_pod_with_master(skip_run)
return self._build_pod_with_master()

def _build_pod_with_tuner(self):
auto_parallel_config = self.ctx.args.auto_parallel_config
Expand Down Expand Up @@ -151,7 +148,7 @@ def _build_pod_with_args(self):

return True

def _build_pod_with_master(self, reset_pod=True):
def _build_pod_with_master(self):
self.pod.replicas = self.pod_replicas()

# rank will be reset when restart
Expand Down Expand Up @@ -206,8 +203,7 @@ def _build_pod_with_master(self, reset_pod=True):

job_endpoints = [i['endpoints'] for i in peer_list]

if reset_pod:
self.pod.reset()
# self.pod.reset()
selected_dev_key = self.ctx.node.device.get_selected_device_key()
selected_dev_list = self.ctx.node.device.get_selected_devices(
self.ctx.args.devices
Expand Down

0 comments on commit f6964b4

Please sign in to comment.