You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm sorry to bother you, but I have run into a program error and would like to ask for your advice. When running `bash run_modelfree_rl.sh DQN`, a connection error occurs. The error message is as follows:
2022-11-15 08:19:12,029 INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 0.4361 GB (100000.0 batches of size 1, 4361 bytes each), available system memory is 201.44095232 GB
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_1/kernel:0' shape=(256, 64) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_1/bias:0' shape=(64,) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_out/kernel:0' shape=(64, 284) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_out/bias:0' shape=(284,) dtype=float32>
2022-11-15 08:19:14,846 INFO multi_gpu_impl.py:143 -- Training on concatenated sample batches:
2022-11-15 08:19:14,846 INFO multi_gpu_impl.py:188 -- Divided 576 rollout sequences, each of length 1, among 1 devices.
Traceback (most recent call last):
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 438, in _error_catcher
yield
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 519, in read
data = self._fp.read(amt) if not fp_closed else b""
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/http/client.py", line 463, in read
n = self.readinto(b)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/http/client.py", line 507, in readinto
n = self.fp.readinto(b)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 760, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 576, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 541, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/contextlib.py", line 99, in __exit__
self.gen.throw(type, value, traceback)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 455, in _error_catcher
raise ProtocolError("Connection broken: %r" % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionResetError(104, 'Connection reset by peer')", ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "modelfree_train.py", line 429, in <module>
result = trainer.train()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 643, in train
raise e
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 629, in train
result = Trainable.train(self)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/tune/trainable.py", line 237, in train
result = self.step()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 170, in step
res = next(self.train_exec_impl)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 1075, in build_union
item = next(it)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/execution/rollout_ops.py", line 75, in sampler
yield workers.local_worker().sample()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py", line 739, in sample
batches = [self.input_reader.next()]
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 101, in next
batches = [self.get_data()]
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 231, in get_data
item = next(self.rollout_provider)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 615, in _env_runner
sample_collector=sample_collector,
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 934, in _process_observations
env_id)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/env/base_env.py", line 368, in try_reset
return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
File "/home/wlxy/userfolder/RL4RS/rl4rs/utils/rllib_vector_env.py", line 44, in reset_at
self.reset_cache = self.env.reset()
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/httpEnv.py", line 43, in reset
observation = self.client.env_reset(self.instance_id)
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/gymHttpClient.py", line 67, in env_reset
resp = self._post_request(route, None)
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/gymHttpClient.py", line 43, in _post_request
data=json.dumps(data))
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 577, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 529, in request
resp = self.send(prep, **send_kwargs)
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 687, in send
r.content
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 838, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 763, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(104, 'Connection reset by peer')", ConnectionResetError(104, 'Connection reset by peer'))
I would appreciate your help. Thank you very much.
The text was updated successfully, but these errors were encountered:
A ConnectionResetError can have many causes. Could you confirm whether this error is reported right at the start, or only after a period of successful training?
I'm sorry to bother you, but I have run into a program error and would like to ask for your advice. When running `bash run_modelfree_rl.sh DQN`, a connection error occurs. The error message is as follows:
2022-11-15 08:19:12,029 INFO replay_buffer.py:46 -- Estimated max memory usage for replay buffer is 0.4361 GB (100000.0 batches of size 1, 4361 bytes each), available system memory is 201.44095232 GB
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_1/kernel:0' shape=(256, 64) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_1/bias:0' shape=(64,) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_out/kernel:0' shape=(64, 284) dtype=float32>
2022-11-15 08:19:14,843 INFO tf_policy.py:712 -- Optimizing variable <tf.Variable 'default_policy/fc_out/bias:0' shape=(284,) dtype=float32>
2022-11-15 08:19:14,846 INFO multi_gpu_impl.py:143 -- Training on concatenated sample batches:
{ 'inputs': [ np.ndarray((576, 540), dtype=float32, min=-1.0, max=37.179, mean=-0.169),
np.ndarray((576, 540), dtype=float32, min=-1.0, max=38.907, mean=-0.207),
np.ndarray((576,), dtype=int64, min=1.0, max=283.0, mean=103.844),
np.ndarray((576,), dtype=float32, min=0.0, max=162.121, mean=7.551),
np.ndarray((576,), dtype=bool, min=0.0, max=1.0, mean=0.135),
np.ndarray((576,), dtype=float64, min=1.0, max=1.0, mean=1.0)],
'placeholders': [ <tf.Tensor 'default_policy/obs:0' shape=(?, 540) dtype=float32>,
<tf.Tensor 'default_policy/new_obs:0' shape=(?, 540) dtype=float32>,
<tf.Tensor 'default_policy/action:0' shape=(?,) dtype=int64>,
<tf.Tensor 'default_policy/rewards:0' shape=(?,) dtype=float32>,
<tf.Tensor 'default_policy/dones:0' shape=(?,) dtype=float32>,
<tf.Tensor 'default_policy/weights:0' shape=(?,) dtype=float32>],
'state_inputs': []}
2022-11-15 08:19:14,846 INFO multi_gpu_impl.py:188 -- Divided 576 rollout sequences, each of length 1, among 1 devices.
Traceback (most recent call last):
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 438, in _error_catcher
yield
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 519, in read
data = self._fp.read(amt) if not fp_closed else b""
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/http/client.py", line 463, in read
n = self.readinto(b)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/http/client.py", line 507, in readinto
n = self.fp.readinto(b)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 760, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 576, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 541, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/contextlib.py", line 99, in __exit__
self.gen.throw(type, value, traceback)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/urllib3/response.py", line 455, in _error_catcher
raise ProtocolError("Connection broken: %r" % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionResetError(104, 'Connection reset by peer')", ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "modelfree_train.py", line 429, in <module>
result = trainer.train()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 643, in train
raise e
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 629, in train
result = Trainable.train(self)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/tune/trainable.py", line 237, in train
result = self.step()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 170, in step
res = next(self.train_exec_impl)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 1075, in build_union
item = next(it)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/execution/rollout_ops.py", line 75, in sampler
yield workers.local_worker().sample()
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py", line 739, in sample
batches = [self.input_reader.next()]
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 101, in next
batches = [self.get_data()]
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 231, in get_data
item = next(self.rollout_provider)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 615, in _env_runner
sample_collector=sample_collector,
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 934, in _process_observations
env_id)
File "/home/wlxy/anaconda3/envs/rl4rs/lib/python3.6/site-packages/ray/rllib/env/base_env.py", line 368, in try_reset
return {_DUMMY_AGENT_ID: self.vector_env.reset_at(env_id)}
File "/home/wlxy/userfolder/RL4RS/rl4rs/utils/rllib_vector_env.py", line 44, in reset_at
self.reset_cache = self.env.reset()
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/httpEnv.py", line 43, in reset
observation = self.client.env_reset(self.instance_id)
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/gymHttpClient.py", line 67, in env_reset
resp = self._post_request(route, None)
File "/home/wlxy/userfolder/RL4RS/rl4rs/server/gymHttpClient.py", line 43, in _post_request
data=json.dumps(data))
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 577, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 529, in request
resp = self.send(prep, **send_kwargs)
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/sessions.py", line 687, in send
r.content
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 838, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "/home/wlxy/.local/lib/python3.6/site-packages/requests/models.py", line 763, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(104, 'Connection reset by peer')", ConnectionResetError(104, 'Connection reset by peer'))
I would appreciate your help. Thank you very much.
The text was updated successfully, but these errors were encountered: