Skip to content

Commit

Permalink
fix typo in comments under torch/csrc/distributed (#96062)
Browse files Browse the repository at this point in the history
This PR fixes typos in comments and messages of `.cpp` and `.hpp` files under `torch/csrc/distributed` directory

Pull Request resolved: pytorch/pytorch#96062
Approved by: https://github.com/ngimel
  • Loading branch information
kiszk authored and cyyever committed Mar 12, 2023
1 parent b074b4c commit 1addf8d
Show file tree
Hide file tree
Showing 22 changed files with 32 additions and 32 deletions.
2 changes: 1 addition & 1 deletion torch/csrc/distributed/autograd/engine/dist_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static constexpr const char* kNumAutogradContexts = "num_autograd_contexts";

// This hook does 3 things:
// 1. Call pre hooks of the original AccumulateGrad to modify the input grad.
// 2. Accumuate the guard to RPC context.
// 2. Accumulate the grad to RPC context.
// 3. Call post hooks of the original AccumulateGrad.
class DistAccumulateGradCaptureHook
: public GraphTask::ExecInfo::Capture::GradCaptureHook {
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/autograd/engine/dist_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class TORCH_API DistEngine {
// traverse the GraphTask instead of using the GraphTask embedded
// cpu_ready_queue, this is because dist engine might run the same GraphTask
// from different SendFunctions concurrently in different threads. The method
// will only mark the GraphTask as completed when it needes to, which means it
// will only mark the GraphTask as completed when it needs to, which means it
// might not mark as completed for every call as dist engine would like to
// keep the GraphTask alive when it has not received all gradients.
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class TORCH_API RpcWithProfilingResp : public rpc::RpcCommandBase {
std::vector<torch::autograd::profiler::LegacyEvent> profiledEvents,
rpc::ProfilingId profilingId);

// For receving RPCs. Used in from message when converting a message received
// For receiving RPCs. Used in from message when converting a message received
// over the wire.
RpcWithProfilingResp(
rpc::MessageType messageType,
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/c10d/Backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class TORCH_API Backend : public torch::CustomClassHolder {
}

// Gathers a single tensor inputBuffer into a single buffer outputBuffer that
// is interpreted as a contigious collection of size inputBuffer * WORLD_SIZE.
// is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE.
// For implementers of ProcessGroup API and advanced users only.
// Note: this function will be deprecated in near future.
virtual c10::intrusive_ptr<Work> _allgather_base(
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/c10d/ProcessGroup.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
}

// Gathers a single tensor inputBuffer into a single buffer outputBuffer that
// is interpreted as a contigious collection of size inputBuffer * WORLD_SIZE.
// is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE.
// For implementers of ProcessGroup API and advanced users only.
// Note: this function will be deprecated in near future.
virtual c10::intrusive_ptr<Work> _allgather_base(
Expand Down
6 changes: 3 additions & 3 deletions torch/csrc/distributed/c10d/ProcessGroupMPI.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ struct WorkEntry {
// MPI_THREAD_SERIALIZED, ProcessGroupMPI will only support a single process
// group. In other words, no more than 1 process group can be created globally.
//
// If you would like to use multiple ProcessGroupMPI, it requres your MPI
// implemenation to have a thread support value of MPI_THREAD_MULTIPLE, that is,
// If you would like to use multiple ProcessGroupMPI, it requires your MPI
// implementation to have a thread support value of MPI_THREAD_MULTIPLE, that is,
// multiple threads may call MPI, with no restriction.
//
// Also note that ProcessGroupMPI only supports a single Tensor operation. In
Expand Down Expand Up @@ -229,7 +229,7 @@ class TORCH_API ProcessGroupMPI : public Backend {
c10::intrusive_ptr<Work> barrier(
const BarrierOptions& opts = BarrierOptions()) override;

// Creating a new ProcessGroupMPI, will initiialize MPI if not initialized
// Creating a new ProcessGroupMPI, will initialize MPI if not initialized
static c10::intrusive_ptr<ProcessGroupMPI> createProcessGroupMPI(
std::vector<int> ranks = {});

Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ void ProcessGroupNCCL::WorkNCCL::synchronizeInternal(
// So explicitly abort ncclComms here before throwing this timed out
// exception to users, after this, ncclCommWatchdog can detect nccl
// communicators are aborted and clean up devNCCLCommMap_ accordingly.
// if throwing timed out excepiton without aborting nccl communicators
// if throwing timed out exception without aborting nccl communicators
// here, it was observed that CUDA GPU will have 100% utilization and
// can not run new events successfully.

Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
void synchronizeStreams();

// Helper function used in CUDA Stream callbacks to complete WorkNCCL
// objects and throw exceptions when neeeded.
// objects and throw exceptions when needed.
void handleNCCLGuard(ErrorHandlingMode asyncErrorHandling);

// Helper function that checks if the NCCL kernels have finished
Expand Down Expand Up @@ -497,7 +497,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {

// Helper that encapsulates work shared across point-to-point communication
// primitives. It is the same structure as the helper used for collective
// communicaiton primitives.
// communication primitives.
template <typename Fn>
c10::intrusive_ptr<Work> pointToPoint(
std::vector<at::Tensor>& tensor,
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/distributed/c10d/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ that adds a prefix to each key inserted to the store.
processGroup,
"Options",
R"(
Base class for all processs group options implementations, such as the nccl
Base class for all process group options implementations, such as the nccl
options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
)")
.def(
Expand Down Expand Up @@ -2096,7 +2096,7 @@ Example::
``fut.then()`` will return another ``CUDAFuture`` that holds the return value of the
callback and a ``CUDAEvent`` that recorded the callback stream.
1. For CPU work, ``fut.done()`` returns true when work has been complted and value()
1. For CPU work, ``fut.done()`` returns true when work has been completed and value()
tensors are ready.
2. For GPU work, ``fut.done()`` returns true once the operation has been enqueued.
3. For mixed CPU-GPU work (e.g. sending GPU tensors with GLOO), ``fut.done()`` returns
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/c10d/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class TORCH_API Logger {
);
// Set stats that can be collected only during
// training loop. It is called at the beginning of forward call
// to record the run time stats of sampled iterations that previouly ran.
// to record the run time stats of sampled iterations that previously ran.
// GPU performance stats are collected only for single process
// single device program and single device module right now.
// TODO to support single process multiple devices and multi device modules,
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/c10d/reducer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,7 @@ void Reducer::initialize_bucket_views(Reducer::Bucket& bucket) {
if (grad.defined() && !grad.is_alias_of(bucket_view)) {
bucket_view.copy_(grad);
grad = bucket_view;
// The grad is modefied and needs to be written back.
// The grad is modified and needs to be written back.
return true;
}
// The grad is not modified and does not need to be written back.
Expand Down
8 changes: 4 additions & 4 deletions torch/csrc/distributed/c10d/reducer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ class TORCH_API Reducer {
// a call to this function can simply be omitted.
void prepare_for_backward(const std::vector<at::Tensor>& outputs);

// Called at the begginning of forward() inside DistributedDataParallel,
// right now it caputures the starting time of forward in each iteration.
// Called at the beginning of forward() inside DistributedDataParallel,
// right now it captures the starting time of forward in each iteration.
void prepare_for_forward();

// Returns the relative time in nanoseconds when gradients were ready,
Expand Down Expand Up @@ -153,7 +153,7 @@ class TORCH_API Reducer {

// A function for users to set sample_rate of collecting
// runtime stats. The time stats will be recorded for the
// first 10 iterations, after 10 iteratons time stats will be
// first 10 iterations, after 10 iterations time stats will be
// recorded once every "sample_rate" training iterations.
void set_ddp_runtime_logging_sample_rate(int sample_rate);

Expand Down Expand Up @@ -504,7 +504,7 @@ class TORCH_API Reducer {
// Retrieves parameter names that have not been marked as ready as part of
// previous iteration.
std::vector<std::string> getUnmarkedParamsForIteration();
// Retrives parameter indices that have not been marked as ready as part of
// Retrieves parameter indices that have not been marked as ready as part of
// previous iteration.
std::vector<size_t> getUnmarkedParamIndicesForIteration();
// Raises appropriate error if mark_variable_ready is called on the same
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/message.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ enum MessageType {
// to determine how to serialize them. This design is helpful for
// communicating super large tensors where serializing all the data at
// once leads to excessively large memory footprint. An implementation
// can then serialize and send tensors chunck-by-chunk, in the streaming
// can then serialize and send tensors chunk-by-chunk, in the streaming
// fashion.
// type (MessageType): type of the message.
// id (int64_t): message id, this is used to match request and response.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ TORCH_API extern mutexType currentStateStackEntryMutex;

// This class is used to implement a stack of ``State``s.
// It has 2 members.
// One is `prevPtr`, a shared_ptr poiniting to previous elememnt in the
// One is `prevPtr`, a shared_ptr pointing to previous element in the
// stack.
// The other is ``statePtr``, a shared_ptr pointing to ``State``.
class StateStackEntry {
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/request_callback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ c10::intrusive_ptr<JitFuture> RequestCallback::operator()(
std::vector<c10::Stream> streams) const {
// NB: cannot clear autograd context id here because the processMessage method
// might pause waiting for all RRefs in the arguments to be confirmed by their
// owners and resumne processing in a different thread. Hence, the
// owners and resume processing in a different thread. Hence, the
// thread_local context id needs to be set and cleared in the thread that
// indeed carries out the processing logic.
return processMessage(request, std::move(streams));
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/request_callback_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ c10::intrusive_ptr<JitFuture> RequestCallbackImpl::runPythonFunction(
return asFuture(std::current_exception());
}

// After sync exection or failed async execution return the value as-is.
// After sync execution or failed async execution return the value as-is.
if (pythonRpcHandler.isRemoteException(result) || !isAsyncExecution) {
return asFuture(
c10::ivalue::ConcretePyObjectHolder::create(result),
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/request_callback_no_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ c10::intrusive_ptr<JitFuture> RequestCallbackNoPython::processMessage(
// of 10us.
auto serverProcessGlobalProfilerStateStackEntryPtr =
profiler::processglobal::StateStackEntry::current();
// If server global profiler is enabled, we futher pay the
// If server global profiler is enabled, we further pay the
// cost of thread local profiler state initialization.
if (serverProcessGlobalProfilerStateStackEntryPtr) {
// Initialize thread-local profiler state from process-global
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/rpc_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void RpcAgent::retryExpiredRpcs() {
}

// If there are no more RPC's set to be retried at the current timepoint,
// we can remove the corresponsing unordered_set from the retry map.
// we can remove the corresponding unordered_set from the retry map.
if (earliestRpcList.empty()) {
rpcRetryMap_.erase(earliestTimeout);
}
Expand Down
6 changes: 3 additions & 3 deletions torch/csrc/distributed/rpc/rpc_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using steady_clock_time_point =
std::chrono::time_point<std::chrono::steady_clock>;
// Input is qualified name string, output is JIT StrongTypePtr
// Same as jit::TypeResolver, did not import jit::TypeResolver to here
// because it could instroduce cyclic dependencies.
// because it could introduce cyclic dependencies.
using TypeResolver =
std::function<c10::StrongTypePtr(const c10::QualifiedName&)>;

Expand Down Expand Up @@ -153,7 +153,7 @@ class TORCH_API RpcAgent {
const DeviceMap& deviceMap = {}) = 0;

// Retries sending the message up to maxRetries times until an ACK is
// receieved. The duration between consecutive sends is increased over
// received. The duration between consecutive sends is increased over
// time using an exponential backoff algorithm.
//
// Sends ``message`` to the ``RpcAgent`` of id ``to`` and returns a
Expand Down Expand Up @@ -232,7 +232,7 @@ class TORCH_API RpcAgent {
// Retrieve metrics as KV map
virtual std::unordered_map<std::string, std::string> getMetrics() = 0;

// Retrive debug info in addition to metrics as KV map
// Retrieve debug info in addition to metrics as KV map
virtual std::unordered_map<std::string, std::string> getDebugInfo();

// Flag to control whether GIL wait times
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/distributed/rpc/rref_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class TORCH_API RRefContext {
// been confirmed (i.e. is no longer in the pendingUsers_ map).
c10::intrusive_ptr<RRef> getPendingUser(const ForkId& forkId);

// Start recroding new pending UserRRefs. All pending UserRRefs introduced
// Start recording new pending UserRRefs. All pending UserRRefs introduced
// after this point will be put into the thread_local userTable_, which will
// then be consumed and cleared in waitForThreadLocalPendingRRefs().
void recordThreadLocalPendingRRefs();
Expand Down Expand Up @@ -264,7 +264,7 @@ class TORCH_API RRefContext {
RRefId::Hash>
forks_;

// This cond var is used by deleteAllUsers(), a event notificaton is sent if
// This cond var is used by deleteAllUsers(), an event notification is sent if
// number of pending UserRRef or UserRRef children is reduced, or
// number of owned OwnerRRef is reduced.
std::condition_variable deleteAllUsersCV_;
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/rref_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class TORCH_API PythonRRefFetchRet final : public RRefFetchRet {
const Message& message);
};

// UserRRef (regardless it's the creator or not) uses this message to notiify
// UserRRef (regardless of whether it's the creator or not) uses this message to notify
// OwnerRRef on delete.
class TORCH_API RRefUserDelete final : public ForkMessageBase {
public:
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/distributed/rpc/script_remote_call.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using torch::jit::Operator;
// A ScriptRemoteCall instance represents an invocation of `dist.remote` on a
// builtin operator. Currently, it does not support using RRef as arguments yet.
// Besides the operator and a vector of arguments, ScriptRemoteCall also
// caontains the RRefId and the ForkId of the return value RRef.
// contains the RRefId and the ForkId of the return value RRef.
class TORCH_API ScriptRemoteCall final : public ScriptCall {
public:
// Constructor for builtin operator call.
Expand Down

0 comments on commit 1addf8d

Please sign in to comment.