diff --git a/chainer/function_node.py b/chainer/function_node.py
index b674b4aad8cb..0a320656d9e1 100644
--- a/chainer/function_node.py
+++ b/chainer/function_node.py
@@ -707,7 +707,7 @@ def grad(outputs, inputs, grad_outputs=None, grad_inputs=None, set_grad=False,
             If you set loss scaling factor, gradients of loss values are to be
             multiplied by the factor before backprop starts. The factor is
             propagated to whole gradients in a computational graph along the
-            backporp. The gradients of parameters are divided by the factor
+            backprop. The gradients of parameters are divided by the factor
             just before the parameters are to be updated.
 
     Returns:
diff --git a/chainer/training/updaters/parallel_updater.py b/chainer/training/updaters/parallel_updater.py
index 1db042afdc27..c3bf3d026575 100644
--- a/chainer/training/updaters/parallel_updater.py
+++ b/chainer/training/updaters/parallel_updater.py
@@ -46,7 +46,7 @@ class ParallelUpdater(standard_updater.StandardUpdater):
             If you set loss scaling factor, gradients of loss values are to be
             multiplied by the factor before backprop starts. The factor is
             propagated to whole gradients in a computational graph along the
-            backporp. The gradients of parameters are divided by the factor
+            backprop. The gradients of parameters are divided by the factor
             just before the parameters are to be updated.
 
     """
diff --git a/chainer/training/updaters/standard_updater.py b/chainer/training/updaters/standard_updater.py
index 25523420066d..6eae9eb8d5aa 100644
--- a/chainer/training/updaters/standard_updater.py
+++ b/chainer/training/updaters/standard_updater.py
@@ -40,7 +40,7 @@ class StandardUpdater(_updater.Updater):
             If you set loss scaling factor, gradients of loss values are to be
             multiplied by the factor before backprop starts. The factor is
             propagated to whole gradients in a computational graph along the
-            backporp. The gradients of parameters are divided by the factor
+            backprop. The gradients of parameters are divided by the factor
             just before the parameters are to be updated.
 
     Attributes:
diff --git a/chainer/variable.py b/chainer/variable.py
index 5527f6264376..2fd41b373155 100644
--- a/chainer/variable.py
+++ b/chainer/variable.py
@@ -892,7 +892,7 @@ def backward(self, retain_grad=False, enable_double_backprop=False,
                 training. If you set loss scaling factor, gradients of loss
                 values are to be multiplied by the factor before backprop
                 starts. The factor is propagated to whole gradients in a
-                computational graph along the backporp. The gradients of
+                computational graph along the backprop. The gradients of
                 parameters are divided by the factor just before the
                 parameters are to be updated.
         """
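For reference, below is a minimal sketch (not part of the patch) of the loss-scaling behavior these docstrings describe. It assumes the loss_scale argument of Variable.backward shown in the variable.py hunk above; the toy model, the SGD optimizer, and the factor 128.0 are arbitrary illustrative choices.

# Minimal sketch of using a loss scaling factor in Chainer (assumed API per
# the docstrings above); model, data, and the factor 128.0 are illustrative.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

model = L.Linear(3, 2)
optimizer = chainer.optimizers.SGD(lr=0.01)
optimizer.setup(model)

x = np.random.rand(4, 3).astype(np.float32)
t = np.random.randint(0, 2, size=(4,)).astype(np.int32)

loss = F.softmax_cross_entropy(model(x), t)
model.cleargrads()
# The gradient of the loss is multiplied by loss_scale before backprop starts,
# and the scaled values propagate through the whole computational graph.
loss.backward(loss_scale=128.0)
# Parameter gradients are divided by the same factor just before the update.
optimizer.update()

The scaling matters mainly for float16 training, where small unscaled gradients can underflow during backprop; dividing by the factor right before the update restores their true magnitude.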