diff --git a/neural_nets/activations/activations.py b/neural_nets/activations/activations.py
index 9422b0e..987da85 100644
--- a/neural_nets/activations/activations.py
+++ b/neural_nets/activations/activations.py
@@ -149,3 +149,20 @@ def grad(self, x):
     def grad2(self, x):
         # 0 if x >= 0 else alpha * e^(z)
         return np.where(x >= 0, np.zeros_like(x), self.alpha * np.exp(x))
+
+
+class SoftPlus(ActivationBase):
+    def __init__(self):
+        super().__init__()
+
+    def __str__(self):
+        return "SoftPlus"
+
+    def fn(self, z):
+        return np.log(np.exp(z) + 1)
+
+    def grad(self, x):
+        return np.exp(x) / (np.exp(x) + 1)
+
+    def grad2(self, x):
+        return np.exp(x) / ((np.exp(x) + 1) ** 2)
diff --git a/neural_nets/activations/plots.py b/neural_nets/activations/plots.py
index d3e051e..1da888b 100644
--- a/neural_nets/activations/plots.py
+++ b/neural_nets/activations/plots.py
@@ -10,12 +10,12 @@
 sns.set_style("white")
 sns.set_context("notebook", font_scale=0.7)
 
-from activations import Affine, ReLU, LeakyReLU, Tanh, Sigmoid, ELU
+from activations import Affine, ReLU, LeakyReLU, Tanh, Sigmoid, ELU, SoftPlus
 
 
 def plot_activations():
-    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)
-    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU()]
+    fig, axes = plt.subplots(2, 4, sharex=True, sharey=True)
+    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU(), SoftPlus()]
     for ax, fn in zip(axes.flatten(), fns):
         X = np.linspace(-3, 3, 100).astype(float).reshape(100, 1)
         ax.plot(X, fn(X), label=r"$y$", alpha=0.7)
diff --git a/neural_nets/activations/tests.py b/neural_nets/activations/tests.py
index d3e051e..8e4d713 100644
--- a/neural_nets/activations/tests.py
+++ b/neural_nets/activations/tests.py
@@ -10,12 +10,12 @@
 sns.set_style("white")
 sns.set_context("notebook", font_scale=0.7)
 
-from activations import Affine, ReLU, LeakyReLU, Tanh, Sigmoid, ELU
+from activations import Affine, ReLU, LeakyReLU, Tanh, Sigmoid, ELU,SoftPlus
 
 
 def plot_activations():
-    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)
-    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU()]
+    fig, axes = plt.subplots(2, 4, sharex=True, sharey=True)
+    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU(), SoftPlus()]
     for ax, fn in zip(axes.flatten(), fns):
         X = np.linspace(-3, 3, 100).astype(float).reshape(100, 1)
         ax.plot(X, fn(X), label=r"$y$", alpha=0.7)
diff --git a/neural_nets/tests/tests.py b/neural_nets/tests/tests.py
index b8f1e04..e765fdc 100644
--- a/neural_nets/tests/tests.py
+++ b/neural_nets/tests/tests.py
@@ -148,6 +148,11 @@ def test_activations(N=50):
     test_elu_activation(N)
     test_elu_grad(N)
 
+    print("Testing SoftPlus activation")
+    time.sleep(1)
+    test_softplus_activation(N)
+    test_softplus_grad(N)
+
 
 def test_layers(N=50):
     print("Testing FullyConnected layer")
@@ -526,6 +531,23 @@ def test_relu_activation(N=None):
         i += 1
 
 
+def test_softplus_activation(N=None):
+    from activations import SoftPlus
+
+    N = np.inf if N is None else N
+
+    mine = SoftPlus()
+    gold = lambda z: F.softplus(torch.FloatTensor(z)).numpy()
+
+    i = 0
+    while i < N:
+        n_dims = np.random.randint(1, 100)
+        z = random_stochastic_matrix(1, n_dims)
+        assert_almost_equal(mine.fn(z), gold(z))
+        print("PASSED")
+        i += 1
+
+
 #######################################################################
 #                        Activation Gradients                         #
 #######################################################################
@@ -634,6 +656,24 @@ def test_softmax_grad(N=None):
         i += 1
 
 
+def test_softplus_grad(N=None):
+    from activations import SoftPlus
+
+    N = np.inf if N is None else N
+
+    mine = SoftPlus()
+    gold = torch_gradient_generator(F.softplus)
+
+    i = 0
+    while i < N:
+        n_ex = np.random.randint(1, 100)
+        n_dims = np.random.randint(1, 100)
+        z = random_tensor((n_ex, n_dims), standardize=True)
+        assert_almost_equal(mine.grad(z), gold(z))
+        print("PASSED")
+        i += 1
+
+
 #######################################################################
 #                               Layers                                #
 #######################################################################
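
For reference, the derivatives added in activations.py have a simple closed form: SoftPlus' grad, e^x / (e^x + 1), is the logistic sigmoid, and grad2 is the sigmoid's derivative, sigmoid(x) * (1 - sigmoid(x)). The snippet below is a minimal, standalone sanity check of those identities against central finite differences. It is not part of the patch; it re-implements the same formulas in plain NumPy rather than importing the SoftPlus class, and uses np.logaddexp(0, z) for log(1 + e^z) only to keep the check stable for large inputs.

import numpy as np


def softplus(z):
    # log(1 + e^z); np.logaddexp(0, z) computes this without overflowing for large z
    return np.logaddexp(0, z)


def softplus_grad(x):
    # d/dx log(1 + e^x) = e^x / (e^x + 1), i.e. the logistic sigmoid
    return np.exp(x) / (np.exp(x) + 1)


def softplus_grad2(x):
    # d^2/dx^2 log(1 + e^x) = e^x / (e^x + 1)^2 = sigmoid(x) * (1 - sigmoid(x))
    return np.exp(x) / ((np.exp(x) + 1) ** 2)


if __name__ == "__main__":
    z = np.linspace(-3, 3, 101)
    h = 1e-5

    # central finite differences of fn and grad
    approx_grad = (softplus(z + h) - softplus(z - h)) / (2 * h)
    approx_grad2 = (softplus_grad(z + h) - softplus_grad(z - h)) / (2 * h)

    sigmoid = 1 / (1 + np.exp(-z))
    assert np.allclose(softplus_grad(z), sigmoid)                   # grad is the sigmoid
    assert np.allclose(softplus_grad(z), approx_grad, atol=1e-6)    # grad matches d(fn)/dz
    assert np.allclose(softplus_grad2(z), approx_grad2, atol=1e-6)  # grad2 matches d(grad)/dz
    print("SoftPlus identities check out")

If overflow of np.exp(z) for large z ever becomes a concern in SoftPlus.fn, np.logaddexp(0, z) is a numerically stable equivalent of np.log(np.exp(z) + 1).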