diff --git a/examples/autoencoder/stacked_conv_autoencoders.py b/examples/autoencoder/stacked_conv_autoencoders.py index 3be62bad..ca2dc3fe 100644 --- a/examples/autoencoder/stacked_conv_autoencoders.py +++ b/examples/autoencoder/stacked_conv_autoencoders.py @@ -5,7 +5,7 @@ from sklearn import datasets, metrics from sklearn.model_selection import train_test_split from sklearn.preprocessing import OneHotEncoder -from neupy import algorithms, layers, environment, surgery +from neupy import algorithms, layers, environment environment.reproducible() @@ -32,85 +32,82 @@ x_labeled_4d = x_labeled.reshape((n_labeled, 1, 28, 28)) x_unlabeled_4d = x_unlabeled.reshape((n_unlabeled, 1, 28, 28)) -conv_autoencoder = algorithms.Momentum( - [ - layers.Input((1, 28, 28)), +encoder = layers.join( + layers.Input((1, 28, 28)), - layers.Convolution((16, 3, 3)) > layers.Relu(), - layers.Convolution((16, 3, 3)) > layers.Relu(), - layers.MaxPooling((2, 2)), + layers.Convolution((16, 3, 3)) > layers.Relu(), + layers.Convolution((16, 3, 3)) > layers.Relu(), + layers.MaxPooling((2, 2)), - layers.Convolution((32, 3, 3)) > layers.Relu(), - layers.MaxPooling((2, 2)), + layers.Convolution((32, 3, 3)) > layers.Relu(), + layers.MaxPooling((2, 2)), - layers.Reshape(), + layers.Reshape(), - layers.Relu(512), - layers.Relu(256), - surgery.CutLine(), + layers.Relu(256), + layers.Relu(128), +) - layers.Relu(512), - layers.Relu(800), +decoder = layers.join( + layers.Relu(256), + layers.Relu(32 * 5 * 5), - layers.Reshape((32, 5, 5)), + layers.Reshape((32, 5, 5)), - layers.Upscale((2, 2)), - layers.Convolution((16, 3, 3), padding='full') > layers.Relu(), + layers.Upscale((2, 2)), + layers.Convolution((16, 3, 3), padding='full') > layers.Relu(), - layers.Upscale((2, 2)), - layers.Convolution((16, 3, 3), padding='full') > layers.Relu(), - layers.Convolution((1, 3, 3), padding='full') > layers.Sigmoid(), + layers.Upscale((2, 2)), + layers.Convolution((16, 3, 3), padding='full') > layers.Relu(), + layers.Convolution((1, 3, 3), padding='full') > layers.Sigmoid(), - layers.Reshape(), - ], + layers.Reshape(), +) +conv_autoencoder = algorithms.Momentum( + connection=encoder > decoder, verbose=True, step=0.1, momentum=0.99, shuffle_data=True, - batch_size=128, + batch_size=64, error='binary_crossentropy', ) conv_autoencoder.architecture() conv_autoencoder.train(x_unlabeled_4d, x_unlabeled, x_labeled_4d, x_labeled, epochs=10) -classifier_structure, _ = surgery.cut_along_lines(conv_autoencoder) +x_labeled_encoded = encoder.output(x_labeled_4d).eval() +x_unlabeled_encoded = encoder.output(x_unlabeled_4d).eval() -x_labeled_encoded = classifier_structure.output(x_labeled_4d).eval() -x_unlabeled_encoded = classifier_structure.output(x_unlabeled_4d).eval() +classifier_network = layers.join( + layers.PRelu(512), + layers.Dropout(0.25), + layers.Softmax(10), +) -linear_classifier = algorithms.Adadelta( - [ - layers.Input(classifier_structure.output_shape), - layers.PRelu(512), - layers.Dropout(0.25), - layers.Softmax(10), - ], +encoder_classifier = algorithms.Adadelta( + layers.Input(encoder.output_shape) > classifier_network, verbose=True, step=0.05, shuffle_data=True, - batch_size=128, + batch_size=64, error='categorical_crossentropy', ) -linear_classifier.architecture() -linear_classifier.train(x_labeled_encoded, y_labeled, - x_unlabeled_encoded, y_unlabeled, epochs=100) - -classification_layer = surgery.cut(linear_classifier, start=1, end=4) -classifier_structure = surgery.sew_together([classifier_structure, - classification_layer]) 
+encoder_classifier.architecture() +encoder_classifier.train(x_labeled_encoded, y_labeled, + x_unlabeled_encoded, y_unlabeled, epochs=100) classifier = algorithms.MinibatchGradientDescent( - classifier_structure, + encoder > classifier_network, verbose=True, - step=0.1, + step=0.01, shuffle_data=True, - batch_size=128, + batch_size=64, error='categorical_crossentropy', ) classifier.architecture() -classifier.train(x_labeled_4d, y_labeled, epochs=1000) +classifier.train(x_labeled_4d, y_labeled, epochs=100) unlabeled_predicted = classifier.predict(x_unlabeled_4d).argmax(axis=1) y_unlabeled_classes = np.asarray(y_unlabeled).argmax(axis=1) diff --git a/examples/cnn/alexnet.py b/examples/cnn/alexnet.py index 58352a47..88fac7cd 100644 --- a/examples/cnn/alexnet.py +++ b/examples/cnn/alexnet.py @@ -72,47 +72,44 @@ def __repr__(self): layers.MaxPooling((3, 3), stride=(2, 2)), layers.LocalResponseNorm(), - layers.Parallel( - [[ - SliceChannels(0, 48), - layers.Convolution((128, 5, 5), padding=2, **conv_2_1), - layers.Relu(), - ], [ - SliceChannels(48, 96), - layers.Convolution((128, 5, 5), padding=2, **conv_2_2), - layers.Relu(), - ]], - layers.Concatenate(), - ), + [[ + SliceChannels(0, 48), + layers.Convolution((128, 5, 5), padding=2, **conv_2_1), + layers.Relu(), + ], [ + SliceChannels(48, 96), + layers.Convolution((128, 5, 5), padding=2, **conv_2_2), + layers.Relu(), + ]], + layers.Concatenate(), + layers.MaxPooling((3, 3), stride=(2, 2)), layers.LocalResponseNorm(), layers.Convolution((384, 3, 3), padding=1, **conv_3) > layers.Relu(), - layers.Parallel( - [[ - SliceChannels(0, 192), - layers.Convolution((192, 3, 3), padding=1, **conv_4_1), - layers.Relu(), - ], [ - SliceChannels(192, 384), - layers.Convolution((192, 3, 3), padding=1, **conv_4_2), - layers.Relu(), - ]], - layers.Concatenate(), - ), - layers.Parallel( - [[ - SliceChannels(0, 192), - layers.Convolution((128, 3, 3), padding=1, **conv_5_1), - layers.Relu(), - ], [ - SliceChannels(192, 384), - layers.Convolution((128, 3, 3), padding=1, **conv_5_2), - layers.Relu(), - ]], - layers.Concatenate(), - ), + [[ + SliceChannels(0, 192), + layers.Convolution((192, 3, 3), padding=1, **conv_4_1), + layers.Relu(), + ], [ + SliceChannels(192, 384), + layers.Convolution((192, 3, 3), padding=1, **conv_4_2), + layers.Relu(), + ]], + layers.Concatenate(), + + [[ + SliceChannels(0, 192), + layers.Convolution((128, 3, 3), padding=1, **conv_5_1), + layers.Relu(), + ], [ + SliceChannels(192, 384), + layers.Convolution((128, 3, 3), padding=1, **conv_5_2), + layers.Relu(), + ]], + layers.Concatenate(), + layers.MaxPooling((3, 3), stride=(2, 2)), layers.Reshape(), diff --git a/examples/cnn/googlenet.py b/examples/cnn/googlenet.py index a6cc485c..cff96bbd 100644 --- a/examples/cnn/googlenet.py +++ b/examples/cnn/googlenet.py @@ -7,7 +7,7 @@ def Inception(nfilters): - return layers.Parallel( + return layers.join( [[ layers.MaxPooling((3, 3), stride=1, padding=(1, 1)), layers.Convolution((nfilters[0], 1, 1)), @@ -60,6 +60,4 @@ def Inception(nfilters): layers.Softmax(1000), ) -googlenet.initialize() - plots.layer_structure(googlenet) diff --git a/examples/cnn/inception_v3.py b/examples/cnn/inception_v3.py index ff9b0f3c..5103cdcd 100644 --- a/examples/cnn/inception_v3.py +++ b/examples/cnn/inception_v3.py @@ -12,7 +12,7 @@ def Conv_Relu_BatchNorm(*conv_args, **conv_kwargs): def Inception_1(conv_filters): - return layers.Parallel( + return layers.join( [[ Conv_Relu_BatchNorm((conv_filters[0][0], 1, 1)), ], [ @@ -32,7 +32,7 @@ def Inception_1(conv_filters): def 
Inception_2(conv_filters): - return layers.Parallel( + return layers.join( [[ Conv_Relu_BatchNorm((conv_filters[0][0], 1, 1)), ], [ @@ -64,34 +64,28 @@ def Inception_3(conv_filters, pooling): elif pooling == 'average': Pooling = partial(layers.AveragePooling, mode='exclude_padding') - return layers.Parallel( + return layers.join( [[ Conv_Relu_BatchNorm((conv_filters[0][0], 1, 1)), ], [ Conv_Relu_BatchNorm((conv_filters[1][0], 1, 1)), - layers.Parallel( - [[ - Conv_Relu_BatchNorm((conv_filters[1][1], 1, 3), - padding=(0, 1)), - ], [ - Conv_Relu_BatchNorm((conv_filters[1][2], 3, 1), - padding=(1, 0)), - ]], - layers.Concatenate(), - ) + [[ + Conv_Relu_BatchNorm((conv_filters[1][1], 1, 3), + padding=(0, 1)), + ], [ + Conv_Relu_BatchNorm((conv_filters[1][2], 3, 1), + padding=(1, 0)), + ]], ], [ Conv_Relu_BatchNorm((conv_filters[2][0], 1, 1)), Conv_Relu_BatchNorm((conv_filters[2][1], 3, 3), padding=1), - layers.Parallel( - [[ - Conv_Relu_BatchNorm((conv_filters[2][2], 1, 3), - padding=(0, 1)), - ], [ - Conv_Relu_BatchNorm((conv_filters[2][3], 3, 1), - padding=(1, 0)), - ]], - layers.Concatenate(), - ) + [[ + Conv_Relu_BatchNorm((conv_filters[2][2], 1, 3), + padding=(0, 1)), + ], [ + Conv_Relu_BatchNorm((conv_filters[2][3], 3, 1), + padding=(1, 0)), + ]], ], [ Pooling((3, 3), stride=(1, 1), padding=1), Conv_Relu_BatchNorm((conv_filters[3][0], 1, 1)), @@ -116,39 +110,35 @@ def Inception_3(conv_filters, pooling): Inception_1([[64], [48, 64], [64, 96, 96], [64]]), Inception_1([[64], [48, 64], [64, 96, 96], [64]]), - layers.Parallel( - [[ - Conv_Relu_BatchNorm((384, 3, 3), stride=2), - ], [ - Conv_Relu_BatchNorm((64, 1, 1)), - Conv_Relu_BatchNorm((96, 3, 3), padding=1), - Conv_Relu_BatchNorm((96, 3, 3), stride=2), + [[ + Conv_Relu_BatchNorm((384, 3, 3), stride=2), + ], [ + Conv_Relu_BatchNorm((64, 1, 1)), + Conv_Relu_BatchNorm((96, 3, 3), padding=1), + Conv_Relu_BatchNorm((96, 3, 3), stride=2), - ], [ - layers.MaxPooling((3, 3), stride=(2, 2)) - ]], - layers.Concatenate(), - ), + ], [ + layers.MaxPooling((3, 3), stride=(2, 2)) + ]], + layers.Concatenate(), Inception_2([[192], [128, 128, 192], [128, 128, 128, 128, 192], [192]]), Inception_2([[192], [160, 160, 192], [160, 160, 160, 160, 192], [192]]), Inception_2([[192], [160, 160, 192], [160, 160, 160, 160, 192], [192]]), Inception_2([[192], [192, 192, 192], [192, 192, 192, 192, 192], [192]]), - layers.Parallel( - [[ - Conv_Relu_BatchNorm((192, 1, 1)), - Conv_Relu_BatchNorm((320, 3, 3), stride=2), - ], [ - Conv_Relu_BatchNorm((192, 1, 1)), - Conv_Relu_BatchNorm((192, 1, 7), padding=(0, 3)), - Conv_Relu_BatchNorm((192, 7, 1), padding=(3, 0)), - Conv_Relu_BatchNorm((192, 3, 3), stride=2), - ], [ - layers.MaxPooling((3, 3), stride=(2, 2)) - ]], - layers.Concatenate(), - ), + [[ + Conv_Relu_BatchNorm((192, 1, 1)), + Conv_Relu_BatchNorm((320, 3, 3), stride=2), + ], [ + Conv_Relu_BatchNorm((192, 1, 1)), + Conv_Relu_BatchNorm((192, 1, 7), padding=(0, 3)), + Conv_Relu_BatchNorm((192, 7, 1), padding=(3, 0)), + Conv_Relu_BatchNorm((192, 3, 3), stride=2), + ], [ + layers.MaxPooling((3, 3), stride=(2, 2)) + ]], + layers.Concatenate(), Inception_3([[320], [384, 384, 384], [448, 384, 384, 384], [192]], pooling='average'), @@ -158,6 +148,4 @@ def Inception_3(conv_filters, pooling): layers.GlobalPooling(), layers.Softmax(1000), ) -inception_v3.initialize() - plots.layer_structure(inception_v3) diff --git a/examples/cnn/resnet50.py b/examples/cnn/resnet50.py index 17bc9616..da522416 100644 --- a/examples/cnn/resnet50.py +++ b/examples/cnn/resnet50.py @@ -23,7 +23,7 
@@ def ResidualUnit(n_in_filters, n_out_filters, stride, has_branch=False): layers.BatchNorm(), ) - return layers.Parallel( + return layers.join( [main_branch, residual_branch], layers.Elementwise() > layers.Relu(), ) @@ -62,6 +62,4 @@ def ResidualUnit(n_in_filters, n_out_filters, stride, has_branch=False): layers.Reshape(), layers.Softmax(1000), ) -resnet50.initialize() - plots.layer_structure(resnet50) diff --git a/examples/cnn/squeezenet.py b/examples/cnn/squeezenet.py index a8a7ec7e..8e358d4f 100644 --- a/examples/cnn/squeezenet.py +++ b/examples/cnn/squeezenet.py @@ -10,17 +10,14 @@ def Fire(s_1x1, e_1x1, e_3x3): return layers.join( layers.Convolution((s_1x1, 1, 1), padding='half'), layers.Relu(), - - layers.Parallel( - [[ - layers.Convolution((e_1x1, 1, 1), padding='half'), - layers.Relu(), - ], [ - layers.Convolution((e_3x3, 3, 3), padding='half'), - layers.Relu(), - ]], - layers.Concatenate(), - ) + [[ + layers.Convolution((e_1x1, 1, 1), padding='half'), + layers.Relu(), + ], [ + layers.Convolution((e_3x3, 3, 3), padding='half'), + layers.Relu(), + ]], + layers.Concatenate(), ) @@ -51,6 +48,4 @@ def Fire(s_1x1, e_1x1, e_3x3): layers.Reshape(), layers.Softmax(), ) -squeezenet.initialize() - plots.layer_structure(squeezenet) diff --git a/examples/cnn/vgg16.py b/examples/cnn/vgg16.py index 8ee8e4e8..83f1e7d8 100644 --- a/examples/cnn/vgg16.py +++ b/examples/cnn/vgg16.py @@ -44,7 +44,6 @@ layers.Relu(4096) > layers.Dropout(0.5), layers.Softmax(1000), ) -vgg16.initialize() if not os.path.exists(VGG16_WEIGHTS_FILE): download_file( diff --git a/examples/cnn/vgg19.py b/examples/cnn/vgg19.py index 2483ce20..391ea832 100644 --- a/examples/cnn/vgg19.py +++ b/examples/cnn/vgg19.py @@ -47,7 +47,6 @@ layers.Relu(4096) > layers.Dropout(0.5), layers.Softmax(1000), ) -vgg19.initialize() if not os.path.exists(VGG19_WEIGHTS_FILE): download_file( diff --git a/neupy/algorithms/__init__.py b/neupy/algorithms/__init__.py index 8373ffa8..57a45d57 100644 --- a/neupy/algorithms/__init__.py +++ b/neupy/algorithms/__init__.py @@ -47,6 +47,3 @@ from .linear.perceptron import * from .rbm import * - -# Other -from .utils import StopTrainingException diff --git a/neupy/algorithms/associative/oja.py b/neupy/algorithms/associative/oja.py index 07ac7cf7..7d60a19f 100644 --- a/neupy/algorithms/associative/oja.py +++ b/neupy/algorithms/associative/oja.py @@ -1,6 +1,7 @@ import numpy as np -from neupy.utils import format_data, NotTrainedException +from neupy.utils import format_data +from neupy.exceptions import NotTrained from neupy.core.properties import IntProperty, ParameterProperty from neupy.algorithms.base import BaseNetwork from neupy import init @@ -141,8 +142,7 @@ def train(self, input_data, epsilon=1e-2, epochs=100): def reconstruct(self, input_data): if not isinstance(self.weight, np.ndarray): - raise NotTrainedException("Train network before use " - "reconstruct method.") + raise NotTrained("Network hasn't been trained yet") input_data = format_data(input_data) if input_data.shape[1] != self.minimized_data_size: @@ -158,8 +158,7 @@ def reconstruct(self, input_data): def predict(self, input_data): if not isinstance(self.weight, np.ndarray): - raise NotTrainedException("Train network before use " - "prediction method.") + raise NotTrained("Network hasn't been trained yet") input_data = format_data(input_data) return np.dot(input_data, self.weight) diff --git a/neupy/algorithms/base.py b/neupy/algorithms/base.py index 6ed995ef..3d60abbb 100644 --- a/neupy/algorithms/base.py +++ b/neupy/algorithms/base.py @@ 
-8,12 +8,12 @@ from neupy.utils import preformat_value, AttributeKeyDict from neupy.helpers import table +from neupy.exceptions import StopTraining from neupy.core.base import BaseSkeleton from neupy.core.properties import (BoundedProperty, NumberProperty, Property) from .summary_info import SummaryTable, InlineSummary -from .utils import (iter_until_converge, shuffle, normalize_error, - StopTrainingException) +from .utils import iter_until_converge, shuffle, normalize_error __all__ = ('BaseNetwork',) @@ -435,7 +435,7 @@ def train(self, input_train, target_train=None, input_test=None, is_first_iteration = False - except StopTrainingException as err: + except StopTraining as err: # TODO: This notification breaks table view in terminal. # I need to show it in a different way. logs.message("TRAIN", "Epoch #{} stopped. {}" diff --git a/neupy/algorithms/constructor.py b/neupy/algorithms/constructor.py index b0c88e0a..50934b5e 100644 --- a/neupy/algorithms/constructor.py +++ b/neupy/algorithms/constructor.py @@ -8,11 +8,10 @@ import theano.tensor as T from neupy import layers -from neupy.utils import (AttributeKeyDict, asfloat, is_list_of_integers, - format_data, does_layer_accept_1d_feature) +from neupy.utils import AttributeKeyDict, asfloat, format_data from neupy.layers.utils import preformat_layer_shape -from neupy.layers.connections import (LayerConnection, NetworkConnectionError, - is_sequential) +from neupy.layers.connections import LayerConnection, is_sequential +from neupy.exceptions import InvalidConnection from neupy.helpers import table from neupy.core.properties import ChoiceProperty from neupy.algorithms.base import BaseNetwork @@ -23,6 +22,21 @@ __all__ = ('ConstructableNetwork',) +def does_layer_accept_1d_feature(layer): + """ + Check if 1D feature values are valid for the layer. + + Parameters + ---------- + layer : object + + Returns + ------- + bool + """ + return (layer.output_shape == (1,)) + + def generate_layers(layers_sizes): """ Create from list of layer sizes basic linear network. @@ -72,7 +86,7 @@ def clean_layers(connection): if isinstance(connection, tuple): connection = list(connection) - if is_list_of_integers(connection): + if all(isinstance(element, int) for element in connection): connection = generate_layers(connection) islist = isinstance(connection, list) @@ -299,15 +313,15 @@ def __init__(self, connection, *args, **kwargs): self.layers = list(self.connection) graph = self.connection.graph - if len(graph.input_layers) != 1: + if len(self.connection.input_layers) != 1: n_inputs = len(graph.input_layers) - raise NetworkConnectionError("Connection should have one input " - "layer, got {}".format(n_inputs)) + raise InvalidConnection("Connection should have one input " + "layer, got {}".format(n_inputs)) - if len(graph.output_layers) != 1: + if len(self.connection.output_layers) != 1: n_outputs = len(graph.output_layers) - raise NetworkConnectionError("Connection should have one output " - "layer, got {}".format(n_outputs)) + raise InvalidConnection("Connection should have one output " + "layer, got {}".format(n_outputs)) self.input_layer = graph.input_layers[0] self.output_layer = graph.output_layers[0] @@ -380,7 +394,6 @@ def init_layers(self): Initialize layers in the same order as they were list in network initialization step. 
""" - self.connection.initialize() def init_train_updates(self): """ diff --git a/neupy/algorithms/gd/errors.py b/neupy/algorithms/gd/errors.py index ddd52b3d..7d453d68 100644 --- a/neupy/algorithms/gd/errors.py +++ b/neupy/algorithms/gd/errors.py @@ -1,15 +1,37 @@ from __future__ import division +import theano import theano.tensor as T from neupy.core.docs import shared_docs -from neupy.utils import smallest_positive_number, asint +from neupy.utils import asint __all__ = ('mse', 'rmse', 'mae', 'msle', 'rmsle', 'binary_crossentropy', 'categorical_crossentropy', 'binary_hinge', 'categorical_hinge') +def smallest_positive_number(): + """ + Function returns different nubmer for different + ``theano.config.floatX`` values. + + * ``1e-7`` for 32-bit float + * ``1e-16`` for 64-bit float + + Returns + ------- + float + Smallest positive float number. + """ + float_type = theano.config.floatX + epsilon_values = { + 'float32': 1e-7, + 'float64': 1e-16, + } + return epsilon_values[float_type] + + def error_function(expected, predicted): """ Parameters diff --git a/neupy/algorithms/gd/momentum.py b/neupy/algorithms/gd/momentum.py index 1db6fa6f..d9d967a6 100644 --- a/neupy/algorithms/gd/momentum.py +++ b/neupy/algorithms/gd/momentum.py @@ -56,8 +56,8 @@ def init_layers(self): for layer in self.layers: for parameter in layer.parameters.values(): parameter_shape = parameter.get_value().shape - parameter.prev_param_delta = theano.shared( - name="{}/prev-param-delta".format(parameter.name), + parameter.previous_velocity = theano.shared( + name="{}/previous-velocity".format(parameter.name), value=asfloat(np.zeros(parameter_shape)), ) @@ -65,13 +65,13 @@ def init_param_updates(self, layer, parameter): step = self.variables.step gradient = T.grad(self.variables.error_func, wrt=parameter) - prev_param_delta = parameter.prev_param_delta - parameter_delta = self.momentum * prev_param_delta - step * gradient + previous_velocity = parameter.previous_velocity + velocity = self.momentum * previous_velocity - step * gradient if self.nesterov: - parameter_delta = self.momentum * parameter_delta - step * gradient + velocity = self.momentum * velocity - step * gradient return [ - (parameter, parameter + parameter_delta), - (prev_param_delta, parameter_delta), + (parameter, parameter + velocity), + (previous_velocity, velocity), ] diff --git a/neupy/algorithms/linear/base.py b/neupy/algorithms/linear/base.py index 9b957141..cbb302aa 100644 --- a/neupy/algorithms/linear/base.py +++ b/neupy/algorithms/linear/base.py @@ -1,6 +1,6 @@ -from neupy.utils import is_list_of_integers from neupy.layers import Step, Input -from neupy.layers.connections import NetworkConnectionError, LayerConnection +from neupy.layers.connections import LayerConnection +from neupy.exceptions import InvalidConnection from neupy.algorithms.constructor import ConstructableNetwork @@ -48,15 +48,17 @@ def __init__(self, connection, **options): if len(connection) != 2: raise ValueError("This network should contains two layers.") - if is_list_of_integers(connection): + if all(isinstance(element, int) for element in connection): input_layer_size, output_layer_size = connection connection = Input(input_layer_size) > Step(output_layer_size) if not isinstance(connection, LayerConnection): - raise ValueError("Invalid network connection structure.") + raise ValueError("Invalid connection type") - if not isinstance(connection.output_layer, Step): - raise NetworkConnectionError( + output_layer = connection.output_layers[0] + + if not isinstance(output_layer, 
Step): + raise InvalidConnection( "Final layer should contains step activation function " "(``layers.Step`` class instance)." ) diff --git a/neupy/algorithms/memory/bam.py b/neupy/algorithms/memory/bam.py index e507622c..8a20e5ff 100644 --- a/neupy/algorithms/memory/bam.py +++ b/neupy/algorithms/memory/bam.py @@ -2,7 +2,8 @@ import numpy as np -from neupy.utils import format_data, NotTrainedException +from neupy.utils import format_data +from neupy.exceptions import NotTrained from .utils import bin2sign, hopfield_energy, step_function from .base import DiscreteMemory @@ -109,7 +110,7 @@ def predict_output(self, input_data, n_times=None): def prediction(self, input_data=None, output_data=None, n_times=None): if self.weight is None: - raise NotTrainedException("Network hasn't been trained yet") + raise NotTrained("Network hasn't been trained yet") if input_data is None and output_data is not None: self.discrete_validation(output_data) diff --git a/neupy/algorithms/rbfn/grnn.py b/neupy/algorithms/rbfn/grnn.py index 8eee01a0..2886d602 100644 --- a/neupy/algorithms/rbfn/grnn.py +++ b/neupy/algorithms/rbfn/grnn.py @@ -1,6 +1,7 @@ from numpy import dot -from neupy.utils import format_data, NotTrainedException +from neupy.utils import format_data +from neupy.exceptions import NotTrained from neupy.core.properties import BoundedProperty from neupy.algorithms.base import BaseNetwork from neupy.algorithms.learning import LazyLearningMixin @@ -123,8 +124,8 @@ def predict(self, input_data): array-like (n_samples,) """ if self.input_train is None: - raise NotTrainedException("Cannot make a prediction. Network " - "hasn't been trained yet") + raise NotTrained("Cannot make a prediction. Network " + "hasn't been trained yet") input_data = format_data(input_data) diff --git a/neupy/algorithms/rbfn/pnn.py b/neupy/algorithms/rbfn/pnn.py index 23ea6a80..90e96fe2 100644 --- a/neupy/algorithms/rbfn/pnn.py +++ b/neupy/algorithms/rbfn/pnn.py @@ -1,6 +1,7 @@ import numpy as np -from neupy.utils import format_data, NotTrainedException +from neupy.utils import format_data +from neupy.exceptions import NotTrained from neupy.core.properties import BoundedProperty from neupy.algorithms.base import BaseNetwork from neupy.algorithms.learning import LazyLearningMixin @@ -169,7 +170,7 @@ def predict_raw(self, input_data): Raises ------ - NotTrainedException + NotTrained If network hasn't been trained. ValueError @@ -180,8 +181,8 @@ def predict_raw(self, input_data): array-like (n_samples, n_classes) """ if self.classes is None: - raise NotTrainedException("Cannot make a prediction. Network " - "hasn't been trained yet") + raise NotTrained("Cannot make a prediction. Network " + "hasn't been trained yet") input_data_size = input_data.shape[1] train_data_size = self.input_train.shape[1] diff --git a/neupy/algorithms/utils.py b/neupy/algorithms/utils.py index bad53f7a..aa6a59c7 100644 --- a/neupy/algorithms/utils.py +++ b/neupy/algorithms/utils.py @@ -4,16 +4,9 @@ import theano.tensor as T -__all__ = ('StopTrainingException', 'shuffle', 'parameters2vector', +__all__ = ('shuffle', 'parameters2vector', 'iter_until_converge', 'iter_parameter_values', 'setup_parameter_updates', - 'iter_until_converge', 'normalize_error') - - -class StopTrainingException(Exception): - """ - Exception that needs to be triggered in case of - early training interruption. 
- """ + 'normalize_error') def iter_parameter_values(network): diff --git a/neupy/core/properties.py b/neupy/core/properties.py index a2690398..ad16f199 100644 --- a/neupy/core/properties.py +++ b/neupy/core/properties.py @@ -72,17 +72,22 @@ def __delete__(self, instance): if name in instance.options: del instance.options[name] - def __str__(self): + def validate(self, value): + """ + Validate properties value + + Parameters + ---------- + value : object + """ + + def __repr__(self): classname = self.__class__.__name__ + if self.name is None: return '{}()'.format(classname) - return '{}(name="{}")'.format(classname, self.name) - - def __repr__(self): - return self.__str__() - def validate(self, value): - pass + return '{}(name="{}")'.format(classname, self.name) class Property(BaseProperty): diff --git a/neupy/exceptions.py b/neupy/exceptions.py new file mode 100644 index 00000000..96d28ac1 --- /dev/null +++ b/neupy/exceptions.py @@ -0,0 +1,27 @@ +__all__ = ('LayerConnectionError', 'InvalidConnection', 'NotTrained', + 'StopTraining') + + +class LayerConnectionError(Exception): + """ + Error class that triggers in case of connection + issues within layers. + """ + + +class InvalidConnection(Exception): + """ + Connection is not suitable for the specified algorithm + """ + + +class NotTrained(Exception): + """ + Algorithms hasn't been trained yet. + """ + + +class StopTraining(Exception): + """ + Interrupt training procedure. + """ diff --git a/neupy/helpers/sms.py b/neupy/helpers/sms.py deleted file mode 100644 index d6daf249..00000000 --- a/neupy/helpers/sms.py +++ /dev/null @@ -1,45 +0,0 @@ -from neupy.helpers.logs import Verbose - - -__all__ = ('twilio_sms',) - - -def twilio_sms(account_id, token, to_phone, from_phone, verbose=True): - """ - Send SMS via Twilio service. - - Parameters - ---------- - account_id : str - Twilio account ID. - token : str - Twilio account token. - to_phone : str - SMS receiver phone number. - from_phone : str - SMS sender phone number. - verbose : bool - Logging verbose. Defaults to ``True``. - - Returns - ------- - func - Retunr function which take one text message argument and send it - via Twilio API. - """ - verbose = Verbose(verbose=verbose) - - try: - import twilio - except ImportError: - raise ImportError("Install `twilio` library.") - - def send_message(text_message): - formated_message = "Send SMS with text: '{}'".format(text_message) - verbose.message("SMS", formated_message) - - client = twilio.rest.TwilioRestClient(account_id, token) - message = client.messages.create(body=text_message, to=to_phone, - from_=from_phone) - return message - return send_message diff --git a/neupy/init.py b/neupy/init.py index 5c7d97cf..acdcac05 100644 --- a/neupy/init.py +++ b/neupy/init.py @@ -1,4 +1,5 @@ import abc +import math import six import numpy as np @@ -225,11 +226,34 @@ def __repr__(self): return '{}(scale={})'.format(classname(self), self.scale) -class HeNormal(Initializer): +class InitializerWithGain(Initializer): + """ + Initialization class that has gain property + + Parameters + ---------- + gain : float or {{'relu'}} + Multiplies scaling factor by speified gain. + The ``relu`` values set up gain equal to :math:`\\sqrt{{2}}` + Defaults to ``1``. + """ + def __init__(self, gain=1.0): + if gain == 'relu': + gain = math.sqrt(2) + + self.gain = gain + super(InitializerWithGain, self).__init__() + + +class HeNormal(InitializerWithGain): """ Kaiming He parameter initialization method based on the normal distribution. 
diff --git a/neupy/helpers/sms.py b/neupy/helpers/sms.py deleted file mode 100644 index d6daf249..00000000 --- a/neupy/helpers/sms.py +++ /dev/null @@ -1,45 +0,0 @@ -from neupy.helpers.logs import Verbose - - -__all__ = ('twilio_sms',) - - -def twilio_sms(account_id, token, to_phone, from_phone, verbose=True): - """ - Send SMS via Twilio service. - - Parameters - ---------- - account_id : str - Twilio account ID. - token : str - Twilio account token. - to_phone : str - SMS receiver phone number. - from_phone : str - SMS sender phone number. - verbose : bool - Logging verbose. Defaults to ``True``. - - Returns - ------- - func - Retunr function which take one text message argument and send it - via Twilio API. - """ - verbose = Verbose(verbose=verbose) - - try: - import twilio - except ImportError: - raise ImportError("Install `twilio` library.") - - def send_message(text_message): - formated_message = "Send SMS with text: '{}'".format(text_message) - verbose.message("SMS", formated_message) - - client = twilio.rest.TwilioRestClient(account_id, token) - message = client.messages.create(body=text_message, to=to_phone, - from_=from_phone) - return message - return send_message diff --git a/neupy/init.py b/neupy/init.py index 5c7d97cf..acdcac05 100644 --- a/neupy/init.py +++ b/neupy/init.py @@ -1,4 +1,5 @@ import abc +import math import six import numpy as np @@ -225,11 +226,34 @@ def __repr__(self): return '{}(scale={})'.format(classname(self), self.scale) -class HeNormal(Initializer): +class InitializerWithGain(Initializer): + """ + Initialization class that has a gain property + + Parameters + ---------- + gain : float or {{'relu'}} + Multiplies the scaling factor by the specified gain. + The ``relu`` value sets the gain equal to :math:`\\sqrt{{2}}`. + Defaults to ``1``. + """ + def __init__(self, gain=1.0): + if gain == 'relu': + gain = math.sqrt(2) + + self.gain = gain + super(InitializerWithGain, self).__init__() + + +class HeNormal(InitializerWithGain): """ Kaiming He parameter initialization method based on the normal distribution. + Parameters + ---------- + {InitializerWithGain.Parameters} + Methods ------- {Initializer.Methods} @@ -243,15 +267,19 @@ class HeNormal(Initializer): def sample(self, shape): fan_in, _ = identify_fans(shape) variance = 2. / fan_in - std = np.sqrt(variance) + std = self.gain * np.sqrt(variance) return np.random.normal(loc=0, scale=std, size=shape) -class HeUniform(Initializer): +class HeUniform(InitializerWithGain): """ Kaiming He parameter initialization method based on the uniformal distribution. + Parameters + ---------- + {InitializerWithGain.Parameters} + Methods ------- {Initializer.Methods} @@ -265,17 +293,24 @@ class HeUniform(Initializer): def sample(self, shape): fan_in, _ = identify_fans(shape) variance = 6. / fan_in - abs_max_value = np.sqrt(variance) + abs_max_value = self.gain * np.sqrt(variance) uniform = Uniform(minval=-abs_max_value, maxval=abs_max_value) return uniform.sample(shape) -class XavierNormal(Initializer): +class XavierNormal(InitializerWithGain): """ Xavier Glorot parameter initialization method based on normal distribution. + Parameters + ---------- + gain : float or {{'relu'}} + Multiplies the scaling factor by the specified gain. + The ``relu`` value sets the gain equal to :math:`\\sqrt{{2}}`. + Defaults to ``1``. + Methods ------- {Initializer.Methods} @@ -288,11 +323,11 @@ class XavierNormal(Initializer): def sample(self, shape): fan_in, fan_out = identify_fans(shape) variance = 2. / (fan_in + fan_out) - std = np.sqrt(variance) + std = self.gain * np.sqrt(variance) return np.random.normal(loc=0, scale=std, size=shape) -class XavierUniform(Initializer): +class XavierUniform(InitializerWithGain): """ Xavier Glorot parameter initialization method based on uniform distribution. @@ -309,7 +344,7 @@ class XavierUniform(Initializer): def sample(self, shape): fan_in, fan_out = identify_fans(shape) variance = 6.
/ (fan_in + fan_out) - abs_max_value = np.sqrt(variance) + abs_max_value = self.gain * np.sqrt(variance) uniform = Uniform(minval=-abs_max_value, maxval=abs_max_value) return uniform.sample(shape) diff --git a/neupy/layers/__init__.py b/neupy/layers/__init__.py index 71271e52..c362c3dc 100644 --- a/neupy/layers/__init__.py +++ b/neupy/layers/__init__.py @@ -6,6 +6,6 @@ from .stochastic import * from .normalization import * from .merge import * -from .parallel import * -from .utils import join, count_parameters +from .connections import join +from .utils import count_parameters diff --git a/neupy/layers/activations.py b/neupy/layers/activations.py index 12823c87..1a768dfe 100644 --- a/neupy/layers/activations.py +++ b/neupy/layers/activations.py @@ -189,6 +189,9 @@ class Relu(ActivationLayer): {ActivationLayer.Attributes} """ alpha = NumberProperty(default=0, minval=0) + weight = ParameterProperty(default=init.XavierNormal(gain='relu')) + bias = ParameterProperty(default=init.XavierNormal(gain='relu'), + allow_none=True) def activation_function(self, input_value): alpha = asfloat(self.alpha) diff --git a/neupy/layers/base.py b/neupy/layers/base.py index 0de895be..f04bddf4 100644 --- a/neupy/layers/base.py +++ b/neupy/layers/base.py @@ -9,10 +9,10 @@ from neupy.core.config import Configurable from neupy.core.properties import (TypedListProperty, IntProperty, Property, ParameterProperty) -from neupy.layers.connections import ChainConnection +from neupy.layers.connections import BaseConnection -__all__ = ('BaseLayer', 'ParameterBasedLayer', 'Input') +__all__ = ('BaseLayer', 'ParameterBasedLayer', 'Input', 'ResidualConnection') def next_identifier(identifiers): @@ -56,9 +56,6 @@ def generate_layer_name(layer): if layer.layer_id is not None: layer_id = layer.layer_id - elif not graph: - layer_id = 1 - else: graph_layers = graph.forward_graph.keys() layer_identifiers = [] @@ -106,7 +103,7 @@ def create_shared_parameter(value, name, shape): return theano.shared(value=asfloat(value), name=name, borrow=True) -class BaseLayer(ChainConnection, Configurable): +class BaseLayer(BaseConnection, Configurable): """ Base class for all layers. @@ -141,19 +138,32 @@ class BaseLayer(ChainConnection, Configurable): parameters : dict Trainable parameters. - graph : LayerGraph instance or None + graph : LayerGraph instance Graphs that stores all relations between layers. """ name = Property(expected_type=six.string_types) + # Stores global identifier index for each layer class + global_identifiers_map = {} + + def __new__(cls, *args, **kwargs): + if cls not in cls.global_identifiers_map: + cls.global_identifiers_map[cls] = 1 + return super(BaseLayer, cls).__new__(cls) + def __init__(self, *args, **options): super(BaseLayer, self).__init__(*args) self.parameters = {} self.updates = [] - self.layer_id = None self.input_shape_ = None + cls = self.__class__ + self.layer_id = self.global_identifiers_map[cls] + self.global_identifiers_map[cls] += 1 + + self.graph.add_layer(self) + Configurable.__init__(self, **options) def validate(self, input_shape): @@ -198,6 +208,10 @@ def __repr__(self): return '{name}()'.format(name=classname) +class ResidualConnection(BaseLayer): + pass + + class ParameterBasedLayer(BaseLayer): """ Layer that creates weight and bias parameters. 
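# Illustrative sketch of two behaviours introduced above; the shapes
# and variable names are assumptions, not part of the patch:
#
#     from neupy import init, layers
#
#     # gain='relu' multiplies the scaling factor by sqrt(2); it is
#     # now the default weight initializer for Relu layers
#     weight = init.XavierNormal(gain='relu').sample(shape=(784, 500))
#
#     # layer identifiers are counted globally per layer class, so
#     # default names stay unique across separately built connections
#     relu_a = layers.Relu(10)  # layer_id == 1
#     relu_b = layers.Relu(20)  # layer_id == 2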
@@ -313,7 +327,9 @@ class Input(BaseLayer): def __init__(self, size, **options): super(Input, self).__init__(size=size, **options) + self.input_shape = as_tuple(self.size) + self.initialize() @property def output_shape(self): diff --git a/neupy/layers/connections/__init__.py b/neupy/layers/connections/__init__.py new file mode 100644 index 00000000..362f4501 --- /dev/null +++ b/neupy/layers/connections/__init__.py @@ -0,0 +1,3 @@ +from .graph import * +from .base import * +from .utils import * diff --git a/neupy/layers/connections/base.py b/neupy/layers/connections/base.py new file mode 100644 index 00000000..6d581216 --- /dev/null +++ b/neupy/layers/connections/base.py @@ -0,0 +1,261 @@ +from itertools import product +from contextlib import contextmanager + +from neupy.layers.utils import preformat_layer_shape +from .utils import join, is_sequential +from .graph import LayerGraph + + +__all__ = ('LayerConnection', 'BaseConnection', 'ParallelConnection') + + +class BaseConnection(object): + """ + Base class for chain connections. + """ + def __init__(self): + self.connection = None + self.training_state = True + self.graph = LayerGraph() + + self.input_layers = [self] + self.output_layers = [self] + + def __gt__(self, other): + return LayerConnection(self, other) + + def __lt__(self, other): + return LayerConnection(other, self) + + def __iter__(self): + yield self + + def output(self, input_value): + raise NotImplementedError + + def initialize(self): + raise NotImplementedError + + @contextmanager + def disable_training_state(self): + self.training_state = False + yield + self.training_state = True + + +def make_common_graph(left_layer, right_layer): + """ + Makes a common graph for two layers that exist + in different graphs. + + Parameters + ---------- + left_layer : layer + right_layer : layer + + Returns + ------- + LayerGraph instance + Graph that contains both layers and their connections. + """ + graph = LayerGraph.merge(left_layer.graph, right_layer.graph) + + for layer in graph.forward_graph.keys(): + layer.graph = graph + + return graph + + +def topological_sort(graph): + """ + Repeatedly go through all of the nodes in the graph, moving each of + the nodes that has all its edges resolved onto a sequence that + forms our sorted graph. A node has all of its edges resolved and + can be moved once all the nodes its edges point to have been moved + from the unsorted graph onto the sorted one. + + Parameters + ---------- + graph : dict + Dictionary that has graph structure. + + Raises + ------ + RuntimeError + If graph has cycles. + + Returns + ------- + list + List of nodes sorted in topological order.
+ """ + sorted_nodes = [] + graph_unsorted = graph.copy() + + while graph_unsorted: + acyclic = False + + for node, edges in list(graph_unsorted.items()): + if all(edge not in graph_unsorted for edge in edges): + acyclic = True + del graph_unsorted[node] + sorted_nodes.append(node) + + if not acyclic: + raise RuntimeError("A cyclic dependency occurred") + + return sorted_nodes + + +class ParallelConnection(BaseConnection): + def __init__(self, connections): + from neupy.layers.base import ResidualConnection + + super(ParallelConnection, self).__init__() + + self.connections = [] + self.input_layers = [] + self.output_layers = [] + + for layers in connections: + if layers: + connection = join(*layers) + else: + connection = ResidualConnection() + + self.connections.append(connection) + self.input_layers.extend(connection.input_layers) + self.output_layers.extend(connection.output_layers) + + +class LayerConnection(BaseConnection): + """ + Make connection between layers. + + Parameters + ---------- + left : layer, connection or list of connections + right : layer, connection or list of connections + """ + def __init__(self, left, right): + super(LayerConnection, self).__init__() + + if isinstance(left, (list, tuple)): + left = ParallelConnection(left) + + elif left.connection and left in left.connection.output_layers: + left = left.connection + + if isinstance(right, (list, tuple)): + right = ParallelConnection(right) + + elif right.connection and right in right.connection.input_layers: + right = right.connection + + self.left = left + self.right = right + + self.left.connection = self + self.right.connection = self + + layers = product(left.output_layers, right.input_layers) + for left_output, right_input in layers: + self.full_graph = make_common_graph(left_output, right_input) + + self.full_graph.connect_layers(left.output_layers, right.input_layers) + + self.input_layers = self.left.input_layers + self.output_layers = self.right.output_layers + + self.graph = self.full_graph + if self.output_layers: + self.graph = self.graph.subgraph_for_output(self.output_layers) + + if self.input_layers: + self.graph = self.graph.subgraph_for_output(self.input_layers, + graph='forward') + + @property + def input_shape(self): + # Cannot save them during initialization step, + # because input shape can be modified later + if not self.input_layers: + return + + if len(self.input_layers) == 1: + input_layer = self.input_layers[0] + return input_layer.input_shape + + input_shapes = [] + for input_layer in self.input_layers: + input_shapes.append(input_layer.input_shape) + + return input_shapes + + @property + def output_shape(self): + # Cannot save them during initialization step, + # because input shape can be modified later + if not self.output_layers: + return + + if len(self.output_layers) == 1: + output_layer = self.output_layers[0] + return output_layer.output_shape + + output_shapes = [] + for output_layer in self.output_layers: + output_shapes.append(output_layer.output_shape) + + return output_shapes + + def initialize(self): + for layer in self: + layer.initialize() + + def output(self, *input_values): + subgraph = self.graph + n_inputs = len(input_values) + + if n_inputs == 1 and not isinstance(input_values[0], dict): + input_value = input_values[0] + new_input_values = {} + + for input_layer in self.input_layers: + new_input_values[input_layer] = input_value + + input_values = [new_input_values] + + return subgraph.propagate_forward(*input_values) + + @contextmanager + def 
disable_training_state(self): + for layer in self: + layer.training_state = False + + yield + + for layer in self: + layer.training_state = True + + def __len__(self): + return len(self.graph.forward_graph) + + def __iter__(self): + backward_graph = self.graph.backward_graph + for layer in topological_sort(backward_graph): + yield layer + + def __repr__(self): + n_layers = len(self) + + if n_layers > 5 or not is_sequential(self): + conn = '{} -> [... {} layers ...] -> {}'.format( + preformat_layer_shape(self.input_shape), + n_layers, + preformat_layer_shape(self.output_shape) + ) + else: + conn = ' > '.join([repr(layer) for layer in self]) + + return conn diff --git a/neupy/layers/connections.py b/neupy/layers/connections/graph.py similarity index 54% rename from neupy/layers/connections.py rename to neupy/layers/connections/graph.py index eaa480e5..69549e1d 100644 --- a/neupy/layers/connections.py +++ b/neupy/layers/connections/graph.py @@ -1,64 +1,48 @@ import copy +import pprint import inspect -from itertools import chain -from contextlib import contextmanager from collections import OrderedDict -from neupy.layers.utils import preformat_layer_shape +from neupy.exceptions import LayerConnectionError -__all__ = ('LayerConnection', 'ChainConnection', 'NetworkConnectionError', - 'LayerConnectionError', 'LayerGraph') +__all__ = ('LayerGraph',) -class LayerConnectionError(Exception): - """ - Error class that triggers in case of connection - issues within layers. - """ - - -class NetworkConnectionError(Exception): - """ - Error class that triggers in case of connection - within layers in the network +def filter_list(iterable, include_values): """ - - -def is_sequential(connection): - """ - Check whether graph connection is a sequence. + Create new list that contains only values + specified in the ``include_values`` argument. Parameters ---------- - connection : ChainConnection instance + iterable : list + List that needs to be filtered. + + include_values : list, tuple + List of values that need to be included in the + filtered list. Values that haven't been defined + here will be excluded from the list specified + by the ``iterable`` argument. Returns ------- - bool + list + Filtered list. """ - graph = connection.graph - - if not graph: - # Single layer is a sequential connection - return True + filtered_list = [] - f_graph = graph.forward_graph - b_graph = graph.backward_graph + for value in iterable: + if value in include_values: + filtered_list.append(value) - for layers in chain(f_graph.values(), b_graph.values()): - if len(layers) >= 2: - # One of the layers has multiple input - # or output connections - return False - - return True + return filtered_list
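# Doctest-style illustration of the helper above (not part of the
# patch): the whitelist keeps the order of ``iterable``.
#
#     >>> filter_list([1, 2, 3, 4], include_values=[4, 2])
#     [2, 4]
#
# filter_dict below applies the same whitelist to both the keys and
# the adjacency lists stored as values, which keeps a filtered graph
# self-consistent.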
def filter_dict(dictionary, include_keys): """ - Creates new dictionary that contains only some of the - keys from the original one. + Create new dictionary that contains only keys + specified in the ``include_keys`` argument. Each + value list gets filtered with the same set of keys. Parameters ---------- @@ -73,10 +57,12 @@ def filter_dict(dictionary, include_keys): ------- dict """ - filtered_dict = {} + filtered_dict = OrderedDict() + for key, value in dictionary.items(): if key in include_keys: - filtered_dict[key] = value + filtered_dict[key] = filter_list(value, include_keys) + return filtered_dict @@ -104,12 +90,12 @@ def merge_dicts_with_list(first_dict, second_dict): common_dict[key] = copy.copy(value) for key, values in second_dict.items(): - if key not in common_dict: - common_dict[key] = copy.copy(values) - else: + if key in common_dict: for value in values: if value not in common_dict[key]: common_dict[key].append(value) + else: + common_dict[key] = copy.copy(values) return common_dict @@ -180,7 +166,7 @@ def does_layer_expect_one_input(layer): "output method".format(layer)) if not inspect.ismethod(layer.output): - raise ValueError("The `output` attribute is not a method") + raise ValueError("Layer has an `output` property, but it is not a method") arginfo = inspect.getargspec(layer.output) @@ -260,6 +246,9 @@ def add_layer(self, layer): if layer in self.forward_graph: return False + if layer.input_shape: + layer.initialize() + self.forward_graph[layer] = [] self.backward_graph[layer] = [] @@ -293,7 +282,6 @@ def add_connection(self, from_layer, to_layer): self.add_layer(from_layer) self.add_layer(to_layer) - expect_one_input_layer = does_layer_expect_one_input(to_layer) forward_connections = self.forward_graph[from_layer] backward_connections = self.backward_graph[to_layer] @@ -301,37 +289,38 @@ def add_connection(self, from_layer, to_layer): # Layers have been already connected return False - if expect_one_input_layer and backward_connections: - raise LayerConnectionError( - "Cannot connect `{from_layer}` to the `{to_layer}`. " - "Layer `{to_layer}` expectes input only from one " - "layer and it has been alredy connected with " - "`{to_layer_connection}`.".format( - from_layer=from_layer, - to_layer=to_layer, - to_layer_connection=backward_connections[0] - ) - ) + # expect_one_input_layer = does_layer_expect_one_input(to_layer) + # if expect_one_input_layer and backward_connections: + # raise LayerConnectionError( + # "Cannot connect `{from_layer}` to the `{to_layer}`. " + # "Layer `{to_layer}` expects input only from one " + # "layer and it has already been connected with " + # "`{to_layer_connection}`.".format( + # from_layer=from_layer, + # to_layer=to_layer, + # to_layer_connection=backward_connections[0] + # )) forward_connections.append(to_layer) + backward_connections.append(from_layer) if is_cyclic(self.forward_graph): - # Rollback changes in case if user cathes exception - self.forward_graph[from_layer].pop() - raise LayerConnectionError("Graph cannot have cycles") + raise LayerConnectionError( + "Cannot connect layer `{}` to `{}`, because this " + "connection creates a cycle in the graph." + "".format(from_layer, to_layer)) - backward_connections.append(from_layer) return True
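# Sketch of the reworked cycle protection (``layer_a``/``layer_b`` are
# assumed to be layers already registered in the graph):
#
#     graph.add_connection(layer_a, layer_b)
#     graph.add_connection(layer_b, layer_a)
#     # LayerConnectionError: connecting them back-to-back would
#     # create a cycle in the graph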
""" - connection_added = self.add_connection(from_layer, to_layer) + if not isinstance(from_layers, (list, tuple)): + from_layers = [from_layers] - if not connection_added: + if not isinstance(to_layers, (list, tuple)): + to_layers = [to_layers] + + connections_added = [] + for from_layer in from_layers: + for to_layer in to_layers: + connection_added = self.add_connection(from_layer, to_layer) + connections_added.append(connection_added) + + if not any(connections_added): return False - if from_layer.input_shape is None: + if all(layer.input_shape is None for layer in from_layers): return True # Layer has an input shape which means that we can # propagate this information through the graph and # set up input shape for layers that don't have it. - layers = [from_layer] + layers = copy.copy(from_layers) forward_graph = self.forward_graph # We need to know whether all input layers # have defined input shape - all_inputs_has_shape = True - for layer, next_layers in self.backward_graph.items(): - if not next_layers and not layer.input_shape: - all_inputs_has_shape = False - break + all_inputs_has_shape = all( + layer.input_shape for layer in self.input_layers) while layers: current_layer = layers.pop() next_layers = forward_graph[current_layer] for next_layer in next_layers: - next_in_shape = next_layer.input_shape + next_inp_shape = next_layer.input_shape current_out_shape = current_layer.output_shape - one_input_layer = does_layer_expect_one_input(next_layer) + expect_one_input = does_layer_expect_one_input(next_layer) - if not next_in_shape and one_input_layer: + if not next_inp_shape and expect_one_input: next_layer.input_shape = current_out_shape + next_layer.initialize() - elif not one_input_layer and all_inputs_has_shape: + elif not expect_one_input and all_inputs_has_shape: input_shapes = [] for incoming_layer in self.backward_graph[next_layer]: input_shapes.append(incoming_layer.output_shape) if None not in input_shapes: next_layer.input_shape = input_shapes + next_layer.initialize() + else: # Some of the previous layers still don't # have input shape. We can put layer at the # end of the stack and check it again at the end layers.insert(0, current_layer) - elif one_input_layer and next_in_shape != current_out_shape: + elif expect_one_input and next_inp_shape != current_out_shape: raise LayerConnectionError( "Cannot connect `{}` to the `{}`. Output shape " "from one layer is equal to {} and input shape " "to the next one is equal to {}".format( current_layer, next_layer, - current_out_shape, next_in_shape, - ) - ) + current_out_shape, next_inp_shape, + )) layers.extend(next_layers) return True - def subgraph_for_output(self, layer): - layers = [layer] - observed_layers = [] + def subgraph(self, from_layers, to_layers): + pass + + def subgraph_for_output(self, layers, graph='backward'): + """ + Exctract subgraph from a graph that contains only + layers related to the output layer. 
- def subgraph_for_output(self, layer): - layers = [layer] - observed_layers = [] + def subgraph(self, from_layers, to_layers): + pass + + def subgraph_for_output(self, layers, graph='backward'): + """ + Extract subgraph from a graph that contains only + layers related to the specified layers. + + Parameters + ---------- + layers : layer or list of layers + + graph : {'backward', 'forward'} + Direction of the traversal. Defaults + to ``'backward'``. + + Returns + ------- + LayerGraph instance + """ + if not isinstance(layers, (list, tuple)): + layers = [layers] - if layer not in self.forward_graph: + if all(layer not in self.forward_graph for layer in layers): return LayerGraph() + if graph == 'backward': + graph = self.backward_graph + else: + graph = self.forward_graph + + observed_layers = [] + layers = copy.copy(layers) + while layers: current_layer = layers.pop() - next_layers = self.backward_graph[current_layer] + next_layers = graph[current_layer] for next_layer in next_layers: if next_layer not in observed_layers: @@ -427,9 +453,14 @@ def subgraph_for_output(self, layer): backward_subgraph = filter_dict(self.backward_graph, observed_layers) - # Remove old relations to the other layers. - # Output layer cannot point to some other layers. - forward_subgraph[layer] = [] return LayerGraph(forward_subgraph, backward_subgraph) @@ -449,11 +480,9 @@ def input_layers(self): List of input layers. """ input_layers = [] + for layer, next_layers in self.backward_graph.items(): - # TODO: I should check whether it's always useful - # to have only an input layers that have specified - # input shape - if not next_layers and layer.input_shape: + if not next_layers: input_layers.append(layer) return input_layers @@ -474,47 +503,49 @@ def output_layers(self): List of output layers. """ output_layers = [] + for layer, next_layers in self.forward_graph.items(): if not next_layers: output_layers.append(layer) return output_layers - def propagate_forward(self, input_): + def propagate_forward(self, input_value): """ Propagates input variable through the directed acyclic graph and returns output from the final layers. Parameters ---------- - input_ : array-like, Theano variable or dict - If input has array or Theano variable type than it will - be used as a direct input for input layer/layers. The - dict type input should has a specific structure. Each - key of the dict is a layer and each value is array or - Theano variable. Dict defines input values for specific - layers. In the dict input layer is not necessary should - be an instance of the ``layers.Input`` class. It can be - any layer from the graph. + input_value : array-like, Theano variable or dict + - If input is an array or Theano variable then it will + be used as a direct input to the input layer/layers. + + - The dict type input should have a specific structure. + Each key of the dict is a layer and each value is an + array or Theano variable. The dict defines input values + for specific layers. A key layer doesn't have to be an + instance of the ``layers.Input`` class; it can be any + layer from the graph. Returns ------- object - Output from the final layers. + Output from the final layer/layers.
""" outputs = {} - if isinstance(input_, dict): - for layer, input_variable in input_.items(): + if isinstance(input_value, dict): + for layer, input_variable in input_value.items(): if layer not in self.forward_graph: raise ValueError("The `{}` layer doesn't appear " - "in this graph".format(layer)) + "in the graph".format(layer)) outputs[layer] = layer.output(input_variable) else: for input_layer in self.input_layers: - outputs[input_layer] = input_layer.output(input_) + outputs[input_layer] = input_layer.output(input_value) def output_from_layer(layer): if layer in outputs: @@ -545,203 +576,10 @@ def output_from_layer(layer): def __len__(self): return len(self.forward_graph) - -class ChainConnection(object): - """ - Base class from chain connections. - """ - def __init__(self): - self.connection = None - self.training_state = True - self.graph = LayerGraph() - - def __gt__(self, other): - return LayerConnection(self, other) - - def __lt__(self, other): - return LayerConnection(other, self) - - def output(self, input_value): - raise NotImplementedError - - def initialize(self): - raise NotImplementedError - - @contextmanager - def disable_training_state(self): - self.training_state = False - yield - self.training_state = True - - -def make_common_graph(left_layer, right_layer): - """ - Makes common graph for two layers that exists - in different graphs. - - Parameters - ---------- - left_layer : layer - right_layer : layer - - Returns - ------- - LayerGraph instance - Graph that contains both layers and their connections. - """ - left_graph = left_layer.graph - right_graph = right_layer.graph - - graph = LayerGraph.merge(left_graph, right_graph) - - for layer in graph.forward_graph.keys(): - layer.graph = graph - - left_layer.graph = graph - right_layer.graph = graph - - return graph - - -def topological_sort(graph): - """ - Repeatedly go through all of the nodes in the graph, moving each of - the nodes that has all its edges resolved, onto a sequence that - forms our sorted graph. A node has all of its edges resolved and - can be moved once all the nodes its edges point to, have been moved - from the unsorted graph onto the sorted one. - - Parameters - ---------- - graph : dict - Dictionary that has graph structure. - - Raises - ------ - RuntimeError - If graph has cycles. - - Returns - ------- - list - List of nodes sorted in topological order. - """ - sorted_nodes = [] - graph_unsorted = graph.copy() - - while graph_unsorted: - acyclic = False - for node, edges in list(graph_unsorted.items()): - for edge in edges: - if edge in graph_unsorted: - break - else: - acyclic = True - del graph_unsorted[node] - sorted_nodes.append(node) - - if not acyclic: - raise RuntimeError("A cyclic dependency occurred") - - return sorted_nodes - - -class LayerConnection(ChainConnection): - """ - Connect to layers or connections together. 
- - Parameters - ---------- - left : layer or connection - right : layer or conenction - """ - def __init__(self, left, right): - super(LayerConnection, self).__init__() - - if left.connection and left.connection.output_layer is left: - self.left = left.connection - else: - self.left = left - - if right.connection and right.connection.input_layer is right: - self.right = right.connection - else: - self.right = right - - self.layers = [] - - if isinstance(self.left, LayerConnection): - self.layers = copy.copy(self.left.layers) - self.left_layer = self.layers[-1] - else: - self.left_layer = self.left - self.layers = [self.left] - - if isinstance(self.right, LayerConnection): - right_layers = self.right.layers - self.right_layer = right_layers[0] - self.layers.extend(right_layers) - else: - self.right_layer = self.right - self.layers.append(self.right) - - self.input_layer = self.layers[0] - self.output_layer = self.layers[-1] - - self.left.connection = self - self.right.connection = self - - self.graph = make_common_graph(self.left_layer, self.right_layer) - self.graph.connect_layers(self.left_layer, self.right_layer) - - @property - def input_shape(self): - # Cannot save them one time because shape - # can be modified later - return self.input_layer.input_shape - - @property - def output_shape(self): - # Cannot save them one time because shape - # can be modified later - return self.output_layer.output_shape - - def initialize(self): - for layer in self: - layer.initialize() - - def output(self, *input_values): - subgraph = self.graph.subgraph_for_output(self.output_layer) - return subgraph.propagate_forward(*input_values) - - @contextmanager - def disable_training_state(self): - for layer in self: - layer.training_state = False - - yield - - for layer in self: - layer.training_state = True - - def __len__(self): - return len(self.graph.forward_graph) - - def __iter__(self): - subgraph = self.graph.subgraph_for_output(self.output_layer) - for layer in topological_sort(subgraph.backward_graph): - yield layer - def __repr__(self): - n_layers = len(self) - - if n_layers > 5 or not is_sequential(self): - conn = '{} -> [... {} layers ...] -> {}'.format( - preformat_layer_shape(self.input_shape), - n_layers, - preformat_layer_shape(self.output_shape) - ) - else: - conn = ' > '.join([repr(layer) for layer in self]) + graph = self.forward_graph + + if isinstance(graph, OrderedDict): + graph = list(graph.items()) - return conn + return pprint.pformat(graph) diff --git a/neupy/layers/connections/utils.py b/neupy/layers/connections/utils.py new file mode 100644 index 00000000..eb699925 --- /dev/null +++ b/neupy/layers/connections/utils.py @@ -0,0 +1,66 @@ +from itertools import chain +from functools import reduce + + +def join(*connections): + """ + Join layers or connections into a sequence. + + Parameters + ---------- + *connections : layers or connections + + Returns + ------- + connection + Layers connected in a sequence. + + Examples + -------- + >>> from neupy import layers + >>> conn = layers.join( + ... layers.Input(784), + ... layers.Relu(500), + ... layers.Relu(300), + ... layers.Softmax(10), + ...
) + """ + from .base import LayerConnection, ParallelConnection + + if not connections: + return + + if len(connections) == 1: + connection = connections[0] + + if isinstance(connection, (list, tuple)): + return ParallelConnection(connection) + else: + return connection + + merged_connections = reduce(LayerConnection, connections) + return merged_connections + + +def is_sequential(connection): + """ + Check whether graph connection is a sequence. + + Parameters + ---------- + connection : connection + + Returns + ------- + bool + """ + forward_graph_layers = connection.graph.forward_graph.values() + backward_graph_layers = connection.graph.backward_graph.values() + + for layers in chain(forward_graph_layers, backward_graph_layers): + if len(layers) >= 2: + # One of the layers has multiple input + # or output connections + return False + + return True diff --git a/neupy/layers/convolutions.py b/neupy/layers/convolutions.py index f21ab578..5fce384d 100644 --- a/neupy/layers/convolutions.py +++ b/neupy/layers/convolutions.py @@ -4,9 +4,9 @@ import theano.tensor as T from neupy.utils import as_tuple +from neupy.exceptions import LayerConnectionError from neupy.core.properties import TypedListProperty, Property from .base import ParameterBasedLayer -from .connections import LayerConnectionError __all__ = ('Convolution',) diff --git a/neupy/layers/merge.py b/neupy/layers/merge.py index d4313b60..826292a5 100644 --- a/neupy/layers/merge.py +++ b/neupy/layers/merge.py @@ -4,9 +4,9 @@ import theano.tensor as T from neupy.core.properties import IntProperty, Property +from neupy.exceptions import LayerConnectionError from neupy.utils import as_tuple from .base import BaseLayer -from .connections import LayerConnectionError __all__ = ('Elementwise', 'Concatenate') diff --git a/neupy/layers/normalization.py b/neupy/layers/normalization.py index b16d04f1..dc5ffd4d 100644 --- a/neupy/layers/normalization.py +++ b/neupy/layers/normalization.py @@ -1,3 +1,4 @@ +import theano import theano.tensor as T import numpy as np @@ -5,7 +6,7 @@ from neupy.core.properties import (NumberProperty, ProperFractionProperty, ParameterProperty, IntProperty) from neupy.utils import asfloat, as_tuple -from .connections import LayerConnectionError +from neupy.exceptions import LayerConnectionError from .activations import AxesProperty from .utils import dimshuffle from .base import BaseLayer @@ -131,11 +132,14 @@ def initialize(self): "with unknown size over the dimension #{} " "(0-based indeces).".format(unknown_dim_index)) - self.add_parameter(value=np.zeros(parameter_shape), - name='running_mean', shape=parameter_shape) - - self.add_parameter(value=np.ones(parameter_shape), - name='running_inv_std', shape=parameter_shape) + self.running_mean = theano.shared( + name='layer:{}/running-mean'.format(self.name), + value=np.zeros(parameter_shape), + ) + self.running_inv_std = theano.shared( + name='layer:{}/running-inv-std'.format(self.name), + value=np.ones(parameter_shape), + ) self.add_parameter(value=self.gamma, name='gamma', shape=parameter_shape) diff --git a/neupy/layers/parallel.py b/neupy/layers/parallel.py deleted file mode 100644 index 9cb53e78..00000000 --- a/neupy/layers/parallel.py +++ /dev/null @@ -1,67 +0,0 @@ -from .connections import ChainConnection -from .utils import join as layers_join -from .base import BaseLayer - - -__all__ = ('Parallel',) - - -class TransferLayer(BaseLayer): - """ - Hack for parallel connections. 
- """ - - -def Parallel(connections, merge_layer): - """ - Propagate input value through the multiple parallel layer - connections and then combine output result. - - Parameters - ---------- - connections : list of lists, list of LayerConnection - List that contains list of layer connections. - - merge_layer : BaseLayer instance - Layer that merges final outputs from each parallel - connection. - - Returns - ------- - LayerConnection - - Examples - -------- - >>> from neupy import layers - >>> - >>> parallel_layer = layers.Parallel( - ... [[ - ... layers.Convolution((3, 5, 5)), - ... ], [ - ... layers.Convolution((10, 3, 3)), - ... layers.Convolution((5, 3, 3)), - ... ]], - ... layers.Concatenate() - ... ) - """ - if not isinstance(connections, (list, tuple)): - raise ValueError("Connections should be a list or a tuple.") - - if not isinstance(merge_layer, ChainConnection): - raise ValueError("The `merge_layer` argument is not " - "a layer or connection") - - input_layer = TransferLayer() - - for i, connection in enumerate(connections): - if not connection: - full_connection = layers_join(input_layer, merge_layer) - continue - - if isinstance(connection, (list, tuple)): - connection = layers_join(connection) - connections[i] = connection - - full_connection = layers_join(input_layer, connection, merge_layer) - - return full_connection diff --git a/neupy/layers/pooling.py b/neupy/layers/pooling.py index 99a1852e..cf62e2e8 100644 --- a/neupy/layers/pooling.py +++ b/neupy/layers/pooling.py @@ -3,8 +3,8 @@ from neupy.utils import as_tuple from neupy.core.properties import TypedListProperty, ChoiceProperty, Property +from neupy.exceptions import LayerConnectionError from .base import BaseLayer -from .connections import LayerConnectionError from .convolutions import StrideProperty diff --git a/neupy/layers/utils.py b/neupy/layers/utils.py index ca8abd5c..3bcd721c 100644 --- a/neupy/layers/utils.py +++ b/neupy/layers/utils.py @@ -1,16 +1,13 @@ import collections -from functools import reduce -__all__ = ('preformat_layer_shape', 'dimshuffle', 'join', 'iter_parameters', +__all__ = ('preformat_layer_shape', 'dimshuffle', 'iter_parameters', 'count_parameters') def preformat_layer_shape(shape): """ - Each layer should have input and output shape - attributes. This function formats layer's shape value to - make it easy to read. + Format layer's input or output shape. Parameters ---------- @@ -41,48 +38,11 @@ def dimshuffle(value, ndim, axes): Theano variable """ pattern = ['x'] * ndim + for i, axis in enumerate(axes): pattern[axis] = i - return value.dimshuffle(pattern) - - -def join(*connections): - """ - Connect two layers. - Parameters - ---------- - *connections : layers or connections - - Returns - ------- - connection - Layers connected in a sequence. - - Examples - -------- - >>> from neupy import layers - >>> conn = layers.join( - ... layers.Input(784), - ... layers.Relu(500), - ... layers.Relu(300), - ... layers.Softmax(10), - ... ) - >>> - >>> conn = layers.join([ - ... layers.Input(784), - ... layers.Sigmoid(100), - ... layers.Softmax(10), - ... ]) - """ - from neupy.layers.connections import LayerConnection - - n_layers = len(connections) - if n_layers == 1 and isinstance(connections[0], collections.Iterable): - connections = connections[0] - - merged_connections = reduce(LayerConnection, connections) - return merged_connections + return value.dimshuffle(pattern) def iter_parameters(layers): @@ -116,11 +76,10 @@ def count_parameters(connection): int Number of parameters. 
""" - if not isinstance(connection, collections.Iterable): - connection = [connection] - n_parameters = 0 + for _, _, parameter in iter_parameters(connection): parameter = parameter.get_value() n_parameters += parameter.size + return n_parameters diff --git a/neupy/plots/layer_structure.py b/neupy/plots/layer_structure.py index c2e958e7..f893123c 100644 --- a/neupy/plots/layer_structure.py +++ b/neupy/plots/layer_structure.py @@ -4,7 +4,7 @@ import graphviz -from neupy.layers.parallel import TransferLayer +from neupy.layers.base import ResidualConnection from neupy.algorithms.base import BaseNetwork @@ -17,7 +17,7 @@ def layer_uid(layer): Parameters ---------- - layer : BaseLayer instance + layer : layer Returns ------- @@ -26,6 +26,23 @@ def layer_uid(layer): return str(id(layer)) +def format_label(info): + """ + Format label information. + + Parameters + ---------- + object + + Returns + ------- + str + """ + # Space at the beggining shifts string + # to the right + return " {}".format(info) + + def exclude_layer_from_graph(graph, ignore_layers): """ Exclude specific types of layers from the graph. @@ -113,9 +130,7 @@ def layer_structure(connection, ignore_layers=None, filepath=None, show=True): if filepath is None: filepath = tempfile.mktemp() - # TransferLayer is a hack for the Parallel layer, - # so we don't need it anyway - ignore_layers = [TransferLayer] + ignore_layers + ignore_layers = [ResidualConnection] + ignore_layers forward_graph = connection.graph.forward_graph forward_graph = exclude_layer_from_graph(forward_graph, ignore_layers) @@ -125,12 +140,19 @@ def layer_structure(connection, ignore_layers=None, filepath=None, show=True): for layer in forward_graph.keys(): digraph.node(layer_uid(layer), str(layer)) + output_id = 1 for from_layer, to_layers in forward_graph.items(): for to_layer in to_layers: - digraph.edge(layer_uid(from_layer), - layer_uid(to_layer), - # Space at the beggining shifts string - # to the right + digraph.edge(layer_uid(from_layer), layer_uid(to_layer), + label=format_label(from_layer.output_shape)) + + if not to_layers: + output = 'output-{}'.format(output_id) + + digraph.node(output, 'Output #{}'.format(output_id)) + digraph.edge(layer_uid(from_layer), output, label=" {}".format(from_layer.output_shape)) + output_id += 1 + digraph.render(filepath, view=show) diff --git a/neupy/preprocessing.py b/neupy/preprocessing.py deleted file mode 100644 index d2e7b8d3..00000000 --- a/neupy/preprocessing.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np - -from neupy.core.properties import NumberProperty -from neupy.core.base import BaseSkeleton -from neupy.utils import as_array2d, NotTrainedException - - -__all__ = ('ZCA',) - - -class ZCA(BaseSkeleton): - """ - ZCA (zero-phase component analysis) whitening. - - Parameters - ---------- - regularization : float - Regularization parameter. Defaults to ``1e-5``. - - Attributes - ---------- - mean : 1D array - Mean for each feature. - - components : array-like - ZCA components. - - Methods - ------- - train(data) - Train ZCA. - - transform(data) - Transform input data. - """ - regularization = NumberProperty(default=1e-5, minval=0) - - def __init__(self, regularization=1e-5, **options): - self.regularization = regularization - self.mean = None - self.components = None - super(ZCA, self).__init__(**options) - - def fit(self, X, *args, **kwargs): - """ - This method is an alias to `train` method. - This method is important for the scikit-learn - compatibility. 
- - Parameters - ---------- - X : array-like - - Returns - ------- - ZCA class instance - """ - self.train(X, *args, **kwargs) - return self - - def train(self, data): - """ - Train ZCA. - - Parameters - ---------- - data : array-like - """ - data = as_array2d(data) - self.mean = data.mean(axis=0) - data = data - self.mean - - n_features = data.shape[1] - sigma = np.dot(data.T, data) / n_features - U, S, V = np.linalg.svd(sigma) - - self.components = (U / np.sqrt(S + self.regularization)).dot(U.T) - - def transform(self, data): - """ - Apply ZCA transformation on data. - - Parameters - ---------- - data : array-like - - Returns - ------- - array-like - """ - if self.mean is None or self.components is None: - raise NotTrainedException("Train ZCA before use it.") - - data_shape = data.shape - data = as_array2d(data) - data_transformed = data - self.mean - data_transformed = np.dot(data_transformed, self.components.T) - return data_transformed.reshape(data_shape) diff --git a/neupy/surgery.py b/neupy/surgery.py deleted file mode 100644 index 28513d14..00000000 --- a/neupy/surgery.py +++ /dev/null @@ -1,345 +0,0 @@ -from copy import deepcopy -from functools import reduce - -from neupy.algorithms.constructor import ConstructableNetwork -from neupy.layers.connections import LayerConnection, is_sequential, LayerGraph -from neupy import layers - - -__all__ = ('cut', 'sew_together', 'CutLine', 'cut_along_lines', - 'isolate_connection_if_needed') - - -def isolate_connection_if_needed(connection): - """ - Function copies connection or layer if one is related - to the other layers. In addition it gets rid of these - connections. - - Parameters - ---------- - connection : BaseLayer or LayerConnection instance - Layer or combined layers that needs to be validated. - - Returns - ------- - BaseLayer or LayerConnection instance - Copy of the object or the same instance. - - Raises - ------ - ValueError - If input data type is incorrect. - """ - if isinstance(connection, layers.BaseLayer): - connection = deepcopy(connection) - connection.connection = None - - elif isinstance(connection, LayerConnection): - connection = deepcopy(connection) - - connection.input_layer.connection = None - connection.output_layer.connection = None - connection.connection = None - - else: - raise TypeError("Unknown data type: {}. Surgery module supports " - "only operations with layers and connections." - "".format(type(connection))) - - return connection - - -def clean_and_validate_connection(connection): - """ - - Parameters - ---------- - connection : ConstructableNetwork ot LayerConnection instance - Network class that has constructuble layers or - connected layers. - - Returns - ------- - LayerConnection - """ - if isinstance(connection, ConstructableNetwork): - # Re-define variable to make it easy to understand that - # object in not a real connection. - # The two lines below looks more information that just - # write it as ``connection = connection.connection`` - network = connection - connection = network.connection - - if not isinstance(connection, LayerConnection): - raise ValueError("You can cut only layer connections.") - - if not is_sequential(connection): - raise ValueError("Cannot cut connection that has non-sequential " - "relations between layers.") - - return connection - - -def cut(connection, start, end): - """ - Function cuts a specific part of the neural networks - structure. Function works in the same way as a slicing in - Python. You can think about it as a ``layers[start:end]``. 
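For anyone who depended on the removed ``neupy.preprocessing.ZCA``, the whitening transform reduces to a few lines of NumPy. This sketch mirrors the deleted ``train`` and ``transform`` methods (including the original's division by the number of features rather than the number of samples):

.. code-block:: python

    import numpy as np

    def zca_whiten(data, regularization=1e-5):
        # Center the data, as ZCA.train did.
        mean = data.mean(axis=0)
        centered = data - mean
        # Covariance estimate and its SVD.
        sigma = np.dot(centered.T, centered) / centered.shape[1]
        U, S, _ = np.linalg.svd(sigma)
        # Whitening matrix: rotate, rescale, rotate back.
        components = (U / np.sqrt(S + regularization)).dot(U.T)
        return np.dot(centered, components.T)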
- - Parameters - ---------- - connection : ConstructableNetwork ot LayerConnection instance - Network class that has constructuble layers or - connected layers. - - start : int - Index of the first layer in the new sequence. - - end : int - Index of the final layer in the new sequence. - - Returns - ------- - LayerConnection instance - Redefined connection between cutted layers. - - Raises - ------ - ValueError - In case if something is wrong with the input parameters. - - Notes - ----- - Works only for feedforward connections. - - Examples - -------- - >>> from neupy import layers, surgery - >>> layers = [ - ... layers.Input(10), - ... layers.Sigmoid(20), - ... layers.Sigmoid(30), - ... layers.Sigmoid(40), - ... ] - >>> connection = surgery.sew_together(layers) - >>> connection - Input(10) > Sigmoid(20) > Sigmoid(30) > Sigmoid(40) - >>> - >>> surgery.cut(connection, start=1, end=3) - Sigmoid(20) > Sigmoid(30) - """ - connection = clean_and_validate_connection(connection) - - layers = list(connection) - n_layers = len(layers) - - if end > n_layers: - raise ValueError("Cannot cut till the {} layer. Connection has " - "only {} layers.".format(end, n_layers)) - - cutted_layers = layers[start:end] - - for layer in cutted_layers: - layer.graph = LayerGraph() - layer.connection = None - - if not cutted_layers: - raise ValueError("Specified slice didn't cut any layer.") - - return sew_together(cutted_layers) - - -def sew_together(connections): - """ - Connect layers and connections together. - - Parameters - ---------- - connections : list - List of layers and layer connections. - - Returns - ------- - BaseLayer instance, LayerConnection instance or None - Combined layers and partial connections in one - bug connection. ``None`` result means that your input - is an empty list or tuple. If you get a layer instead - of connection it mean that you have just one layer in the - sequence. - - Notes - ----- - Functon make a copy of each layer/connection. - - Examples - -------- - >>> from neupy import layers, surgery - >>> connection = surgery.sew_together([ - ... layers.Input(784), - ... layers.Relu(30) > layers.Relu(20), - ... layers.Softmax(10), - ... ]) - >>> connection - Input(784) > Relu(30) > Relu(20) > Softmax(10) - """ - if not connections: - return - - # cleaned_connections = connections - cleaned_connections = [] - for connection in connections: - # Since connection can be related to some other network, - # we need to copy it and clean all old relations. - # It will help us to prevent errors in future. - connection = isolate_connection_if_needed(connection) - cleaned_connections.append(connection) - - merged_connections = reduce(LayerConnection, cleaned_connections) - return merged_connections - - -class CutLine(layers.BaseLayer): - """ - Basic layer class that doesn't change network's structure. - This class just help you to define places where you need to - split your layer's structure. - """ - - -def iter_cutted_regions(cutted_regions): - """ - Takes a list of integer and iterates over non-empty - slicing index pairs. - - Parameters - ---------- - cutted_regions : list of int - List of indeces that defines cut points. - - Yields - ------ - tuple with 2 int - It contains indeces that defines cut points - ``(left_index, right_index)``. - - Examples - -------- - >>> from neupy import surgery - >>> regions = [0, 1, 5, 7, 9, 10] - >>> for start, end in surgery.iter_cutted_regions(regions): - ... print(start, end) - ... 
- 1 4 - 5 6 - 7 8 - """ - left_bounds = cutted_regions - right_bounds = cutted_regions[1:] - - for left_index, right_index in zip(left_bounds, right_bounds): - # We try to avoid cases when cutted region gives an empty slice. - if right_index - left_index > 1: - yield (left_index, right_index - 1) - - -def find_cut_points(connection): - """ - Function looks for the cut lines in the connection. - - Parameters - ---------- - connection : LayerConnection instance - Connected layers. - - Returns - ------- - list of int - The final result is a list of indeces that defines cut - line layers position. One important note is that function - automaticaly added the first and the last layers as a cut - points. It means that you will always get at least - two values in the list. - - Notes - ----- - It's better to use ``surgery.cut_along_lines`` function to cut - your layers. - - Examples - -------- - >>> from neupy import layers, surgery - >>> connection = layers.Input(10) > layers.Sigmoid(5) - >>> connection - Input(10) > Sigmoid(5) - >>> surgery.find_cut_points(connection) - [0, 3] - """ - n_layers = len(connection) - # We assume that first index is always a 'cut line', that's - # wht we added 0 as a first index. We don't connection them - # directly at the beginning to prevent all issues related - # to the connection structure modifications - cut_points = [0] - - # Since we 'added' cut line layer at the beginning, we need - # to start count other layers from first index - for i, layer in enumerate(connection, start=1): - if isinstance(layer, CutLine): - cut_points.append(i) - - # We also assume that we have a final layer as a 'cut line'. - # And again we aren't adding layer directly. We just - # assuming that we have it in the network - cut_points.append(n_layers + 1) - return cut_points - - -def cut_along_lines(connection): - """ - Cuts layer's connection in the specified places. - in the places where you need to cut layer you need to set up - ``surgery.CutLine`` layer, that defines place where you need - to cut the network. - - Parameters - ---------- - connection : ConstructableNetwork ot LayerConnection instance - Network class that has constructuble layers or - connected layers. - - Returns - ------- - list - List that contains all cutted connections. - - Examples - -------- - >>> from neupy import algorithms, layers, surgery - >>> network = algorithms.GradientDescent([ - ... layers.Input(5), - ... surgery.CutLine(), # <- first cut point - ... layers.Sigmoid(10), - ... layers.Sigmoid(20), - ... layers.Sigmoid(30), - ... surgery.CutLine(), # <- second cut point - ... layers.Sigmoid(1), - ... ]) - >>> cutted_connections = surgery.cut_along_lines(network) - >>> - >>> for connection in cutted_connections: - ... print(connection) - ... 
- Input(5) - Sigmoid(10) > Sigmoid(20) > Sigmoid(30) - Sigmoid(1) - """ - connection = clean_and_validate_connection(connection) - cut_points = find_cut_points(connection) - - connections = [] - for start, end in iter_cutted_regions(cut_points): - cutted_connection = cut(connection, start, end) - connections.append(cutted_connection) - - return connections diff --git a/neupy/utils.py b/neupy/utils.py index 53fbb364..86cecd9f 100644 --- a/neupy/utils.py +++ b/neupy/utils.py @@ -8,22 +8,13 @@ from scipy.sparse import issparse -__all__ = ('format_data', 'does_layer_accept_1d_feature', 'asfloat', - 'AttributeKeyDict', 'is_list_of_integers', 'preformat_value', - 'as_array2d', 'NotTrainedException', 'smallest_positive_number', +__all__ = ('format_data', 'asfloat', 'AttributeKeyDict', 'preformat_value', 'as_tuple', 'asint', 'number_type', 'theano_random_stream') number_type = (int, float, np.floating, np.integer) -class NotTrainedException(Exception): - """ - Exception needs for cases when algorithm is not trained - and can not be applied. - """ - - def format_data(data, is_feature1d=True, copy=False): """ Transform data in a standardized format. @@ -38,9 +29,11 @@ def format_data(data, is_feature1d=True, copy=False): data : array-like Data that should be formated. That could be, matrix, vector or Pandas DataFrame instance. + is_feature1d : bool Should be equal to ``True`` if input data is a vector that contains N samples with 1 feature each. Defaults to ``True``. + copy : bool Defaults to ``False``. @@ -68,21 +61,6 @@ def format_data(data, is_feature1d=True, copy=False): return data -def does_layer_accept_1d_feature(layer): - """ - Check if 1D feature values are valid for the layer. - - Parameters - ---------- - layer : object - - Returns - ------- - bool - """ - return (layer.output_shape == (1,)) - - def asfloat(value): """ Convert variable to float type configured by theano @@ -183,24 +161,6 @@ def __delattr__(self, attrname): del self[attrname] -def is_list_of_integers(sequence): - """ - Check that sequence contains only integer numbers. - - Parameters - ---------- - sequence : list, tuple - Array that should be validated. - - Returns - ------- - bool - Result would be ``True`` only if each element in a sequence contains - is an integer. ``False`` otherwise. - """ - return all(isinstance(element, int) for element in sequence) - - def preformat_value(value): """ Function pre-format input value depending on it's type. @@ -222,45 +182,10 @@ def preformat_value(value): elif isinstance(value, (np.ndarray, np.matrix)): return value.shape - return value - - -def as_array2d(array): - """ - Transform any array to 2D. - - Parameters - ---------- - array : array-like + elif hasattr(value, 'default'): + return value.default - Returns - ------- - array-like - The same array transformed to 2D. - """ - if array.ndim == 1: - return array.reshape((1, -1)) - - n_samples, feature_shape = array.shape[0], array.shape[1:] - return array.reshape((n_samples, np.prod(feature_shape))) - - -def smallest_positive_number(): - """ - Based on the `floatX` variables function returns different - smallest positive numbers. - - Returns - ------- - float - Smallest positive float number. 
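``smallest_positive_number`` leaves ``neupy.utils`` but is not gone: the relocated test later in this patch exercises it through the errors module. A usage sketch, with the import path assumed from that test module's location:

.. code-block:: python

    from neupy.algorithms.gd import errors

    # The smallest epsilon still distinguishable from zero
    # when subtracted from 1.0 in the configured float type.
    epsilon = errors.smallest_positive_number()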
- """ - float_type = theano.config.floatX - epsilon_values = { - 'float32': 1e-7, - 'float64': 1e-16, - } - return epsilon_values[float_type] + return value def as_tuple(*values): @@ -281,8 +206,9 @@ def as_tuple(*values): -------- >>> as_tuple(None, (1, 2, 3), None) (None, 1, 2, 3, None) - >>> as_tuple((1, 2, 3), (4, 5, 3)) - (1, 2, 3, 4, 5, 3) + >>> + >>> as_tuple((1, 2, 3), (4, 5, 6)) + (1, 2, 3, 4, 5, 6) """ cleaned_values = [] for value in values: @@ -298,7 +224,7 @@ def theano_random_stream(): Create Theano random stream instance. """ # Use NumPy seed to make Theano code easely reproducible - max_possible_seed = 2147483647 + max_possible_seed = 2147483647 # max 32-bit integer seed = np.random.randint(max_possible_seed) theano_random = T.shared_randomstreams.RandomStreams(seed) return theano_random diff --git a/site/apidocs/neupy.exceptions.rst b/site/apidocs/neupy.exceptions.rst new file mode 100644 index 00000000..7e71ac26 --- /dev/null +++ b/site/apidocs/neupy.exceptions.rst @@ -0,0 +1,7 @@ +neupy.exceptions module +======================= + +.. automodule:: neupy.exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/site/apidocs/neupy.helpers.rst b/site/apidocs/neupy.helpers.rst index f025f734..c604708b 100644 --- a/site/apidocs/neupy.helpers.rst +++ b/site/apidocs/neupy.helpers.rst @@ -8,7 +8,6 @@ Submodules neupy.helpers.logs neupy.helpers.progressbar - neupy.helpers.sms neupy.helpers.table neupy.helpers.terminal diff --git a/site/apidocs/neupy.helpers.sms.rst b/site/apidocs/neupy.helpers.sms.rst deleted file mode 100644 index c7d8bd84..00000000 --- a/site/apidocs/neupy.helpers.sms.rst +++ /dev/null @@ -1,7 +0,0 @@ -neupy.helpers.sms module -======================== - -.. automodule:: neupy.helpers.sms - :members: - :undoc-members: - :show-inheritance: diff --git a/site/apidocs/neupy.layers.connections.base.rst b/site/apidocs/neupy.layers.connections.base.rst new file mode 100644 index 00000000..5ac8e4f8 --- /dev/null +++ b/site/apidocs/neupy.layers.connections.base.rst @@ -0,0 +1,7 @@ +neupy.layers.connections.base module +==================================== + +.. automodule:: neupy.layers.connections.base + :members: + :undoc-members: + :show-inheritance: diff --git a/site/apidocs/neupy.layers.connections.graph.rst b/site/apidocs/neupy.layers.connections.graph.rst new file mode 100644 index 00000000..d968ad7d --- /dev/null +++ b/site/apidocs/neupy.layers.connections.graph.rst @@ -0,0 +1,7 @@ +neupy.layers.connections.graph module +===================================== + +.. automodule:: neupy.layers.connections.graph + :members: + :undoc-members: + :show-inheritance: diff --git a/site/apidocs/neupy.layers.connections.rst b/site/apidocs/neupy.layers.connections.rst index 97f1fa05..f917868d 100644 --- a/site/apidocs/neupy.layers.connections.rst +++ b/site/apidocs/neupy.layers.connections.rst @@ -1,5 +1,17 @@ -neupy.layers.connections module -=============================== +neupy.layers.connections package +================================ + +Submodules +---------- + +.. toctree:: + + neupy.layers.connections.base + neupy.layers.connections.graph + neupy.layers.connections.utils + +Module contents +--------------- .. 
automodule:: neupy.layers.connections :members: diff --git a/site/apidocs/neupy.layers.connections.utils.rst b/site/apidocs/neupy.layers.connections.utils.rst new file mode 100644 index 00000000..5158e314 --- /dev/null +++ b/site/apidocs/neupy.layers.connections.utils.rst @@ -0,0 +1,7 @@ +neupy.layers.connections.utils module +===================================== + +.. automodule:: neupy.layers.connections.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/site/apidocs/neupy.layers.parallel.rst b/site/apidocs/neupy.layers.parallel.rst deleted file mode 100644 index df46ae5b..00000000 --- a/site/apidocs/neupy.layers.parallel.rst +++ /dev/null @@ -1,7 +0,0 @@ -neupy.layers.parallel module -============================ - -.. automodule:: neupy.layers.parallel - :members: - :undoc-members: - :show-inheritance: diff --git a/site/apidocs/neupy.layers.rst b/site/apidocs/neupy.layers.rst index d349948b..615f5c2d 100644 --- a/site/apidocs/neupy.layers.rst +++ b/site/apidocs/neupy.layers.rst @@ -1,6 +1,13 @@ neupy.layers package ==================== +Subpackages +----------- + +.. toctree:: + + neupy.layers.connections + Submodules ---------- @@ -8,11 +15,9 @@ Submodules neupy.layers.activations neupy.layers.base - neupy.layers.connections neupy.layers.convolutions neupy.layers.merge neupy.layers.normalization - neupy.layers.parallel neupy.layers.pooling neupy.layers.recurrent neupy.layers.stochastic diff --git a/site/apidocs/neupy.preprocessing.rst b/site/apidocs/neupy.preprocessing.rst deleted file mode 100644 index d01b65a9..00000000 --- a/site/apidocs/neupy.preprocessing.rst +++ /dev/null @@ -1,7 +0,0 @@ -neupy.preprocessing module -========================== - -.. automodule:: neupy.preprocessing - :members: - :undoc-members: - :show-inheritance: diff --git a/site/apidocs/neupy.rst b/site/apidocs/neupy.rst index ffba057d..effecf32 100644 --- a/site/apidocs/neupy.rst +++ b/site/apidocs/neupy.rst @@ -21,10 +21,9 @@ Submodules neupy.environment neupy.estimators + neupy.exceptions neupy.init - neupy.preprocessing neupy.storage - neupy.surgery neupy.utils Module contents diff --git a/site/apidocs/neupy.surgery.rst b/site/apidocs/neupy.surgery.rst deleted file mode 100644 index 27a26332..00000000 --- a/site/apidocs/neupy.surgery.rst +++ /dev/null @@ -1,7 +0,0 @@ -neupy.surgery module -==================== - -.. automodule:: neupy.surgery - :members: - :undoc-members: - :show-inheritance: diff --git a/site/docs/algorithms/signals.rst b/site/docs/algorithms/signals.rst index 9798569b..7b6eb826 100644 --- a/site/docs/algorithms/signals.rst +++ b/site/docs/algorithms/signals.rst @@ -46,11 +46,11 @@ The other useful feature releated to the signals is that you can can implement y .. code-block:: python from neupy import algorithms - from neupy.algorithms import StopTrainingException + from neupy.exceptions import StopTraining def on_epoch_end(network): if network.errors.last() < 0.001: - raise StopTrainingException("Stop training") + raise StopTraining("Stop training") gdnet = algorithms.GradientDescent( (2, 3, 1), diff --git a/site/docs/layers/basics.rst b/site/docs/layers/basics.rst index 9807ca95..fd80ff41 100644 --- a/site/docs/layers/basics.rst +++ b/site/docs/layers/basics.rst @@ -58,31 +58,6 @@ Even thought we know the output shape we don't know an input. To be able to cons >>> connection.input_shape (3,) -Layer initialization --------------------- - -Since layers are defined independently from each other we cannot perfom all initialization procedure after we connected layers. 
To be able to do that we need to call ``initialization`` method when all connections are defined. - -.. code-block:: python - - >>> from neupy import layers - >>> - >>> sigmoid_layer = layers.Sigmoid(3) - >>> connection = layers.Input(2) > sigmoid_layer - >>> - >>> sigmoid_layer.weight.get_value() - Traceback (most recent call last): - File "", line 1, in - neupy.init.UninitializedException: Cannot get parameter value. - Parameter hasn't been initialized yet - >>> - >>> connection.initialize() - >>> sigmoid_layer.weight.get_value() - array([[ 0.90131086, 0.38221973, -0.69804142], - [-0.54882893, 0.81503922, -0.53348505]]) - -Only after the initialization we was able to get parameter. - Mutlilayer Perceptron (MLP) =========================== @@ -218,17 +193,16 @@ Any connection between layers in NeuPy is a `Directional Acyclic Graph (DAG) >> connection.initialize() - >>> >>> layers.count_parameters(connection) 67 diff --git a/site/docs/secondary.rst b/site/docs/secondary.rst index ee4e1f64..36aed85b 100644 --- a/site/docs/secondary.rst +++ b/site/docs/secondary.rst @@ -7,7 +7,5 @@ This part of documentation contains secondary feature from NeuPy library. :glob: :maxdepth: 2 - secondary/notifications secondary/environments - secondary/surgery secondary/visualizations diff --git a/site/docs/secondary/notifications.rst b/site/docs/secondary/notifications.rst deleted file mode 100644 index ef0682a1..00000000 --- a/site/docs/secondary/notifications.rst +++ /dev/null @@ -1,39 +0,0 @@ -Notifications -============= - -In NeuPy only Twilio SMS notifications are available, but you can use your -own implementation for anything you need. -So the basic idea is that you override signal, for example ``train_end_signal``, and define a notification inside the new function. - -Before useing Twilio API you should first install a library. - -.. code-block:: bash - - $ pip install twilio - -There is the simple example of program which will send SMS when network training -process will be finished. - -.. code-block:: python - - from neupy import algorithms - from neupy.helpers.sms import twilio_sms - - send_sms = twilio_sms( - account_id="MY_ACCOUNT_ID", - token="SECRET_TOKEN", - to_phone="+XXXXXXXXXX", - from_phone="+XXXXXXXXXX", - verbose=True - ) - - def on_train_end(network): - last_error = network.errors.last() - send_sms("Training finished. Last error: {}".format(last_error)) - - lmnet = algorithms.LevenbergMarquardt( - (10, 40, 1), - train_end_signal=on_train_end - ) - -More information about signals you can read in `documentation `_. diff --git a/site/docs/secondary/surgery.rst b/site/docs/secondary/surgery.rst deleted file mode 100644 index 3b2464e1..00000000 --- a/site/docs/secondary/surgery.rst +++ /dev/null @@ -1,156 +0,0 @@ -Neural Network Surgery -====================== - -It's very easy to define relations between layers in NeuPy. Also NeuPy's syntax makes it easy to read and understand your network's strucutre. But any method should have its pros and cons. The main disadvantage of this approach is that it's a bit complecated to use a part of the network when you define full network. To solve this issue in NeuPy exists module named as a surgery. The idea is simple. You still can define your networks using very simple syntax and then cut pieces from them and connect these pieces together in the new networks. - -Cutting layers from the network -******************************* - -Cut is a first feature of a surgery module. You can cut network in a few different ways. 
To make it simple to understand I'm going to consider a simple pretraining autoencoder problem. Let's define the autoencoder network. - -.. code-block:: python - - from neupy import algorithms, layers - - autoencoder = algorithms.Momentum( - [ - layers.Input(784), - layers.Sigmoid(100), - layers.Sigmoid(784), - ], - step=0.1, - momentum=0.99, - verbose=True, - error='rmse', - ) - - x_train, y_train = load_mnist() - autoencoder.train(x_train, x_train, epochs=20) - -As you can see we define a very simple autoencoder that trains over 20 epochs. Trained networks contain two pieces that usually known as encoder and decoder networks. In case of pretraining we are interested in encoding layer. Let's cut this part. - -.. code-block:: python - - >>> from neupy import surgery - >>> encoder = surgery.cut(autoencoder, start=0, end=2) - >>> encoder - Input(784) > Sigmoid(100) - -As you can see this operation is similar to Python's slicings. Basically this operation do something like this. - -.. code-block:: python - - # NOTE: This is a pseudo-code, so it will - # not work in the NeuPy - encoder = autoencoder_layers[0:2] - -Since we have a part of the network that do an encoding procedure we can attach pretrained part to the other network that will manage to do classification. - -.. code-block:: python - - classifier = algorithms.Momentum( - encoder > layers.Softmax(10), - step=0.1, - momentum=0.99, - verbose=True, - error='categorical_crossentropy', - ) - classifier.train(x_train, y_train, epochs=10) - -That's it, now you have classifier with pretrained layer. Now you can check its final structure. - -.. code-block:: python - - >>> classifier.architecture() - ----------------------------------------------- - | # | Input shape | Layer Type | Output shape | - ----------------------------------------------- - | 1 | 784 | Input | 784 | - | 2 | 784 | Sigmoid | 100 | - | 3 | 100 | Softmax | 10 | - ----------------------------------------------- - -Such a method can be unsutable when you deal with networks that have more than 10 layers. To make simplify these procedure for the bigger networks NeuPy gives an ability to predefine places where you need to cut network into pieces. Surgery module contains class ``CutLine``. These class defines places where you want to cut network. Let's consider another example. Suppose we need to cut all hidden layers from the network. Here is an example on how we can do that with predefined layout. - -.. code-block:: python - - from neupy import algorithms, layers, surgery - network = algorithms.GradientDescent([ - layers.Input(5), - - surgery.CutLine(), # <- first cut point - - layers.Sigmoid(10), - layers.Sigmoid(20), - layers.Sigmoid(30), - - surgery.CutLine(), # <- second cut point - - layers.Sigmoid(1), - ]) - -In the surgery module there exists another function that can do this procedure. Here is how it works. - -.. code-block:: python - - >>> cutted_connections = surgery.cut_along_lines(network) - >>> - >>> for connection in cutted_connections: - ... print(connection) - ... - Input(5) - Sigmoid(10) > Sigmoid(20) > Sigmoid(30) - Sigmoid(1) - -It returns a list that contains all of these pieces. Now you can get piece that yu are interested in. - -.. code-block:: python - - >>> _, hidden_layers, _ = cutted_connections - >>> hidden_layers - Sigmoid(10) > Sigmoid(20) > Sigmoid(30) - -As in the autoencoder case we can use this layers in the other networks - -In case if you need to use cutted layers without additional training you can compile them into Theano function. - -.. 
code-block:: python - - >>> import theano - >>> import theano.tensor as T - >>> import numpy as np - >>> - >>> x = T.matrix('x') - >>> y = hidden_layers.output(x) - >>> - >>> f = theano.function([x], y) - >>> - >>> input_matrix = np.random.random((100, 5)) - >>> output_matrix = f(input_matrix) - >>> output_matrix.shape - (100, 30) - -Sew layers together -******************* - -Surgery module not only can break networks into pieces, but it also cut join different pieces together. It's known as **sewing**. We can use cutted layers from the previous example. - -.. code-block:: python - - >>> encoder - Input(784) > Sigmoid(100) - >>>> - >>> hidden_layers - Sigmoid(10) > Sigmoid(20) > Sigmoid(30) - -Let's imagine that we need to join them together. The main problem is that we are not able to combine networks together. The reason is that the ``encoder`` produces 100 dimensional output, while ``hidden_layers`` expects 5 dimensional input. To connect them together we need to define intermidiate layer. - -.. code-block:: python - - >>> connected_layers = surgery.sew_together([ - ... encoder, - ... layers.Relu(5), - ... hidden_layers - ... ]) - >>> connected_layers - Input(784) > Sigmoid(100) > Relu(5) > Sigmoid(10) > Sigmoid(20) > Sigmoid(30) diff --git a/site/pages/cheatsheet.rst b/site/pages/cheatsheet.rst index e899f4a9..9e1ab114 100644 --- a/site/pages/cheatsheet.rst +++ b/site/pages/cheatsheet.rst @@ -198,7 +198,6 @@ Other layers ":layer:`Input`", "Layer defines input value's feature shape" ":layer:`Reshape`", "Reshape tensor input" - ":layer:`Parallel`", "Parallel layer connections" ":layer:`Embedding`", "Embedding layer accepts indeces as an input and returns rows from the weight matrix associated with these indeces." .. _init-methods: diff --git a/tests/algorithms/associative/test_oja.py b/tests/algorithms/associative/test_oja.py index 826e8ec4..cd83bb31 100644 --- a/tests/algorithms/associative/test_oja.py +++ b/tests/algorithms/associative/test_oja.py @@ -1,7 +1,7 @@ import numpy as np from neupy import algorithms, init -from neupy.utils import NotTrainedException +from neupy.exceptions import NotTrained from base import BaseTestCase from utils import vectors_for_testing @@ -48,11 +48,11 @@ def test_oja_exceptions(self): ojanet = algorithms.Oja(minimized_data_size=1, step=0.01, verbose=False) - with self.assertRaises(NotTrainedException): + with self.assertRaises(NotTrained): # Can't reconstruct without training ojanet.reconstruct(np.random.random((4, 1))) - with self.assertRaises(NotTrainedException): + with self.assertRaises(NotTrained): # Can't predict without training ojanet.predict(np.random.random((4, 1))) diff --git a/tests/algorithms/gd/test_errors.py b/tests/algorithms/gd/test_errors.py index 7094603f..09274221 100644 --- a/tests/algorithms/gd/test_errors.py +++ b/tests/algorithms/gd/test_errors.py @@ -131,3 +131,8 @@ def test_categorical_hinge_without_one_hot_encoding(self): def test_categorical_hinge_invalid_dimension(self): with self.assertRaises(TypeError): errors.categorical_hinge(T.tensor3(), T.matrix()) + + def test_smallest_positive_number(self): + epsilon = errors.smallest_positive_number() + self.assertNotEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon))) + self.assertEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon / 10))) diff --git a/tests/algorithms/linear/test_perceptron.py b/tests/algorithms/linear/test_perceptron.py index c71ca061..907d1a1a 100644 --- a/tests/algorithms/linear/test_perceptron.py +++ b/tests/algorithms/linear/test_perceptron.py @@ -1,7 +1,7 
@@ import numpy as np from neupy import algorithms, layers -from neupy.layers.connections import NetworkConnectionError +from neupy.exceptions import InvalidConnection from base import BaseTestCase @@ -14,7 +14,7 @@ def test_perceptron_init_errors(self): with self.assertRaises(ValueError): algorithms.Perceptron((2, 2.5), verbose=False) - with self.assertRaises(NetworkConnectionError): + with self.assertRaises(InvalidConnection): algorithms.Perceptron( layers.Input(2) > layers.Sigmoid(1), verbose=False diff --git a/tests/algorithms/memory/test_bam.py b/tests/algorithms/memory/test_bam.py index 8f7b2e31..bcd85a6a 100644 --- a/tests/algorithms/memory/test_bam.py +++ b/tests/algorithms/memory/test_bam.py @@ -1,7 +1,7 @@ import numpy as np from neupy import algorithms -from neupy.utils import NotTrainedException +from neupy.exceptions import NotTrained from algorithms.memory.data import zero, one, half_one, half_zero from base import BaseTestCase @@ -29,7 +29,7 @@ def test_bam_prediction_method(self): dbnet.prediction(np.array([0, 1]), np.array([0, 1])) def test_bam_exceptions(self): - with self.assertRaises(NotTrainedException): + with self.assertRaises(NotTrained): dbnet = algorithms.DiscreteBAM() dbnet.predict(np.array([-1, 1])) diff --git a/tests/algorithms/rbfn/test_grnn.py b/tests/algorithms/rbfn/test_grnn.py index 958af5d2..0a38eb1c 100644 --- a/tests/algorithms/rbfn/test_grnn.py +++ b/tests/algorithms/rbfn/test_grnn.py @@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split from neupy import algorithms -from neupy.utils import NotTrainedException +from neupy.exceptions import NotTrained from base import BaseTestCase @@ -27,7 +27,7 @@ def test_grrn_exceptions(self): grnet.train(np.array([[0], [0]]), np.array([0])) grnet.predict(np.array([[0]])) - with self.assertRaises(NotTrainedException): + with self.assertRaises(NotTrained): # Prediction without training grnet = algorithms.GRNN(verbose=False) grnet.predict(np.array([[0]])) diff --git a/tests/algorithms/rbfn/test_pnn.py b/tests/algorithms/rbfn/test_pnn.py index 55be9d69..3ef2ab02 100644 --- a/tests/algorithms/rbfn/test_pnn.py +++ b/tests/algorithms/rbfn/test_pnn.py @@ -6,7 +6,7 @@ from sklearn.model_selection import StratifiedKFold, train_test_split from neupy import algorithms -from neupy.utils import NotTrainedException +from neupy.exceptions import NotTrained from base import BaseTestCase @@ -31,7 +31,7 @@ def test_handle_errors(self): pnnet.predict(np.array([[0]])) msg = "hasn't been trained" - with self.assertRaisesRegexp(NotTrainedException, msg): + with self.assertRaisesRegexp(NotTrained, msg): # predict without training pnnet = algorithms.PNN(verbose=False) pnnet.predict(np.array([[0]])) diff --git a/tests/algorithms/test_constructor.py b/tests/algorithms/test_constructor.py index e42386e7..5c10abec 100644 --- a/tests/algorithms/test_constructor.py +++ b/tests/algorithms/test_constructor.py @@ -1,11 +1,11 @@ import theano.tensor as T from neupy import layers -from neupy.layers.connections import NetworkConnectionError -from neupy.algorithms.constructor import (generate_layers, - create_input_variable, +from neupy.exceptions import InvalidConnection +from neupy.algorithms.constructor import (create_input_variable, create_output_variable, - ConstructableNetwork) + ConstructableNetwork, + generate_layers) from base import BaseTestCase @@ -31,25 +31,21 @@ def error_func(expected, predicted): class ConstructableNetworkTestCase(BaseTestCase): def test_multi_input_exception(self): - input_layer_1 = layers.Input(10) - 
input_layer_2 = layers.Input(10) - output_layer = layers.Concatenate() + connection = layers.join([ + [layers.Input(10)], + [layers.Input(10)], + ]) > layers.Concatenate() - connection = layers.join(input_layer_1, output_layer) - connection = layers.join(input_layer_2, output_layer) - - with self.assertRaises(NetworkConnectionError): + with self.assertRaises(InvalidConnection): ConstructableNetwork(connection) def test_multi_output_exception(self): - input_layer = layers.Input(10) - output_layer_1 = layers.Sigmoid(20) - output_layer_2 = layers.Sigmoid(30) - - connection = layers.join(input_layer, output_layer_1) - connection = layers.join(input_layer, output_layer_2) + connection = layers.Input(10) > [ + [layers.Sigmoid(20)], + [layers.Sigmoid(30)], + ] - with self.assertRaises(NetworkConnectionError): + with self.assertRaises(InvalidConnection): ConstructableNetwork(connection) def test_no_updates_by_default(self): diff --git a/tests/algorithms/test_network_features.py b/tests/algorithms/test_network_features.py index 3b8c1260..515b01a5 100644 --- a/tests/algorithms/test_network_features.py +++ b/tests/algorithms/test_network_features.py @@ -4,7 +4,7 @@ import numpy as np from sklearn import datasets from neupy import algorithms, layers -from neupy.algorithms import StopTrainingException +from neupy.exceptions import StopTraining from neupy.algorithms.base import (ErrorHistoryList, show_network_options, logging_info_about_the_data, parse_show_epoch_property) @@ -49,7 +49,7 @@ def test_train_and_test_dataset_training(self): def test_stop_iteration(self): def stop_training_after_the_5th_epoch(network): if network.last_epoch == 5: - raise StopTrainingException("Stopped training") + raise StopTraining("Stopped training") data, target = datasets.make_classification(30, n_features=10, n_classes=2) diff --git a/tests/base.py b/tests/base.py index a42eab3a..f8ecf689 100644 --- a/tests/base.py +++ b/tests/base.py @@ -4,7 +4,7 @@ import numpy as np -from neupy import environment +from neupy import environment, layers from utils import vectors_for_testing @@ -25,6 +25,9 @@ def setUp(self): # simple so some Theano optimizations can be redundant. 
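The scattered exception imports are consolidated by this change set; every test above now pulls its exception classes from one module. Code migrating along with it can import them together (all four names appear in the updated tests):

.. code-block:: python

    from neupy.exceptions import (
        InvalidConnection,     # replaces NetworkConnectionError
        LayerConnectionError,  # moved out of neupy.layers.connections
        NotTrained,            # replaces NotTrainedException
        StopTraining,          # replaces StopTrainingException
    )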
environment.sandbox() + # Clean identifiers map for each test + layers.BaseLayer.global_identifiers_map = {} + def assertInvalidVectorTrain(self, network, input_vector, target=None, decimal=5, is_feature1d=True, **train_kwargs): """ diff --git a/tests/core/test_properties.py b/tests/core/test_properties.py index 135b545c..ac7f33ec 100644 --- a/tests/core/test_properties.py +++ b/tests/core/test_properties.py @@ -70,9 +70,14 @@ def test_property_get_method(self): def test_property_repr(self): prop = Property(default=3) - self.assertEqual('Property()', str(prop)) self.assertEqual('Property()', repr(prop)) + def test_property_repr_with_name(self): + prop = Property(default=3) + prop.name = 'test' + + self.assertEqual('Property(name="test")', repr(prop)) + class BoundedPropertiesTestCase(BaseTestCase): def test_bounded_properties(self): diff --git a/tests/core/test_storage.py b/tests/core/test_storage.py index ace910b9..85736594 100644 --- a/tests/core/test_storage.py +++ b/tests/core/test_storage.py @@ -158,7 +158,6 @@ def test_simple_storage(self): layers.Sigmoid(5), layers.Sigmoid(2), ) - connection.initialize() with tempfile.NamedTemporaryFile() as temp: storage.save(connection, temp.name) @@ -188,7 +187,6 @@ def test_storage_save_load_save(self): layers.Sigmoid(5), layers.Sigmoid(2), ) - connection.initialize() with tempfile.NamedTemporaryFile() as temp: storage.save(connection, temp.name) @@ -221,7 +219,6 @@ def test_storage_load_unknown_parameter(self): layers.Input(10), layers.Relu(1), ) - connection.initialize() with self.assertRaisesRegexp(ValueError, "Cannot load parameters"): storage.load(connection, {}, ignore_missed=False) @@ -232,7 +229,6 @@ def test_storage_load_unknown_parameter(self): def test_storage_load_from_dict(self): relu = layers.Relu(2, name='relu') connection = layers.Input(10) > relu - connection.initialize() weight = np.ones((10, 2)) bias = np.ones((2,)) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index d635f503..c11f8092 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -4,9 +4,8 @@ import numpy as np from scipy.sparse import csr_matrix -from neupy.utils import (preformat_value, as_array2d, AttributeKeyDict, asint, - smallest_positive_number, asfloat, format_data, - as_tuple) +from neupy.utils import (preformat_value, as_tuple, AttributeKeyDict, asint, + asfloat, format_data) from neupy.algorithms.utils import shuffle, iter_until_converge from neupy.layers.utils import preformat_layer_shape from neupy import algorithms @@ -52,11 +51,6 @@ def test_preformat_layer_shape(self): self.assertEqual((2, 3, 1), preformat_layer_shape((2, 3, 1))) self.assertEqual(10, preformat_layer_shape((10,))) - def test_as_array2d(self): - test_input = np.ones(5) - actual_output = as_array2d(test_input) - self.assertEqual((1, 5), actual_output.shape) - def test_attribute_key_dict(self): attrdict = AttributeKeyDict(val1='hello', val2='world') @@ -76,11 +70,6 @@ def test_attribute_key_dict(self): with self.assertRaises(KeyError): attrdict.val1 - def test_smallest_positive_number(self): - epsilon = smallest_positive_number() - self.assertNotEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon))) - self.assertEqual(0, asfloat(1) - (asfloat(1) - asfloat(epsilon / 10))) - def test_format_data(self): # None input self.assertEqual(format_data(None), None) diff --git a/tests/layers/test_activation_layers.py b/tests/layers/test_activation_layers.py index be61358c..2d54ea82 100644 --- a/tests/layers/test_activation_layers.py +++ 
b/tests/layers/test_activation_layers.py @@ -98,9 +98,7 @@ def test_elu_layer(self): def test_linear_layer_withut_bias(self): input_layer = layers.Input(10) output_layer = layers.Linear(2, weight=init.Constant(0.1), bias=None) - connection = input_layer > output_layer - connection.initialize() self.assertEqual(output_layer.bias_shape, None) @@ -129,7 +127,6 @@ def test_invalid_alpha_axes_parameter(self): def test_prelu_random_params(self): prelu_layer = layers.PRelu(10, alpha=init.XavierNormal()) layers.Input(10) > prelu_layer - prelu_layer.initialize() alpha = prelu_layer.alpha.get_value() self.assertEqual(10, np.unique(alpha).size) @@ -137,7 +134,6 @@ def test_prelu_random_params(self): def test_prelu_layer_param_dense(self): prelu_layer = layers.PRelu(10, alpha=0.25) layers.Input(10) > prelu_layer - prelu_layer.initialize() alpha = prelu_layer.alpha.get_value() @@ -148,10 +144,8 @@ def test_prelu_layer_param_conv(self): input_layer = layers.Input((3, 10, 10)) conv_layer = layers.Convolution((5, 3, 3)) prelu_layer = layers.PRelu(alpha=0.25, alpha_axes=(1, 3)) - input_layer > conv_layer > prelu_layer - conv_layer.initialize() - prelu_layer.initialize() + input_layer > conv_layer > prelu_layer alpha = prelu_layer.alpha.get_value() expected_alpha = np.ones((5, 8)) * 0.25 @@ -162,7 +156,6 @@ def test_prelu_layer_param_conv(self): def test_prelu_output_by_dense_input(self): prelu_layer = layers.PRelu(1, alpha=0.25) layers.Input(1) > prelu_layer - prelu_layer.initialize() input_data = np.array([[10, 1, 0.1, 0, -0.1, -1]]).T expected_output = np.array([[10, 1, 0.1, 0, -0.025, -0.25]]).T @@ -176,10 +169,8 @@ def test_prelu_output_by_spatial_input(self): input_layer = layers.Input((3, 10, 10)) conv_layer = layers.Convolution((5, 3, 3)) prelu_layer = layers.PRelu(alpha=0.25, alpha_axes=(1, 3)) - connection = input_layer > conv_layer > prelu_layer - conv_layer.initialize() - prelu_layer.initialize() + connection = input_layer > conv_layer > prelu_layer actual_output = input_data for layer in connection: diff --git a/tests/layers/test_basic_layers.py b/tests/layers/test_basic_layers.py index b5bbbd62..55c3dce1 100644 --- a/tests/layers/test_basic_layers.py +++ b/tests/layers/test_basic_layers.py @@ -43,8 +43,7 @@ def test_layer_default_name(self): input_layer = layers.Input(10) output_layer = layers.Sigmoid(1) - connection = layers.join(input_layer, output_layer) - connection.initialize() + layers.join(input_layer, output_layer) self.assertEqual(output_layer.name, 'sigmoid-1') self.assertEqual(input_layer.name, 'input-1') @@ -58,8 +57,7 @@ def test_layer_name_for_connection(self): hidden_layer = layers.Sigmoid(5) output_layer = layers.Sigmoid(10) - connection = layers.join(input_layer, hidden_layer, output_layer) - connection.initialize() + layers.join(input_layer, hidden_layer, output_layer) name = generate_layer_name(hidden_layer) self.assertEqual(name, 'sigmoid-1') @@ -82,8 +80,7 @@ def test_layer_name_for_connected_subgraphs(self): output_layer = layers.Relu(4) conn2 = hidden2_layer > output_layer - conn = conn1 > conn2 - conn.initialize() + conn1 > conn2 self.assertEqual(input_layer.name, 'input-1') self.assertEqual(hidden1_layer.name, 'relu-1') diff --git a/tests/layers/test_connections.py b/tests/layers/test_connections.py index af6c8186..1d69039a 100644 --- a/tests/layers/test_connections.py +++ b/tests/layers/test_connections.py @@ -7,8 +7,9 @@ from neupy import layers, algorithms from neupy.utils import asfloat, as_tuple from neupy.layers import Input, Relu, Tanh, Sigmoid -from 
neupy.layers.connections import (is_sequential, merge_dicts_with_list, - does_layer_expect_one_input) +from neupy.layers.connections import is_sequential +from neupy.layers.connections.graph import (merge_dicts_with_list, + does_layer_expect_one_input) from base import BaseTestCase @@ -36,7 +37,7 @@ def test_connection_inside_connection_mlp(self): connection.initialize() expected_sizes = [2, 10, 4, 7, 3, 1] - for layer, expected_size in zip(connection.layers, expected_sizes): + for layer, expected_size in zip(connection, expected_sizes): self.assertEqual(expected_size, layer.size) def test_connection_inside_connection_conv(self): @@ -54,11 +55,13 @@ def test_connection_inside_connection_conv(self): self.assertEqual(8, len(connection)) - self.assertIsInstance(connection.layers[1], layers.Convolution) - self.assertIsInstance(connection.layers[2], layers.Relu) - self.assertIsInstance(connection.layers[3], layers.Convolution) - self.assertIsInstance(connection.layers[4], layers.Relu) - self.assertIsInstance(connection.layers[5], layers.MaxPooling) + expected_order = [ + layers.Input, layers.Convolution, layers.Relu, + layers.Convolution, layers.Relu, layers.MaxPooling, + layers.Reshape, layers.Softmax + ] + for actual_layer, expected_layer in zip(connection, expected_order): + self.assertIsInstance(actual_layer, expected_layer) def test_connection_shapes(self): connection = Input(2) > Relu(10) > Tanh(1) @@ -178,32 +181,24 @@ def test_dict_based_inputs_into_connection(self): def test_connections_with_complex_parallel_relations(self): input_layer = layers.Input((3, 5, 5)) - connection = layers.Parallel( + connection = layers.join( [[ layers.Convolution((8, 1, 1)), ], [ layers.Convolution((4, 1, 1)), - layers.Parallel( - [[ - - layers.Convolution((2, 1, 3), padding=(0, 1)), - ], [ - layers.Convolution((2, 3, 1), padding=(1, 0)), - ]], - layers.Concatenate(), - ) + [[ + layers.Convolution((2, 1, 3), padding=(0, 1)), + ], [ + layers.Convolution((2, 3, 1), padding=(1, 0)), + ]], ], [ layers.Convolution((8, 1, 1)), layers.Convolution((4, 3, 3), padding=1), - layers.Parallel( - [[ - - layers.Convolution((2, 1, 3), padding=(0, 1)), - ], [ - layers.Convolution((2, 3, 1), padding=(1, 0)), - ]], - layers.Concatenate(), - ) + [[ + layers.Convolution((2, 1, 3), padding=(0, 1)), + ], [ + layers.Convolution((2, 3, 1), padding=(1, 0)), + ]], ], [ layers.MaxPooling((3, 3), stride=(1, 1), padding=1), layers.Convolution((8, 1, 1)), @@ -229,7 +224,6 @@ def test_is_sequential_connection(self): layer = layers.Input(10) self.assertTrue(is_sequential(layer)) - @unittest.skip("broken") def test_is_sequential_partial_connection(self): connection_2 = layers.Input(10) > layers.Sigmoid(5) connection_31 = connection_2 > layers.Sigmoid(1) @@ -262,3 +256,56 @@ class A(object): output = 'attribute' does_layer_expect_one_input(A) + + +class TestParallelConnectionsTestCase(BaseTestCase): + def test_parallel_layer(self): + input_layer = layers.Input((3, 8, 8)) + parallel_layer = layers.join( + [[ + layers.Convolution((11, 5, 5)), + ], [ + layers.Convolution((10, 3, 3)), + layers.Convolution((5, 3, 3)), + ]], + layers.Concatenate(), + ) + output_layer = layers.MaxPooling((2, 2)) + + conn = layers.join(input_layer, parallel_layer) + output_connection = layers.join(conn, output_layer) + output_connection.initialize() + + x = T.tensor4() + y = theano.function([x], conn.output(x)) + + x_tensor4 = asfloat(np.random.random((10, 3, 8, 8))) + output = y(x_tensor4) + self.assertEqual(output.shape, (10, 11 + 5, 4, 4)) + + output_function = 
diff --git a/tests/layers/test_convolution_layers.py b/tests/layers/test_convolution_layers.py
index 26677e10..e2eabea1 100644
--- a/tests/layers/test_convolution_layers.py
+++ b/tests/layers/test_convolution_layers.py
@@ -6,7 +6,7 @@
 from neupy import layers
 from neupy.utils import asfloat, as_tuple
 from neupy.layers.convolutions import conv_output_shape
-from neupy.layers.connections import LayerConnectionError
+from neupy.exceptions import LayerConnectionError
 
 from base import BaseTestCase
 
diff --git a/tests/layers/test_graph.py b/tests/layers/test_graph.py
index 0f1a032a..01d53b99 100644
--- a/tests/layers/test_graph.py
+++ b/tests/layers/test_graph.py
@@ -3,8 +3,9 @@
 
 from neupy import layers
 from neupy.utils import asfloat
-from neupy.layers.connections import (LayerGraph, LayerConnectionError,
-                                      is_cyclic, topological_sort)
+from neupy.exceptions import LayerConnectionError
+from neupy.layers.connections.graph import LayerGraph, is_cyclic
+from neupy.layers.connections.base import topological_sort
 
 from base import BaseTestCase
 
diff --git a/tests/layers/test_init_methods.py b/tests/layers/test_init_methods.py
index 4745bf53..5cea1bd2 100644
--- a/tests/layers/test_init_methods.py
+++ b/tests/layers/test_init_methods.py
@@ -3,7 +3,7 @@
 from scipy import stats
 import numpy as np
 
-from neupy import init
+from neupy import init, environment
 
 from base import BaseTestCase
 
@@ -68,6 +68,23 @@ def test_uniform_initializer_repr(self):
         self.assertEqual("Uniform(0, 1)", str(uniform_initializer))
 
 
+class InitializerWithGainTestCase(BaseInitializerTestCase):
+    def test_gain_relu(self):
+        he_initializer = init.HeNormal(gain='relu')
+        self.assertEqual(he_initializer.gain, math.sqrt(2))
+
+    def test_gain_relu_he_normal_scale(self):
+        environment.reproducible()
+        he_initializer = init.HeNormal(gain=1)
+        sample_1 = he_initializer.sample((3, 2))
+
+        environment.reproducible()
+        he_initializer = init.HeNormal(gain='relu')
+        sample_2 = he_initializer.sample((3, 2))
+
+        self.assertAlmostEqual(np.mean(sample_2 / sample_1), math.sqrt(2))
+
+
 class HeInitializeTestCase(BaseInitializerTestCase):
     def test_he_normal(self):
         he_normal = init.HeNormal()
@@ -110,9 +127,10 @@ def test_xavier_normal(self):
 
     def test_xavier_uniform(self):
         n_inputs, n_outputs = 10, 30
-        n_inputs, n_outputs = 30, 30
+
         xavier_uniform = init.XavierUniform()
         weight = xavier_uniform.sample((n_inputs, n_outputs))
+        bound = math.sqrt(6. / (n_inputs + n_outputs))
 
         self.assertUniformlyDistributed(weight)
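The new `gain` argument scales the initializer's output, and `gain='relu'` is shorthand for sqrt(2). A quick sanity check of the behaviour the tests above pin down (a sketch; the seeding via `environment.reproducible()` mirrors the test):

    import math
    from neupy import init, environment

    environment.reproducible()
    base = init.HeNormal(gain=1).sample((3, 2))

    environment.reproducible()
    scaled = init.HeNormal(gain='relu').sample((3, 2))

    # With the same seed, the ratio between samples is sqrt(2) on average
    assert abs((scaled / base).mean() - math.sqrt(2)) < 1e-6
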
diff --git a/tests/layers/test_layer_utils.py b/tests/layers/test_layer_utils.py
index ccef722a..7cbf8a7e 100644
--- a/tests/layers/test_layer_utils.py
+++ b/tests/layers/test_layer_utils.py
@@ -10,20 +10,20 @@ def test_count_parameters(self):
             layers.Sigmoid(5),
             layers.Sigmoid(2),
         )
-        connection.initialize()
 
         n_parameters = layers.count_parameters(connection)
         self.assertEqual(n_parameters, (10 * 5 + 5) + (5 * 2 + 2))
 
     def test_count_parameters_single_layer(self):
         hidden_layer = layers.Sigmoid(5)
-
-        connection = layers.join(
+        layers.join(
             layers.Input(10),
             hidden_layer,
             layers.Sigmoid(2),
         )
-        connection.initialize()
 
         n_parameters = layers.count_parameters(hidden_layer)
         self.assertEqual(n_parameters, 10 * 5 + 5)
+
+    def test_join_empty_connection(self):
+        self.assertEqual(layers.join(), None)
diff --git a/tests/layers/test_merge_layers.py b/tests/layers/test_merge_layers.py
index 5aae5d0b..af2e9b75 100644
--- a/tests/layers/test_merge_layers.py
+++ b/tests/layers/test_merge_layers.py
@@ -4,7 +4,7 @@
 
 from neupy import layers, init
 from neupy.utils import asfloat
-from neupy.layers.connections import LayerConnectionError
+from neupy.exceptions import LayerConnectionError
 
 from base import BaseTestCase
 
diff --git a/tests/layers/test_normalization_layers.py b/tests/layers/test_normalization_layers.py
index bf1558be..9f6c5084 100644
--- a/tests/layers/test_normalization_layers.py
+++ b/tests/layers/test_normalization_layers.py
@@ -5,8 +5,8 @@
 
 from neupy import layers
 from neupy.utils import asfloat
+from neupy.exceptions import LayerConnectionError
 from neupy.layers.normalization import find_opposite_axes
-from neupy.layers.connections import LayerConnectionError
 
 from base import BaseTestCase
 
@@ -24,8 +24,7 @@ def test_batch_norm_as_shared_variable(self):
         beta = theano.shared(value=asfloat(2 * np.ones(2)))
 
         batch_norm = layers.BatchNorm(gamma=gamma, beta=beta)
-        connection = layers.Input(10) > batch_norm
-        connection.initialize()
+        layers.Input(10) > batch_norm
 
         self.assertIs(gamma, batch_norm.gamma)
         self.assertIs(beta, batch_norm.beta)
@@ -47,7 +46,6 @@ def test_find_pposite_axis_valid_cases(self):
 
     def test_simple_batch_norm(self):
         connection = layers.Input(10) > layers.BatchNorm()
-        connection.initialize()
 
         input_value = theano.shared(value=np.random.random((30, 10)))
         output_value = connection.output(input_value).eval()
@@ -63,7 +61,6 @@ def test_batch_norm_gamma_beta_params(self):
             layers.Input(10),
             layers.BatchNorm(gamma=default_gamma, beta=default_beta)
         )
-        connection.initialize()
 
         input_value = theano.shared(value=np.random.random((30, 10)))
         output_value = connection.output(input_value).eval()
@@ -78,7 +75,6 @@ def test_batch_norm_between_layers(self):
             layers.BatchNorm(),
             layers.Relu(1),
         )
-        connection.initialize()
 
         input_value = np.random.random((30, 10))
         outpu_value = connection.output(input_value).eval()
@@ -88,8 +84,7 @@ def test_batch_norm_exceptions(self):
         with self.assertRaises(ValueError):
             # Axis does not exist
-            connection = layers.Input(10) > layers.BatchNorm(axes=2)
-            connection.initialize()
+            layers.Input(10) > layers.BatchNorm(axes=2)
 
         with self.assertRaises(ValueError):
             connection = layers.Relu() > layers.BatchNorm()
@@ -97,12 +92,12 @@ def test_batch_norm_in_non_training_state(self):
         batch_norm = layers.BatchNorm()
-        connection = layers.Input(10) > batch_norm
-        connection.initialize()
+        layers.Input(10) > batch_norm
 
         input_value = theano.shared(value=np.random.random((30, 10)))
 
         self.assertEqual(len(batch_norm.updates), 0)
+        batch_norm.output(input_value)
         self.assertEqual(len(batch_norm.updates), 2)
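The recurring change above is the removal of explicit `connection.initialize()` calls: parameters now exist as soon as the connection is built. A sketch of the implied usage, grounded in `test_count_parameters`:

    from neupy import layers

    network = layers.join(
        layers.Input(10),
        layers.Sigmoid(5),
        layers.Sigmoid(2),
    )

    # No separate initialization step: weights and biases are already
    # created, so parameter counting works immediately.
    # (10 * 5 + 5) + (5 * 2 + 2) = 67
    assert layers.count_parameters(network) == 67
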
diff --git a/tests/layers/test_parallel_layers.py b/tests/layers/test_parallel_layers.py
deleted file mode 100644
index f6f437fd..00000000
--- a/tests/layers/test_parallel_layers.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import numpy as np
-import theano
-import theano.tensor as T
-
-from neupy import layers
-from neupy.utils import asfloat
-
-from base import BaseTestCase
-
-
-class ParallelLayerTestCase(BaseTestCase):
-    def test_parallel_layer(self):
-        input_layer = layers.Input((3, 8, 8))
-        parallel_layer = layers.Parallel(
-            [[
-                layers.Convolution((11, 5, 5)),
-            ], [
-                layers.Convolution((10, 3, 3)),
-                layers.Convolution((5, 3, 3)),
-            ]],
-            layers.Concatenate(),
-        )
-        output_layer = layers.MaxPooling((2, 2))
-
-        conn = layers.join(input_layer, parallel_layer)
-        output_connection = layers.join(conn, output_layer)
-        output_connection.initialize()
-
-        x = T.tensor4()
-        y = theano.function([x], conn.output(x))
-
-        x_tensor4 = asfloat(np.random.random((10, 3, 8, 8)))
-        output = y(x_tensor4)
-        self.assertEqual(output.shape, (10, 11 + 5, 4, 4))
-
-        output_function = theano.function([x], output_connection.output(x))
-        final_output = output_function(x_tensor4)
-        self.assertEqual(final_output.shape, (10, 11 + 5, 2, 2))
-
-    def test_parallel_with_joined_connections(self):
-        # Should work without errors
-        layers.Parallel(
-            [
-                layers.Convolution((11, 5, 5)) > layers.Relu(),
-                layers.Convolution((10, 3, 3)) > layers.Relu(),
-            ],
-            layers.Concatenate() > layers.Relu(),
-        )
-
-    def test_parallel_layer_exceptions(self):
-        with self.assertRaises(ValueError):
-            layers.Parallel(layers.Convolution((11, 5, 5)),
-                            layers.Concatenate())
-
-        with self.assertRaises(ValueError):
-            layers.Parallel([[layers.Convolution((11, 5, 5))]],
-                            'not a layer object')
-
-    def test_parallel_layer_with_residual_connections(self):
-        connection = layers.Input((3, 8, 8)) > layers.Parallel(
-            [[
-                layers.Convolution((7, 1, 1)),
-                layers.Relu()
-            ], [
-                # Residual connection
-            ]],
-            layers.Concatenate(),
-        )
-        self.assertEqual(connection.output_shape, (10, 8, 8))
diff --git a/tests/layers/test_pooling_layers.py b/tests/layers/test_pooling_layers.py
index 54efa7cc..349c97a8 100644
--- a/tests/layers/test_pooling_layers.py
+++ b/tests/layers/test_pooling_layers.py
@@ -6,8 +6,8 @@
 
 from neupy import layers
 from neupy.utils import asfloat
+from neupy.exceptions import LayerConnectionError
 from neupy.layers.pooling import pooling_output_shape
-from neupy.layers.connections import LayerConnectionError
 
 from base import BaseTestCase
 
diff --git a/tests/preprocessing/__init__.py b/tests/preprocessing/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/preprocessing/images/cifar10.png b/tests/preprocessing/images/cifar10.png
deleted file mode 100644
index cdc1eab6..00000000
Binary files a/tests/preprocessing/images/cifar10.png and /dev/null differ
diff --git a/tests/preprocessing/plots/zca/test_py2_simple_zca.png b/tests/preprocessing/plots/zca/test_py2_simple_zca.png
deleted file mode 100644
index 8c89e911..00000000
Binary files a/tests/preprocessing/plots/zca/test_py2_simple_zca.png and /dev/null differ
diff --git a/tests/preprocessing/plots/zca/test_py3_simple_zca.png b/tests/preprocessing/plots/zca/test_py3_simple_zca.png
deleted file mode 100644
index 8c89e911..00000000
Binary files a/tests/preprocessing/plots/zca/test_py3_simple_zca.png and /dev/null differ
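Migration note for downstream test code: `LayerConnectionError` now lives in `neupy.exceptions`; the old `neupy.layers.connections` import path is gone. A sketch, assuming the error is still raised on dimension-incompatible connections as these tests expect:

    from neupy import layers
    from neupy.exceptions import LayerConnectionError  # was: neupy.layers.connections

    try:
        # Pooling expects a 3-D input, so joining it to a 1-D input
        # should fail with a connection error (my assumption here).
        layers.join(layers.Input(10), layers.MaxPooling((2, 2)))
    except LayerConnectionError as exception:
        print("invalid connection:", exception)
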
diff --git a/tests/preprocessing/test_zca.py b/tests/preprocessing/test_zca.py
deleted file mode 100644
index 6d67ce2a..00000000
--- a/tests/preprocessing/test_zca.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import os
-
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy.misc import imread
-
-from neupy import preprocessing
-from neupy.utils import NotTrainedException
-
-from base import BaseTestCase
-from utils import (image_comparison, format_image_name,
-                   skip_image_comparison_if_specified)
-
-
-CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))
-PLOTS_DIR = os.path.join(CURRENT_DIR, "plots", "zca")
-IMGDIR = os.path.join(CURRENT_DIR, "images")
-
-
-class ZCATestCase(BaseTestCase):
-    def test_zca_exceptions(self):
-        with self.assertRaises(NotTrainedException):
-            data = np.random.random((3, 2))
-            zca = preprocessing.ZCA()
-            zca.transform(data)
-
-    @skip_image_comparison_if_specified
-    def test_simple_zca(self):
-        plt.style.use('ggplot')
-
-        original_image_name = format_image_name("simple_zca.png")
-        original_image = os.path.join(PLOTS_DIR, original_image_name)
-        image = os.path.join(IMGDIR, "cifar10.png")
-
-        data = imread(image)
-        data = data[:, :, 0]
-
-        comparison_kwargs = dict(figsize=(10, 6), tol=0.05)
-
-        with image_comparison(original_image, **comparison_kwargs) as fig:
-            ax = fig.add_subplot(1, 1, 1)
-
-            zca = preprocessing.ZCA(0.001)
-            zca.train(data)
-            data_transformed = zca.transform(data)
-
-            ax.imshow(data_transformed, cmap=plt.cm.binary)
-
-        with image_comparison(original_image, **comparison_kwargs) as fig:
-            ax = fig.add_subplot(1, 1, 1)
-
-            zca = preprocessing.ZCA(0.001)
-            data_transformed = zca.fit(data).transform(data)
-
-            ax.imshow(data_transformed, cmap=plt.cm.binary)
diff --git a/tests/surgery/__init__.py b/tests/surgery/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/surgery/test_surgery.py b/tests/surgery/test_surgery.py
deleted file mode 100644
index 30c00199..00000000
--- a/tests/surgery/test_surgery.py
+++ /dev/null
@@ -1,276 +0,0 @@
-import collections
-
-import numpy as np
-
-from neupy import surgery, algorithms, layers
-from neupy.utils import as_tuple
-
-from base import BaseTestCase
-
-
-class ConnectionIsolationTestCase(BaseTestCase):
-    def test_layer_isolation(self):
-        surgery.isolate_connection_if_needed(layers.Sigmoid(10))
-
-    def test_connection_isolation(self):
-        connection = layers.Input(5) > layers.Sigmoid(10)
-        surgery.isolate_connection_if_needed(connection)
-
-    def test_isolate_invalid_data_type(self):
-        with self.assertRaises(TypeError):
-            surgery.isolate_connection_if_needed('invalid object')
-
-
-class SurgeryCutTestCase(BaseTestCase):
-    def setUp(self):
-        super(SurgeryCutTestCase, self).setUp()
-        self.network = algorithms.GradientDescent([
-            layers.Input(30),
-            layers.Sigmoid(10),
-            layers.Sigmoid(20),
-            layers.Sigmoid(1),
-        ])
-
-    def test_cutting_exceptions(self):
-        with self.assertRaises(ValueError):
-            surgery.cut(algorithms.PNN(), 0, 1)
-
-        with self.assertRaises(ValueError):
-            surgery.cut(self.network, 0, 10)
-
-        with self.assertRaises(ValueError):
-            surgery.cut(self.network, 0, 0)
-
-    def test_cut_layers_basics(self):
-        testcases = [
-            dict(kwargs=dict(connection=self.network, start=0, end=2),
-                 expected_sizes=(30, 10)),
-            dict(kwargs=dict(connection=self.network, start=1, end=3),
-                 expected_sizes=(10, 20)),
-            dict(kwargs=dict(connection=self.network, start=1, end=-1),
-                 expected_sizes=(10, 20)),
-        ]
-
-        for testcase in testcases:
-            layers = surgery.cut(**testcase['kwargs'])
-            output_shapes = [layer.output_shape for layer in iter(layers)]
-            self.assertEqual(
-                as_tuple(*output_shapes),
-                testcase['expected_sizes']
-            )
-
-    def test_cut_one_layer(self):
-        input_layer = surgery.cut(self.network, start=0, end=1)
-        self.assertIsInstance(input_layer, layers.Input)
-        self.assertEqual(input_layer.output_shape, (30,))
-
-    def test_cut_layer_copy(self):
-        # Check connection instead of networks as a different
-        # acceptible object type.
-        connection = self.network.connection
-        layer = surgery.cut(connection, start=1, end=2)
-
-        self.assertIsNot(self.network.layers[1], layer)
-
-        x = np.random.random((10, 30))
-        y = np.random.random((10, 1))
-        self.network.train(x, y, epochs=20)
-
-        trained_layer = self.network.layers[1]
-        trained_weight = trained_layer.weight.get_value()
-        copied_weight = layer.weight.get_value()
-
-        self.assertTrue(np.any(trained_weight != copied_weight))
-
-
-class SurgerySewTogetherTestCase(BaseTestCase):
-    def test_sew_together_cutted_pieces(self):
-        network1 = algorithms.GradientDescent([
-            layers.Input(100),
-            layers.Sigmoid(200),
-            layers.Sigmoid(100),
-        ])
-        network2 = algorithms.GradientDescent([
-            layers.Input(10),
-            layers.Sigmoid(20),
-            layers.Sigmoid(10),
-        ])
-
-        first_part = surgery.cut(network1, start=0, end=2)
-        self.assertEqual(first_part.output_shape, (200,))
-        self.assertEqual(first_part.input_shape, (100,))
-
-        second_part = surgery.cut(network2, start=0, end=2)
-        self.assertEqual(second_part.output_shape, (20,))
-        self.assertEqual(second_part.input_shape, (10,))
-
-    def test_sew_together_basic(self):
-        connection = surgery.sew_together([
-            layers.Sigmoid(24),
-            layers.Sigmoid(12) > layers.Sigmoid(6),
-            layers.Sigmoid(3),
-        ])
-        expected_shapes = (24, 12, 6, 3)
-        output_shapes = [layer.output_shape for layer in iter(connection)]
-
-        self.assertEqual(as_tuple(*output_shapes), expected_shapes)
-
-    def test_sew_together_when_cutted_piece_already_in_use(self):
-        autoencoder = algorithms.Momentum([
-            layers.Input(25),
-            layers.Sigmoid(15),
-            layers.Sigmoid(25),
-        ])
-
-        encoder = surgery.cut(autoencoder, start=0, end=2)
-        self.assertEqual(len(encoder), 2)
-
-        network = algorithms.GradientDescent([
-            layers.Input(5),
-
-            surgery.CutLine(),  # <- first cut point
-
-            layers.Sigmoid(10),
-            layers.Sigmoid(20),
-            layers.Sigmoid(30),
-
-            surgery.CutLine(),  # <- second cut point
-
-            layers.Sigmoid(1),
-        ])
-        _, hidden_layers, _ = surgery.cut_along_lines(network)
-        self.assertEqual(len(hidden_layers), 3)
-
-        connected_layers = surgery.sew_together([
-            encoder,
-            layers.Relu(5),
-            hidden_layers
-        ])
-        self.assertEqual(len(connected_layers), 6)
-
-    def test_sew_together_empty_list(self):
-        self.assertIs(surgery.sew_together([]), None)
-
-
-class SurgeryCutAlongLinesTestCase(BaseTestCase):
-    def test_cut_along_lines_basic(self):
-        network = algorithms.GradientDescent([
-            layers.Input(5),
-
-            surgery.CutLine(),
-
-            layers.Sigmoid(10),
-            layers.Sigmoid(20),
-            layers.Sigmoid(30),
-
-            surgery.CutLine(),
-
-            layers.Sigmoid(1),
-        ])
-
-        for connection in (network, network.connection):
-            _, interested_layers, _ = surgery.cut_along_lines(connection)
-            cutted_shapes = [layer.output_shape for layer in interested_layers]
-
-            self.assertEqual(as_tuple(*cutted_shapes), (10, 20, 30))
-
-    def test_cut_along_lines_check_cut_points(self):
-        testcases = (
-            dict(
-                network=algorithms.GradientDescent([
-                    layers.Input(5),
-                    layers.Sigmoid(10),
-                    layers.Sigmoid(20),
-                    layers.Sigmoid(30),
-
-                    surgery.CutLine(),
-
-                    layers.Sigmoid(1),
-                ]),
-                expected_shapes=[(5, 10, 20, 30), (1,)]
-            ),
-            dict(
-                network=algorithms.GradientDescent([
-                    layers.Input(5),
-                    layers.Sigmoid(10),
-                    layers.Sigmoid(20),
-                    layers.Sigmoid(30),
-
-                    surgery.CutLine(),
-                    surgery.CutLine(),
-
-                    layers.Sigmoid(1),
-                ]),
-                expected_shapes=[(5, 10, 20, 30), (1,)]
-            ),
-            dict(
-                network=algorithms.GradientDescent([
-                    layers.Input(5),
-
-                    surgery.CutLine(),
-                    layers.Sigmoid(10),
-
-                    surgery.CutLine(),
-                    layers.Sigmoid(20),
-
-                    surgery.CutLine(),
-                    layers.Sigmoid(30),
-
-                    surgery.CutLine(),
-
-                    layers.Sigmoid(1),
-                    surgery.CutLine(),
-                ]),
-                expected_shapes=[(5,), (10,), (20,), (30,), (1,)]
-            ),
-            dict(
-                network=surgery.sew_together([
-                    layers.Input(5),
-                    layers.Sigmoid(10),
-                    layers.Sigmoid(20),
-                    layers.Sigmoid(30),
-                    layers.Sigmoid(1),
-                ]),
-                expected_shapes=[(5, 10, 20, 30, 1)]
-            ),
-        )
-
-        for test_id, testcase in enumerate(testcases):
-            connections = surgery.cut_along_lines(testcase['network'])
-
-            actual_shapes = []
-            for connection in connections:
-                if isinstance(connection, collections.Iterable):
-                    shapes = [layer.output_shape for layer in connection]
-                else:
-                    layer = connection
-                    shapes = as_tuple(layer.output_shape)
-
-                actual_shapes.append(as_tuple(*shapes))
-
-            self.assertEqual(
-                actual_shapes,
-                testcase['expected_shapes'],
-                msg="Test ID: {}".format(test_id)
-            )
-
-    def test_cut_expcetion_non_feedforward(self):
-        input_layer = layers.Input(10)
-        layers.join(input_layer, layers.Sigmoid(1))
-        connection = layers.join(input_layer, layers.Sigmoid(2))
-
-        with self.assertRaisesRegexp(ValueError, r"non-sequential"):
-            # Relations betweeen layers is not sequential
-            surgery.cut(connection, start=0, end=1)
-
-    def test_cut_expcetion_invalid_end_parameter(self):
-        connection = layers.Input(10) > layers.Sigmoid(1)
-        with self.assertRaises(ValueError):
-            # Cannot cut till the 10th layer, bacuase connection has
-            # only two layers
-            surgery.cut(connection, start=0, end=10)
-
-    def test_cut_expection_slice_cutted_nothing(self):
-        connection = layers.Input(10) > layers.Sigmoid(1)
-        with self.assertRaises(ValueError):
-            surgery.cut(connection, start=0, end=0)
diff --git a/tests/utils.py b/tests/utils.py
index 44195112..41471140 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -66,7 +66,7 @@ def compare_networks(default_class, tested_class, data, **kwargs):
 
     # Compute result for default network (which must be slower)
     network = default_class(**kwargs)
-    default_connections = layers.join(copy.deepcopy(network.layers))
+    default_connections = copy.deepcopy(network.connection)
 
     network.train(*data, epochs=epochs)
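With `neupy.surgery` deleted along with its test suite, there is no direct replacement for the cut/sew workflow in this changeset; the pattern the remaining code follows is to define sub-connections up front and compose them. A rough sketch using the layer sizes from the deleted `test_sew_together_when_cutted_piece_already_in_use`; the weight-sharing behaviour noted below is my assumption, not something these tests verify:

    from neupy import layers

    # Define the encoder once, instead of cutting it out of a trained
    # autoencoder with surgery.cut(...) afterwards.
    encoder = layers.join(layers.Input(25), layers.Sigmoid(15))

    # Compose connections directly. Both networks reference the same
    # encoder object, so (assuming shared layer objects keep shared
    # parameters) training one updates the other.
    autoencoder = encoder > layers.Sigmoid(25)
    classifier = encoder > layers.Softmax(10)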