diff --git a/LICENSE b/LICENSE index 60f2f8d1..bc676d53 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015 Luke Chang +Copyright (c) 2015-2018 Cosan Lab Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - diff --git a/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle index e6a196a8..a888f678 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_design_matrix_codeobj.pickle differ diff --git a/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle index 67e954f2..fb7298ad 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle differ diff --git a/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle b/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle index 512e0899..3852af7b 100644 Binary files a/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle and b/docs/auto_examples/01_DataOperations/plot_neurovault_io_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle index 34963645..0a66e998 100644 Binary files a/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle index 3755e90a..47f2f22d 100644 Binary files a/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_multivariate_classification_codeobj.pickle differ diff --git a/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle b/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle index 06d9ab07..908737fc 100644 Binary files a/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle and b/docs/auto_examples/02_Analysis/plot_univariate_regression_codeobj.pickle differ diff --git a/docs/auto_examples/auto_examples_jupyter.zip b/docs/auto_examples/auto_examples_jupyter.zip index 6f25c7f7..dcb97be3 100644 Binary files a/docs/auto_examples/auto_examples_jupyter.zip and b/docs/auto_examples/auto_examples_jupyter.zip differ diff --git a/docs/auto_examples/auto_examples_python.zip b/docs/auto_examples/auto_examples_python.zip index d3cd6e7b..1870b201 100644 Binary files a/docs/auto_examples/auto_examples_python.zip and b/docs/auto_examples/auto_examples_python.zip differ diff --git a/docs/auto_examples/index.rst b/docs/auto_examples/index.rst index 3215f8f6..8b1af970 100644 --- a/docs/auto_examples/index.rst +++ b/docs/auto_examples/index.rst @@ -202,6 +202,26 @@ Neuroimaging Analysis Examples :hidden: /auto_examples/02_Analysis/plot_multivariate_prediction + +.. 
raw:: html + +
+ +.. only:: html + + .. figure:: /auto_examples/02_Analysis/images/thumb/sphx_glr_plot_hyperalignment_thumb.png + + :ref:`sphx_glr_auto_examples_02_Analysis_plot_hyperalignment.py` + +.. raw:: html + +
+ + +.. toctree:: + :hidden: + + /auto_examples/02_Analysis/plot_hyperalignment .. raw:: html
diff --git a/docs/backreferences/nltools.data.Brain_Data.align.examples b/docs/backreferences/nltools.data.Brain_Data.align.examples new file mode 100644 index 00000000..e69de29b diff --git a/docs/backreferences/nltools.stats.align.examples b/docs/backreferences/nltools.stats.align.examples new file mode 100644 index 00000000..e69de29b diff --git a/docs/backreferences/nltools.stats.procrustes.examples b/docs/backreferences/nltools.stats.procrustes.examples new file mode 100644 index 00000000..e69de29b diff --git a/examples/02_Analysis/plot_hyperalignment.py b/examples/02_Analysis/plot_hyperalignment.py new file mode 100644 index 00000000..19f9a88a --- /dev/null +++ b/examples/02_Analysis/plot_hyperalignment.py @@ -0,0 +1,207 @@ +""" +Functional Alignment +==================== + +When performing any type of group analysis, we assume that each voxel is +reflecting the same computations across all participants. This assumption is +unlikely to be true. Several standard preprocessing steps assist in improving +'anatomical alignment'. We spatially normalize to a common anatomical template +and we also apply spatial smoothing to improve signal to noise ratios in a +target voxel by averaging activity in surrounding voxels with a gaussian kernel. +However, these techniques are limited when learning multivariate models, where +voxel alignment across participants is critical to making accurate inference. +There have been several developments in improving 'functional alignment'. +Jim Haxby's group has pioneered `hyperalignment +`_, which uses an +iterative procrustes transform to scale, rotate, and reflect voxel time series +so that they are in the same functional space across participants. They have +found that this technique can dramatically improve between subject +classification accuracy particularly in `ventral temporal cortex +`_. This technique is +implemented in the `PyMVPA `_ toolbox. Another promising +functional alignment technique known as the `Shared Response Model `_ +was developed at Princeton to improve intersubject-connectivity analyses and is +implemented in the `brainiak `_ toolbox. +They also have found that this technique can improve between subject analyses. +This method has several additional interesting properties such as the ability +to learn a lower dimensional common representational space and also a +probabilistic implementation. In this tutorial we demonstrate how to perform +functional alignment using both hyperalignment and the shared response model +using nltools. + +""" + +######################################################################### +# Simulate Data +# ------------- +# +# First, let's simulate some data to align. Here we will simulate 3 subjects +# with 100 data points. Each subject has signal in 30% of the voxels in the +# MPFC with noise. 
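Before walking through the simulated Brain_Data example below, a minimal standalone sketch of the idea behind functional alignment may help: if one subject's voxel time series are just an orthogonal re-mixing of another's, an orthogonal Procrustes rotation recovers that mapping. This toy snippet uses only numpy and scipy and is not part of the tutorial code; the variable names are illustrative.

import numpy as np
from scipy.linalg import orthogonal_procrustes

rng = np.random.RandomState(0)
source = rng.randn(100, 20)                    # time x voxels for "subject 1"
mixing = np.linalg.qr(rng.randn(20, 20))[0]    # random orthogonal voxel re-mixing
target = source.dot(mixing)                    # "subject 2": same signal, scrambled voxels
R, _ = orthogonal_procrustes(source, target)   # rotation/reflection minimizing ||source R - target||
print(np.allclose(source.dot(R), target))      # True: subject 1 is mapped into subject 2's space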
+ +import numpy as np +from nltools.mask import create_sphere +from nltools.data import Brain_Data +import matplotlib.pyplot as plt +from nilearn.plotting import plot_glass_brain + +n_observations = 500 +p = .3 +sigma = 1 +n_sub = 3 + +y = np.zeros(n_observations) +y[np.arange(75,150)] = 4 +y[np.arange(200,250)] = 10 +y[np.arange(300,475)] = 7 + +def simulate_data(n_observations, y, p, sigma, mask): + ''' Simulate Brain Data + + Args: + n_observations: (int) number of data points + y: (array) one dimensional array of signal + p: (float) probability of signal in voxels + sigma: (float) amount of gaussian noise to add + + Returns: + data: (list) of Brain_Data objects + ''' + + dat = Brain_Data(mask).apply_mask(mask) + new_data = np.zeros((dat.shape()[0], n_observations)) + for i in np.where(dat.data==1)[0]: + if np.random.randint(0,high=10) < p: + new_data[i,:] = y + noise = np.random.randn(new_data.shape[0],n_observations)*sigma + dat.data = (new_data+noise).T + return dat + +mask = create_sphere([0, 45, 0], radius=8) +data = [simulate_data(n_observations, y, p, sigma, mask) for x in range(n_sub)] + +plt.figure(figsize=(10,3)) +plt.plot(y) +plt.title('Simulated Signal', fontsize=20) +plt.xlabel('Time', fontsize=18) +plt.ylabel('Signal', fontsize=18) +plot_glass_brain(data[0].mean().to_nifti()) + +######################################################################### +# Hyperalign Data +# --------------- +# +# We will now align voxels with the same signal across participants. We will +# start using hyperalignment with the procrustes transform. The align function +# takes a list of Brain_Data objects (or numpy matrices) and aligns voxels based +# on similar responses over time. The function outputs a dictionary with keys +# for a list of the transformed data, corresponding transofmration matrices and +# scaling terms. In addition it returns the "common model" in which all +# subjects are projected. The disparity values correspond to the multivariate +# distance of the subject to the common space. + +from nltools.stats import align + +out = align(data, method='procrustes') + +print(out.keys()) + + +######################################################################### +# Plot Transformed Data +# --------------------- +# +# To make it more clear what it is happening we plot the voxel by time matrices +# separately for each subject. It is clear that there is a consistent signal +# across voxels, but that the signal is distributed across 'different' voxels. +# The transformed data shows the voxels for each subject aligned to the common +# space. This now permits inferences across the voxels. As an example, we +# plot the matrices of the original compared to the aligned data across subjects. 
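Before plotting, the pieces of the dictionary described above can be inspected directly. This is an illustrative sketch only; it assumes the `out` dictionary returned by align(data, method='procrustes') in this example:

print(len(out['transformed']))                # one aligned Brain_Data object per subject
print(out['transformed'][0].shape())          # observations x voxels, same shape as the input data
print(out['common_model'].shape())            # the common space that all subjects are projected into
print(out['transformation_matrix'][0].shape)  # square voxels x voxels mapping for subject 1
print(out['disparity'])                       # per-subject multivariate distance from the common space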
+ +f,a = plt.subplots(nrows=2, ncols=3, figsize=(15,5), sharex=True, sharey=True) +[a[0,i].imshow(x.data.T, aspect='auto') for i,x in enumerate(data)] +[a[1,i].imshow(x.data.T, aspect='auto') for i,x in enumerate(out['transformed'])] +a[0,0].set_ylabel('Original Voxels', fontsize=16) +a[1,0].set_ylabel('Aligned Features', fontsize=16) +[a[1,x].set_xlabel('Time', fontsize=16) for x in range(3)] +[a[0,x].set_title('Subject %s' % str(x+1), fontsize=16) for x in range(3)] +plt.tight_layout() + +f,a = plt.subplots(ncols=2, figsize=(15,5), sharex=True, sharey=True) +a[0].imshow(np.mean(np.array([x.data.T for x in data]), axis=0), aspect='auto') +a[1].imshow(np.mean(np.array([x.data.T for x in out['transformed']]), axis=0), aspect='auto') +a[0].set_ylabel('Voxels', fontsize=16) +[a[x].set_xlabel('Time', fontsize=16) for x in range(2)] +a[0].set_title('Average Voxel x Time Matrix of Original Data', fontsize=16) +a[1].set_title('Average Voxel x Time Matrix of Aligned Data', fontsize=16) + + +######################################################################### +# Transform aligned data back into original subject space +# ------------------------------------------------------- +# +# The transformation matrices can be used to project each subject's aligned +# data into the original subject specific voxel space. The procrustes method +# doesn't look identical as there are a few processing steps that occur within +# the algorithm that would need to be accounted for to fully recover the original +# data (e.g., centering, and scaling by norm). + +original_data = [np.dot(t.data, tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] + +f,a = plt.subplots(nrows=3, ncols=3, figsize=(15,10), sharex=True, sharey=True) +[a[0, i].imshow(x.data.T, aspect='auto') for i, x in enumerate(data)] +[a[1, i].imshow(x.data.T, aspect='auto') for i, x in enumerate(out['transformed'])] +[a[2, i].imshow(x.T, aspect='auto') for i, x in enumerate(original_data)] +[a[i, 0].set_ylabel(x,fontsize=16) for i, x in enumerate(['Original Voxels','Aligned Features', 'Backprojected Voxels'])] +[a[2, x].set_xlabel('Time', fontsize=16) for x in range(3)] +[a[0, x].set_title('Subject %s' % str(x+1), fontsize=16) for x in range(3)] +plt.tight_layout() + + +######################################################################### +# Align new subject to common model +# --------------------------------- +# +# We can also align a new subject to the common model without retraining the +# entire model. Here we individually align subject 3 to the common space +# learned above. We also backproject the transformed subject's data into the +# original subject voxel space. 
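In addition to the shared response model used in the code that follows, the same single-subject alignment can also be run with the procrustes transform. The sketch below is illustrative only and assumes the `out` dictionary from the procrustes alignment earlier in this example; the back-projection follows the pattern given in the align docstring.

d3_proc = data[2].align(out['common_model'], method='procrustes')
bp_proc = np.dot(d3_proc['transformed'].data, d3_proc['transformation_matrix'].T)
print(d3_proc.keys())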
+ +d3 = data[2] +d3_out = d3.align(out['common_model'], method='deterministic_srm') +bp = np.dot(d3_out['transformed'].data, d3_out['transformation_matrix'].T) + +f,a = plt.subplots(ncols=3, figsize=(15,5), sharex=True, sharey=True) +a[0].imshow(d3.data.T, aspect='auto') +a[1].imshow(d3_out['transformed'].data.T, aspect='auto') +a[2].imshow(bp.T, aspect='auto') +[a[i].set_title(x,fontsize=18) for i, x in enumerate(['Original Data',' Transformed_Data', 'Backprojected Data'])] +[a[x].set_xlabel('Time', fontsize=16) for x in range(2)] +a[0].set_ylabel('Voxels', fontsize=16) +plt.tight_layout() + +######################################################################### +# Align subjects in lower dimensional common space +# --------------------------------- +# +# The shared response model allows for the possibility of aligning in a lower +# dimensional functional space. Here we provide an example of aligning to a 10 +# dimensional features space. Previous work has found that this can potentially +# improve generalizability of multivariate models trained on an ROI compared to +# using as many features as voxels. This feature is not yet implemented for +# procrustes transformation as dimensionality reduction would need to happen +# either before or after alignment. + +n_features = 10 +out = align(data, method='probabilistic_srm', n_features=n_features) + +original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'],out['transformation_matrix'])] + +f,a = plt.subplots(nrows=3, ncols=3, figsize=(15,10), sharex=True, sharey=False) +[a[0, i].imshow(x.data.T, aspect='auto') for i, x in enumerate(data)] +[a[1, i].imshow(x.data.T, aspect='auto') for i, x in enumerate(out['transformed'])] +[a[2, i].imshow(x.T, aspect='auto') for i, x in enumerate(original_data)] +[a[i, 0].set_ylabel(x, fontsize=16) for i, x in enumerate(['Original Voxels','Aligned Features', 'Backprojected Voxels'])] +[a[2, x].set_xlabel('Time', fontsize=16) for x in range(3)] +[a[0, x].set_title('Subject %s' % str(x+1), fontsize=16) for x in range(3)] +plt.tight_layout() diff --git a/nltools/LICENSE_nipy b/nltools/LICENSE_nipy deleted file mode 100644 index ff6fafa8..00000000 --- a/nltools/LICENSE_nipy +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2006-2017, NIPY Developers -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NIPY Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/nltools/__init__.py b/nltools/__init__.py index 75cfa399..ec6b1148 100644 --- a/nltools/__init__.py +++ b/nltools/__init__.py @@ -11,6 +11,7 @@ 'pbs_job', 'mask', 'prefs', + 'external', '__version__'] from .analysis import Roc @@ -24,3 +25,5 @@ from .simulator import Simulator from .prefs import MNI_Template, resolve_mni_path from .version import __version__ +from .mask import expand_mask, collapse_mask, create_sphere +from .external import SRM, DetSRM diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py index d9df1df7..330ae1d4 100644 --- a/nltools/data/adjacency.py +++ b/nltools/data/adjacency.py @@ -27,10 +27,13 @@ concatenate, _bootstrap_apply_func) from joblib import Parallel, delayed +from sklearn.utils import check_random_state # Optional dependencies nx = attempt_to_import('networkx', 'nx') +MAX_INT = np.iinfo(np.int32).max + class Adjacency(object): ''' @@ -576,7 +579,7 @@ def plot_silhouette(self, labels, ax=None, permutation_test=True, return (f,outAll) def bootstrap(self, function, n_samples=5000, save_weights=False, - n_jobs=-1, *args, **kwargs): + n_jobs=-1, random_state=None, *args, **kwargs): '''Bootstrap an Adjacency method. Example Useage: @@ -595,8 +598,11 @@ def bootstrap(self, function, n_samples=5000, save_weights=False, ''' + random_state = check_random_state(random_state) + seeds = random_state.randint(MAX_INT, size=n_samples) bootstrapped = Parallel(n_jobs=n_jobs)( delayed(_bootstrap_apply_func)(self, - function, *args, **kwargs) for i in range(n_samples)) + function, random_state=seeds[i], *args, **kwargs) + for i in range(n_samples)) bootstrapped = Adjacency(bootstrapped) return summarize_bootstrap(bootstrapped, save_weights=save_weights) diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index 29082be6..ce30596b 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -16,7 +16,7 @@ import pickle # import cPickle from nilearn.signal import clean -from scipy.stats import ttest_1samp, t, norm +from scipy.stats import ttest_1samp, t, norm, spearmanr from scipy.signal import detrend from scipy.spatial.distance import squareform import os @@ -30,7 +30,8 @@ import tempfile from copy import deepcopy import six -from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity +from sklearn.utils import check_random_state from pynv import Client from joblib import Parallel, delayed from nltools.mask import expand_mask, collapse_mask @@ -43,7 +44,6 @@ from nltools.utils import (get_resource_path, set_algorithm, get_anatomical, - glover_hrf, attempt_to_import, concatenate, _bootstrap_apply_func, @@ -68,25 +68,19 @@ make_cosine_basis, transform_pairwise, summarize_bootstrap, - regress) + regress, + procrustes) from nltools.pbs_job import PBS_Job from .adjacency import Adjacency from nltools.prefs import MNI_Template, resolve_mni_path +from nltools.external.srm import DetSRM, SRM # Optional dependencies +nx 
= attempt_to_import('networkx', 'nx') mne_stats = attempt_to_import('mne.stats',name='mne_stats', fromlist= ['spatio_temporal_cluster_1samp_test', 'ttest_1samp_no_p']) -try: - from mne.stats import (spatio_temporal_cluster_1samp_test, - ttest_1samp_no_p) -except ImportError: - pass - -try: - import networkx as nx -except ImportError: - pass +MAX_INT = np.iinfo(np.int32).max class Brain_Data(object): @@ -570,19 +564,19 @@ def similarity(self, image, method='correlation'): Args: image: Brain_Data or Nibabel instance of weight map - + method: (str) Type of similarity + ['correlation','dot_product','cosine'] Returns: pexp: Outputs a vector of pattern expression values """ if not isinstance(image, Brain_Data): - if isinstance(image, nib.Nifti1Image): - image = Brain_Data(image, mask=self.mask) - else: - raise ValueError("Image is not a Brain_Data or nibabel " - "instance") - dim = image.shape() + if isinstance(image, nib.Nifti1Image): + image = Brain_Data(image, mask=self.mask) + else: + raise ValueError("Image is not a Brain_Data or nibabel " + "instance") # Check to make sure masks are the same for each dataset and if not # create a union mask @@ -598,6 +592,21 @@ def similarity(self, image, method='correlation'): data2 = self.data image2 = image.data + def vector2array(data): + if len(data.shape) == 1: + return data.reshape(-1,1).T + else: + return data + + def flatten_array(data): + if np.any(np.array(data.shape)==1): + data = data.flatten() + if len(data)==1 & data.shape[0]==1: + data = data[0] + return data + else: + return data + # Calculate pattern expression if method is 'dot_product': if len(image2.shape) > 1: @@ -621,9 +630,19 @@ def similarity(self, image, method='correlation'): pexp = pearson(image2, data2) else: pexp = pearson(image2, data2) + elif method is 'cosine': + image2 = vector2array(image2) + data2 = vector2array(data2) + if image2.shape[1] > 1: + pexp = [] + for i in range(image2.shape[0]): + pexp.append(cosine_similarity(image2[i, :].reshape(-1,1).T, data2).flatten()) + pexp = np.array(pexp) + else: + pexp = cosine_similarity(image2, data2).flatten() else: - raise ValueError("Method must be one of: correlation, dot_product") - return pexp + raise ValueError('Method must be one of: correlation, dot_product, cosine') + return flatten_array(pexp) def distance(self, method='euclidean', **kwargs): """ Calculate distance between images within a Brain_Data() instance. @@ -1222,7 +1241,7 @@ def threshold(self, upper=None, lower=None, binarize=False): if isinstance(upper, six.string_types): if upper[-1] is '%': upper = np.percentile(b.data, float(upper[:-1])) - if isinstance(upper, six.string_types): + if isinstance(lower, six.string_types): if lower[-1] is '%': lower = np.percentile(b.data, float(lower[:-1])) @@ -1289,7 +1308,7 @@ def transform_pairwise(self): return out def bootstrap(self, function, n_samples=5000, save_weights=False, - n_jobs=-1, *args, **kwargs): + n_jobs=-1, random_state=None, *args, **kwargs): '''Bootstrap a Brain_Data method. 
Example Useage: @@ -1308,9 +1327,14 @@ def bootstrap(self, function, n_samples=5000, save_weights=False, ''' + random_state = check_random_state(random_state) + seeds = random_state.randint(MAX_INT, size=n_samples) + + bootstrapped = Parallel(n_jobs=n_jobs)( delayed(_bootstrap_apply_func)(self, - function, *args, **kwargs) for i in range(n_samples)) + function, random_state=seeds[i], *args, **kwargs) + for i in range(n_samples)) if function is 'predict': bootstrapped = [x['weight_map'] for x in bootstrapped] @@ -1349,6 +1373,89 @@ def decompose(self, algorithm='pca', axis='voxels', n_components=None, out['components'].data = out['decomposition_object'].components_ return out + def align(self, target, method='procrustes', n_features=None, axis=0, + *args, **kwargs): + ''' Align Brain_Data instance to target object + + Can be used to hyperalign source data to target data using + Hyperalignemnt from Dartmouth (i.e., procrustes transformation; see + nltools.stats.procrustes) or Shared Response Model from Princeton (see + nltools.external.srm). (see nltools.stats.align for aligning many data + objects together). Common Model is shared response model or centered + target data.Transformed data can be back projected to original data + using Tranformation matrix. + + Examples: + Hyperalign using procrustes transform: + out = data.align(target, method='procrustes') + + Align using shared response model: + out = data.align(target, method='probabilistic_srm', n_features=None) + + Project aligned data into original data: + original_data = np.dot(out['transformed'].data,out['transformation_matrix'].T) + + Args: + target: (Brain_Data) object to align to. + method: (str) alignment method to use + ['probabilistic_srm','deterministic_srm','procrustes'] + n_features: (int) number of features to align to common space. + If None then will select number of voxels + axis: (int) axis to align on + + Returns: + out: (dict) a dictionary containing transformed object, + transformation matrix, and the shared response matrix + + ''' + + source = self.copy() + common = target.copy() + + assert isinstance(target, Brain_Data), "Target must be Brain_Data instance." + assert method in ['probabilistic_srm', 'deterministic_srm','procrustes'], "Method must be ['probabilistic_srm','deterministic_srm','procrustes']" + + data1 = source.data.T + data2 = target.data.T + + if axis == 1: + data1 = data1.T + data2 = data2.T + + out = dict() + if method in ['deterministic_srm', 'probabilistic_srm']: + if n_features is None: + n_features = data1.shape[0] + if method == 'deterministic_srm': + srm = DetSRM(features=n_features, *args, **kwargs) + elif method == 'probabilistic_srm': + srm = SRM(features=n_features, *args, **kwargs) + srm.fit([data1, data2]) + source.data = srm.transform([data1, data2])[0].T + common.data = srm.s_.T + out['transformed'] = source + out['common_model'] = common + out['transformation_matrix'] = srm.w_[0] + elif method == 'procrustes': + if n_features != None: + raise NotImplementedError('Currently must use all voxels.' 
+ 'Eventually will add a PCA' + 'reduction, must do this manually' + 'for now.') + + mtx1, mtx2, out['disparity'], t, out['scale'] = procrustes(data2.T, + data1.T) + source.data = mtx2 + common.data = mtx1 + out['transformed'] = source + out['common_model'] = common + out['transformation_matrix'] = t + if axis == 1: + out['transformed'].data = out['transformed'].data.T + out['common_model'].data = out['common_model'].data.T + return out + + class Groupby(object): def __init__(self, data, mask): diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index 82612c6f..5e813ae0 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -18,7 +18,7 @@ import matplotlib.pyplot as plt import warnings import six -from nltools.utils import glover_hrf +from ..external.hrf import glover_hrf from nltools.stats import (pearson, fdr, threshold, diff --git a/nltools/external/__init__.py b/nltools/external/__init__.py new file mode 100644 index 00000000..412dcd8b --- /dev/null +++ b/nltools/external/__init__.py @@ -0,0 +1,10 @@ +""" +External functions +""" + +from .srm import DetSRM, SRM +from .hrf import (spm_hrf, + glover_hrf, + spm_time_derivative, + glover_time_derivative, + spm_dispersion_derivative) diff --git a/nltools/external/hrf.py b/nltools/external/hrf.py new file mode 100644 index 00000000..175167a4 --- /dev/null +++ b/nltools/external/hrf.py @@ -0,0 +1,177 @@ +''' +HRF Functions +============= + +Various Hemodynamic Response Functions (HRFs) implemented by NiPy + +Copyright (c) 2006-2017, NIPY Developers +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NIPY Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+''' + +__all__ = ['spm_hrf', + 'glover_hrf', + 'spm_time_derivative', + 'glover_time_derivative', + 'spm_dispersion_derivative'] + +from os.path import dirname, join, sep as pathsep +import nibabel as nib +import importlib +import os +from sklearn.pipeline import Pipeline +from scipy.stats import gamma +import numpy as np +import collections +from types import GeneratorType + +def _gamma_difference_hrf(tr, oversampling=16, time_length=32., onset=0., + delay=6, undershoot=16., dispersion=1., + u_dispersion=1., ratio=0.167): + """ Compute an hrf as the difference of two gamma functions + Parameters + ---------- + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the hrf + Returns + ------- + hrf: array of shape(length / tr * oversampling, float), + hrf sampling on the oversampled time grid + """ + dt = tr / oversampling + time_stamps = np.linspace(0, time_length, float(time_length) / dt) + time_stamps -= onset / dt + hrf = gamma.pdf(time_stamps, delay / dispersion, dt / dispersion) - \ + ratio * gamma.pdf( + time_stamps, undershoot / u_dispersion, dt / u_dispersion) + hrf /= hrf.sum() + return hrf + + +def spm_hrf(tr, oversampling=16, time_length=32., onset=0.): + """ Implementation of the SPM hrf model. + + Args: + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the response + + Returns: + hrf: array of shape(length / tr * oversampling, float), + hrf sampling on the oversampled time grid + + """ + + return _gamma_difference_hrf(tr, oversampling, time_length, onset) + + +def glover_hrf(tr, oversampling=16, time_length=32., onset=0.): + """ Implementation of the Glover hrf model. + + Args: + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the response + + Returns: + hrf: array of shape(length / tr * oversampling, float), + hrf sampling on the oversampled time grid + + """ + + return _gamma_difference_hrf(tr, oversampling, time_length, onset, + delay=6, undershoot=12., dispersion=.9, + u_dispersion=.9, ratio=.35) + + +def spm_time_derivative(tr, oversampling=16, time_length=32., onset=0.): + """ Implementation of the SPM time derivative hrf (dhrf) model. + + Args: + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the response + + Returns: + dhrf: array of shape(length / tr, float), + dhrf sampling on the provided grid + + """ + + do = .1 + dhrf = 1. / do * (spm_hrf(tr, oversampling, time_length, onset + do) - + spm_hrf(tr, oversampling, time_length, onset)) + return dhrf + +def glover_time_derivative(tr, oversampling=16, time_length=32., onset=0.): + """Implementation of the flover time derivative hrf (dhrf) model. + + Args: + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the response + + Returns: + dhrf: array of shape(length / tr, float), + dhrf sampling on the provided grid + + """ + + do = .1 + dhrf = 1. 
/ do * (glover_hrf(tr, oversampling, time_length, onset + do) - + glover_hrf(tr, oversampling, time_length, onset)) + return dhrf + +def spm_dispersion_derivative(tr, oversampling=16, time_length=32., onset=0.): + """Implementation of the SPM dispersion derivative hrf model. + + Args: + tr: float, scan repeat time, in seconds + oversampling: int, temporal oversampling factor, optional + time_length: float, hrf kernel length, in seconds + onset: float, onset of the response + + Returns: + dhrf: array of shape(length / tr * oversampling, float), + dhrf sampling on the oversampled time grid + + """ + + dd = .01 + dhrf = 1. / dd * (_gamma_difference_hrf(tr, oversampling, time_length, + onset, dispersion=1. + dd) - + spm_hrf(tr, oversampling, time_length, onset)) + return dhrf diff --git a/nltools/external/srm.py b/nltools/external/srm.py new file mode 100644 index 00000000..5195acc3 --- /dev/null +++ b/nltools/external/srm.py @@ -0,0 +1,562 @@ +#!/usr/bin/env python +# coding: latin-1 + +""" Shared Response Model (SRM) + =========================== + + The implementations are based on the following publications: + + Chen, P. H. C., Chen, J., Yeshurun, Y., Hasson, U., Haxby, J., & Ramadge, + P. J. (2015). A reduced-dimension fMRI shared response model. In Advances + in Neural Information Processing Systems (pp. 460-468). + + Anderson, M. J., Capota, M., Turek, J. S., Zhu, X., Willke, T. L., Wang, + Y., & Norman, K. A. (2016, December). Enabling factor analysis on + thousand-subject neuroimaging datasets. In Big Data (Big Data), + 2016 IEEE International Conference on (pp. 1151-1160). IEEE. + + Copyright 2016 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" + +# Authors: Po-Hsuan Chen (Princeton Neuroscience Institute) and Javier Turek +# (Intel Labs), 2015 +from __future__ import division + +import logging + +import numpy as np +import scipy +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils import assert_all_finite +from sklearn.utils.validation import NotFittedError + +__all__ = [ + "SRM", "DetSRM" +] + +logger = logging.getLogger(__name__) + + +def _init_w_transforms(data, features): + """Initialize the mappings (Wi) for the SRM with random orthogonal matrices. + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + features : int + The number of features in the model. + Returns + ------- + w : list of array, element i has shape=[voxels_i, features] + The initialized orthogonal transforms (mappings) :math:`W_i` for each + subject. + voxels : list of int + A list with the number of voxels per subject. + Note + ---- + This function assumes that the numpy random number generator was + initialized. + Not thread safe. 
+ """ + w = [] + subjects = len(data) + voxels = np.empty(subjects, dtype=int) + + # Set Wi to a random orthogonal voxels by features matrix + for subject in range(subjects): + voxels[subject] = data[subject].shape[0] + rnd_matrix = np.random.random((voxels[subject], features)) + q, r = np.linalg.qr(rnd_matrix) + w.append(q) + + return w, voxels + +class SRM(BaseEstimator, TransformerMixin): + """Probabilistic Shared Response Model (SRM) + Given multi-subject data, factorize it as a shared response S among all + subjects and an orthogonal transform W per subject: + .. math:: X_i \\approx W_i S, \\forall i=1 \\dots N + Parameters + ---------- + n_iter : int, default: 10 + Number of iterations to run the algorithm. + features : int, default: 50 + Number of features to compute. + rand_seed : int, default: 0 + Seed for initializing the random number generator. + Attributes + ---------- + w_ : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) for each subject. + s_ : array, shape=[features, samples] + The shared response. + sigma_s_ : array, shape=[features, features] + The covariance of the shared response Normal distribution. + mu_ : list of array, element i has shape=[voxels_i] + The voxel means over the samples for each subject. + rho2_ : array, shape=[subjects] + The estimated noise variance :math:`\\rho_i^2` for each subject + Note + ---- + The number of voxels may be different between subjects. However, the + number of samples must be the same across subjects. + The probabilistic Shared Response Model is approximated using the + Expectation Maximization (EM) algorithm proposed in [Chen2015]_. The + implementation follows the optimizations published in [Anderson2016]_. + This is a single node version. + The run-time complexity is :math:`O(I (V T K + V K^2 + K^3))` and the + memory complexity is :math:`O(V T)` with I - the number of iterations, + V - the sum of voxels from all subjects, T - the number of samples, and + K - the number of features (typically, :math:`V \\gg T \\gg K`). + """ + + def __init__(self, n_iter=10, features=50, rand_seed=0): + self.n_iter = n_iter + self.features = features + self.rand_seed = rand_seed + return + + def fit(self, X, y=None): + """Compute the probabilistic Shared Response Model + Parameters + ---------- + X : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. 
+ y : not used + """ + logger.info('Starting Probabilistic SRM') + + # Check the number of subjects + if len(X) <= 1: + raise ValueError("There are not enough subjects " + "({0:d}) to train the model.".format(len(X))) + + # Check for input data sizes + if X[0].shape[1] < self.features: + raise ValueError( + "There are not enough samples to train the model with " + "{0:d} features.".format(self.features)) + + # Check if all subjects have same number of TRs + number_trs = X[0].shape[1] + number_subjects = len(X) + for subject in range(number_subjects): + assert_all_finite(X[subject]) + if X[subject].shape[1] != number_trs: + raise ValueError("Different number of samples between subjects" + ".") + + # Run SRM + self.sigma_s_, self.w_, self.mu_, self.rho2_, self.s_ = self._srm(X) + + return self + + def transform(self, X, y=None): + """Use the model to transform matrix to Shared Response space + Parameters + ---------- + X : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject + note that number of voxels and samples can vary across subjects + y : not used (as it is unsupervised learning) + Returns + ------- + s : list of 2D arrays, element i has shape=[features_i, samples_i] + Shared responses from input data (X) + """ + + # Check if the model exist + if hasattr(self, 'w_') is False: + raise NotFittedError("The model fit has not been run yet.") + + # Check the number of subjects + if len(X) != len(self.w_): + raise ValueError("The number of subjects does not match the one" + " in the model.") + + s = [None] * len(X) + for subject in range(len(X)): + s[subject] = self.w_[subject].T.dot(X[subject]) + + return s + + def _init_structures(self, data, subjects): + """Initializes data structures for SRM and preprocess the data. + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + subjects : int + The total number of subjects in `data`. + Returns + ------- + x : list of array, element i has shape=[voxels_i, samples] + Demeaned data for each subject. + mu : list of array, element i has shape=[voxels_i] + Voxel means over samples, per subject. + rho2 : array, shape=[subjects] + Noise variance :math:`\\rho^2` per subject. + trace_xtx : array, shape=[subjects] + The squared Frobenius norm of the demeaned data in `x`. + """ + x = [] + mu = [] + rho2 = np.zeros(subjects) + + trace_xtx = np.zeros(subjects) + for subject in range(subjects): + mu.append(np.mean(data[subject], 1)) + rho2[subject] = 1 + trace_xtx[subject] = np.sum(data[subject] ** 2) + x.append(data[subject] - mu[subject][:, np.newaxis]) + + return x, mu, rho2, trace_xtx + + def _likelihood(self, chol_sigma_s_rhos, log_det_psi, chol_sigma_s, + trace_xt_invsigma2_x, inv_sigma_s_rhos, wt_invpsi_x, + samples): + """Calculate the log-likelihood function + Parameters + ---------- + chol_sigma_s_rhos : array, shape=[features, features] + Cholesky factorization of the matrix (Sigma_S + sum_i(1/rho_i^2) + * I) + log_det_psi : float + Determinant of diagonal matrix Psi (containing the rho_i^2 value + voxels_i times). 
+ chol_sigma_s : array, shape=[features, features] + Cholesky factorization of the matrix Sigma_S + trace_xt_invsigma2_x : float + Trace of :math:`\\sum_i (||X_i||_F^2/\\rho_i^2)` + inv_sigma_s_rhos : array, shape=[features, features] + Inverse of :math:`(\\Sigma_S + \\sum_i(1/\\rho_i^2) * I)` + wt_invpsi_x : array, shape=[features, samples] + samples : int + The total number of samples in the data. + Returns + ------- + loglikehood : float + The log-likelihood value. + """ + log_det = (np.log(np.diag(chol_sigma_s_rhos) ** 2).sum() + log_det_psi + + np.log(np.diag(chol_sigma_s) ** 2).sum()) + loglikehood = -0.5 * samples * log_det - 0.5 * trace_xt_invsigma2_x + loglikehood += 0.5 * np.trace( + wt_invpsi_x.T.dot(inv_sigma_s_rhos).dot(wt_invpsi_x)) + # + const --> -0.5*nTR*nvoxel*subjects*math.log(2*math.pi) + + return loglikehood + + def _srm(self, data): + """Expectation-Maximization algorithm for fitting the probabilistic SRM. + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + Returns + ------- + sigma_s : array, shape=[features, features] + The covariance :math:`\\Sigma_s` of the shared response Normal + distribution. + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + mu : list of array, element i has shape=[voxels_i] + The voxel means :math:`\\mu_i` over the samples for each subject. + rho2 : array, shape=[subjects] + The estimated noise variance :math:`\\rho_i^2` for each subject + s : array, shape=[features, samples] + The shared response. + """ + + samples = data[0].shape[1] + subjects = len(data) + + np.random.seed(self.rand_seed) + + # Initialization step: initialize the outputs with initial values, + # voxels with the number of voxels in each subject, and trace_xtx with + # the ||X_i||_F^2 of each subject. 
+ w, voxels = _init_w_transforms(data, self.features) + x, mu, rho2, trace_xtx = self._init_structures(data, subjects) + shared_response = np.zeros((self.features, samples)) + sigma_s = np.identity(self.features) + + # Main loop of the algorithm (run + for iteration in range(self.n_iter): + logger.info('Iteration %d' % (iteration + 1)) + + # E-step: + + # Sum the inverted the rho2 elements for computing W^T * Psi^-1 * W + rho0 = (1 / rho2).sum() + + # Invert Sigma_s using Cholesky factorization + (chol_sigma_s, lower_sigma_s) = scipy.linalg.cho_factor( + sigma_s, check_finite=False) + inv_sigma_s = scipy.linalg.cho_solve( + (chol_sigma_s, lower_sigma_s), np.identity(self.features), + check_finite=False) + + # Invert (Sigma_s + rho_0 * I) using Cholesky factorization + sigma_s_rhos = inv_sigma_s + np.identity(self.features) * rho0 + (chol_sigma_s_rhos, lower_sigma_s_rhos) = scipy.linalg.cho_factor( + sigma_s_rhos, check_finite=False) + inv_sigma_s_rhos = scipy.linalg.cho_solve( + (chol_sigma_s_rhos, lower_sigma_s_rhos), + np.identity(self.features), check_finite=False) + + # Compute the sum of W_i^T * rho_i^-2 * X_i, and the sum of traces + # of X_i^T * rho_i^-2 * X_i + wt_invpsi_x = np.zeros((self.features, samples)) + trace_xt_invsigma2_x = 0.0 + for subject in range(subjects): + wt_invpsi_x += (w[subject].T.dot(x[subject])) / rho2[subject] + trace_xt_invsigma2_x += trace_xtx[subject] / rho2[subject] + + log_det_psi = np.sum(np.log(rho2) * voxels) + + # Update the shared response + shared_response = sigma_s.dot( + np.identity(self.features) - rho0 * inv_sigma_s_rhos).dot( + wt_invpsi_x) + + # M-step + + # Update Sigma_s and compute its trace + sigma_s = (inv_sigma_s_rhos + + shared_response.dot(shared_response.T) / samples) + trace_sigma_s = samples * np.trace(sigma_s) + + # Update each subject's mapping transform W_i and error variance + # rho_i^2 + for subject in range(subjects): + a_subject = x[subject].dot(shared_response.T) + perturbation = np.zeros(a_subject.shape) + np.fill_diagonal(perturbation, 0.001) + u_subject, s_subject, v_subject = np.linalg.svd( + a_subject + perturbation, full_matrices=False) + w[subject] = u_subject.dot(v_subject) + rho2[subject] = trace_xtx[subject] + rho2[subject] += -2 * np.sum(w[subject] * a_subject).sum() + rho2[subject] += trace_sigma_s + rho2[subject] /= samples * voxels[subject] + + if logger.isEnabledFor(logging.INFO): + # Calculate and log the current log-likelihood for checking + # convergence + loglike = self._likelihood( + chol_sigma_s_rhos, log_det_psi, chol_sigma_s, + trace_xt_invsigma2_x, inv_sigma_s_rhos, wt_invpsi_x, + samples) + logger.info('Objective function %f' % loglike) + + return sigma_s, w, mu, rho2, shared_response + + +class DetSRM(BaseEstimator, TransformerMixin): + """Deterministic Shared Response Model (DetSRM) + Given multi-subject data, factorize it as a shared response S among all + subjects and an orthogonal transform W per subject: + .. math:: X_i \\approx W_i S, \\forall i=1 \\dots N + Parameters + ---------- + n_iter : int, default: 10 + Number of iterations to run the algorithm. + features : int, default: 50 + Number of features to compute. + rand_seed : int, default: 0 + Seed for initializing the random number generator. + Attributes + ---------- + w_ : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) for each subject. + s_ : array, shape=[features, samples] + The shared response. + Note + ---- + The number of voxels may be different between subjects. 
However, the + number of samples must be the same across subjects. + The Deterministic Shared Response Model is approximated using the + Block Coordinate Descent (BCD) algorithm proposed in [Chen2015]_. + This is a single node version. + The run-time complexity is :math:`O(I (V T K + V K^2))` and the memory + complexity is :math:`O(V T)` with I - the number of iterations, V - the + sum of voxels from all subjects, T - the number of samples, K - the + number of features (typically, :math:`V \\gg T \\gg K`), and N - the + number of subjects. + """ + + def __init__(self, n_iter=10, features=50, rand_seed=0): + self.n_iter = n_iter + self.features = features + self.rand_seed = rand_seed + return + + def fit(self, X, y=None): + """Compute the Deterministic Shared Response Model + Parameters + ---------- + X : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + y : not used + """ + logger.info('Starting Deterministic SRM') + + # Check the number of subjects + if len(X) <= 1: + raise ValueError("There are not enough subjects " + "({0:d}) to train the model.".format(len(X))) + + # Check for input data sizes + if X[0].shape[1] < self.features: + raise ValueError( + "There are not enough samples to train the model with " + "{0:d} features.".format(self.features)) + + # Check if all subjects have same number of TRs + number_trs = X[0].shape[1] + number_subjects = len(X) + for subject in range(number_subjects): + assert_all_finite(X[subject]) + if X[subject].shape[1] != number_trs: + raise ValueError("Different number of samples between subjects" + ".") + + # Run SRM + self.w_, self.s_ = self._srm(X) + + return self + + def transform(self, X, y=None): + """Use the model to transform data to the Shared Response subspace + Parameters + ---------- + X : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject. + y : not used + Returns + ------- + s : list of 2D arrays, element i has shape=[features_i, samples_i] + Shared responses from input data (X) + """ + + # Check if the model exist + if hasattr(self, 'w_') is False: + raise NotFittedError("The model fit has not been run yet.") + + # Check the number of subjects + if len(X) != len(self.w_): + raise ValueError("The number of subjects does not match the one" + " in the model.") + + s = [None] * len(X) + for subject in range(len(X)): + s[subject] = self.w_[subject].T.dot(X[subject]) + + return s + + def _objective_function(self, data, w, s): + """Calculate the objective function + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + w : list of 2D arrays, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + s : array, shape=[features, samples] + The shared response + Returns + ------- + objective : float + The objective function value. + """ + subjects = len(data) + objective = 0.0 + for m in range(subjects): + objective += \ + np.linalg.norm(data[m] - w[m].dot(s), 'fro')**2 + + return objective * 0.5 / data[0].shape[1] + + def _compute_shared_response(self, data, w): + """ Compute the shared response S + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. 
+ w : list of 2D arrays, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + Returns + ------- + s : array, shape=[features, samples] + The shared response for the subjects data with the mappings in w. + """ + s = np.zeros((w[0].shape[1], data[0].shape[1])) + for m in range(len(w)): + s = s + w[m].T.dot(data[m]) + s /= len(w) + + return s + + def _srm(self, data): + """Expectation-Maximization algorithm for fitting the probabilistic SRM. + Parameters + ---------- + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + Returns + ------- + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + s : array, shape=[features, samples] + The shared response. + """ + + subjects = len(data) + + np.random.seed(self.rand_seed) + + # Initialization step: initialize the outputs with initial values, + # voxels with the number of voxels in each subject. + w, _ = _init_w_transforms(data, self.features) + shared_response = self._compute_shared_response(data, w) + if logger.isEnabledFor(logging.INFO): + # Calculate the current objective function value + objective = self._objective_function(data, w, shared_response) + logger.info('Objective function %f' % objective) + + # Main loop of the algorithm + for iteration in range(self.n_iter): + logger.info('Iteration %d' % (iteration + 1)) + + # Update each subject's mapping transform W_i: + for subject in range(subjects): + a_subject = data[subject].dot(shared_response.T) + perturbation = np.zeros(a_subject.shape) + np.fill_diagonal(perturbation, 0.001) + u_subject, _, v_subject = np.linalg.svd( + a_subject + perturbation, full_matrices=False) + w[subject] = u_subject.dot(v_subject) + + # Update the shared response: + shared_response = self._compute_shared_response(data, w) + + if logger.isEnabledFor(logging.INFO): + # Calculate the current objective function value + objective = self._objective_function(data, w, shared_response) + logger.info('Objective function %f' % objective) + + return w, shared_response diff --git a/nltools/file_reader.py b/nltools/file_reader.py index 34d14337..e131363b 100644 --- a/nltools/file_reader.py +++ b/nltools/file_reader.py @@ -3,6 +3,7 @@ ============================= ''' + __all__ = ['onsets_to_dm'] __author__ = ["Eshin Jolly"] __license__ = "MIT" @@ -17,8 +18,8 @@ def onsets_to_dm(F, TR, runLength, header='infer', sort=False, addIntercept=False, **kwargs): """Function to read in a 2 or 3 column onsets file, specified in seconds, - organized as: 'Stimulus,Onset','Onset,Stimulus','Stimulus,Onset, - Duration', or 'Onset,Duration,Stimulus'. + organized as: 'Stim,Onset','Onset,Stim','Stim,Onset, + Duration', or 'Onset,Duration,Stim'. 
Args: df (str or dataframe): path to file or pandas dataframe diff --git a/nltools/stats.py b/nltools/stats.py index 252d548a..2b705bed 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -26,7 +26,9 @@ 'make_cosine_basis', '_robust_estimator', 'summarize_bootstrap', - 'regress'] + 'regress', + 'procrustes', + 'align'] import numpy as np import pandas as pd @@ -38,7 +40,12 @@ import itertools from joblib import Parallel, delayed import six -from nltools.utils import attempt_to_import +from .utils import attempt_to_import +from .external.srm import SRM, DetSRM +from scipy.linalg import orthogonal_procrustes +from sklearn.utils import check_random_state + +MAX_INT = np.iinfo(np.int32).max # Optional dependencies ARMA = attempt_to_import('statsmodels.tsa.arima_model',name='ARMA', @@ -387,15 +394,17 @@ def fisher_r_to_z(r): return .5*np.log((1+r)/(1-r)) -def _permute_sign(dat): - return np.mean(dat*np.random.choice([1, -1], len(dat))) +def _permute_sign(data, random_state=None): + random_state = check_random_state(random_state) + return np.mean(data*random_state.choice([1, -1], len(data))) -def _permute_group(data): - perm_label = np.random.permutation(data['Group']) +def _permute_group(data, random_state=None): + random_state = check_random_state(random_state) + perm_label = random_state.permutation(data['Group']) return (np.mean(data.loc[perm_label==1, 'Values']) - np.mean(data.loc[perm_label==0, 'Values'])) -def one_sample_permutation(data, n_permute=5000, n_jobs=-1): +def one_sample_permutation(data, n_permute=5000, n_jobs=-1, random_state=None): ''' One sample permutation test using randomization. Args: @@ -409,19 +418,23 @@ def one_sample_permutation(data, n_permute=5000, n_jobs=-1): ''' + random_state = check_random_state(random_state) + seeds = random_state.randint(MAX_INT, size=n_permute) + data = np.array(data) stats = dict() stats['mean'] = np.mean(data) - all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_sign)(data) - for i in range(n_permute)) + all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_sign)(data, + random_state=seeds[i]) for i in range(n_permute)) if stats['mean'] >= 0: stats['p'] = np.mean(all_p >= stats['mean']) else: stats['p'] = np.mean(all_p <= stats['mean']) return stats -def two_sample_permutation(data1, data2, n_permute=5000, n_jobs=-1): +def two_sample_permutation(data1, data2, n_permute=5000, + n_jobs=-1, random_state=None): ''' Independent sample permutation test. Args: @@ -435,23 +448,26 @@ def two_sample_permutation(data1, data2, n_permute=5000, n_jobs=-1): ''' + random_state = check_random_state(random_state) + seeds = random_state.randint(MAX_INT, size=n_permute) + stats = dict() stats['mean'] = np.mean(data1)-np.mean(data2) - data = pd.DataFrame(data={'Values':data1,'Group':np.ones(len(data1))}) + data = pd.DataFrame(data={'Values':data1, 'Group':np.ones(len(data1))}) data = data.append(pd.DataFrame(data={ 'Values':data2, 'Group':np.zeros(len(data2))})) - all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_group)(data) - for i in range(n_permute)) + all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_group)(data, + random_state=seeds[i]) for i in range(n_permute)) if stats['mean']>=0: - stats['p'] = np.mean(all_p>=stats['mean']) + stats['p'] = np.mean(all_p >= stats['mean']) else: - stats['p'] = np.mean(all_p<=stats['mean']) + stats['p'] = np.mean(all_p <= stats['mean']) return stats def correlation_permutation(data1, data2, n_permute=5000, metric='spearman', - n_jobs=-1): + n_jobs=-1, random_state=None): ''' Permute correlation. 
Args: @@ -468,30 +484,32 @@ def correlation_permutation(data1, data2, n_permute=5000, metric='spearman', ''' + random_state = check_random_state(random_state) + stats = dict() data1 = np.array(data1) data2 = np.array(data2) - if metric is 'spearman': + if metric == 'spearman': stats['correlation'] = spearmanr(data1, data2)[0] - elif metric is 'pearson': + elif metric == 'pearson': stats['correlation'] = pearsonr(data1, data2)[0] - elif metric is 'kendall': + elif metric == 'kendall': stats['correlation'] = kendalltau(data1, data2)[0] else: raise ValueError('metric must be "spearman" or "pearson" or "kendall"') if metric is 'spearman': all_p = Parallel(n_jobs=n_jobs)(delayed(spearmanr)( - np.random.permutation(data1), data2) + random_state.permutation(data1), data2) for i in range(n_permute)) elif metric is 'pearson': all_p = Parallel(n_jobs=n_jobs)(delayed(pearsonr)( - np.random.permutation(data1), data2) + random_state.permutation(data1), data2) for i in range(n_permute)) elif metric is 'kendall': all_p = Parallel(n_jobs=n_jobs)(delayed(kendalltau)( - np.random.permutation(data1), data2) + random_state.permutation(data1), data2) for i in range(n_permute)) all_p = [x[0] for x in all_p] @@ -501,14 +519,21 @@ def correlation_permutation(data1, data2, n_permute=5000, metric='spearman', stats['p'] = np.mean(all_p <= stats['correlation']) return stats -def make_cosine_basis(nsamples,sampling_rate,filter_length,drop=0): - """ Create a series of cosines basic functions for discrete cosine transform. Based off of implementation in spm_filter and spm_dctmtx because scipy dct can only apply transforms but not return the basis functions. Like SPM, does not add constant (i.e. intercept), but does retain first basis (i.e. sigmoidal/linear drift) +def make_cosine_basis(nsamples, sampling_rate, filter_length, drop=0): + """ Create a series of cosines basic functions for discrete cosine + transform. Based off of implementation in spm_filter and spm_dctmtx + because scipy dct can only apply transforms but not return the basis + functions. Like SPM, does not add constant (i.e. intercept), but does + retain first basis (i.e. sigmoidal/linear drift) Args: nsamples (int): number of observations (e.g. TRs) sampling_freq (float): sampling rate in seconds (e.g. TR length) filter_length (int): length of filter in seconds - drop (int): index of which early/slow bases to drop if any; default is to drop constant (i.e. intercept) like SPM. Unlike SPM, retains first basis (i.e. linear/sigmoidal). Will cumulatively drop bases upto and inclusive of index provided (e.g. 2, drops bases 0,1,2) + drop (int): index of which early/slow bases to drop if any; default is + to drop constant (i.e. intercept) like SPM. Unlike SPM, retains + first basis (i.e. linear/sigmoidal). Will cumulatively drop bases + up to and inclusive of index provided (e.g. 2, drops bases 0,1,2) Returns: out (ndarray): nsamples x number of basis sets numpy array @@ -527,14 +552,15 @@ def make_cosine_basis(nsamples,sampling_rate,filter_length,drop=0): C[:,0] = np.ones((1,len(n)))/np.sqrt(nsamples) #Insert higher order cosine basis functions - for i in xrange(1,order): + for i in range(1,order): C[:,i] = np.sqrt(2./nsamples) * np.cos(np.pi*(2*n+1) * i/(2*nsamples)) #Drop desired bases - drop +=1 - C = C[:,drop:] + drop += 1 + C = C[:, drop:] if C.size == 0: - raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.') + raise ValueError('Basis function creation failed! 
nsamples is too small' + ' for requested filter_length.') else: return C @@ -644,7 +670,6 @@ def _robust_estimator(vals,X,robust_estimator='hc0',nlags=1): return np.sqrt(np.diag(vcv)) - def summarize_bootstrap(data, save_weights=False): """ Calculate summary of bootstrap samples @@ -678,7 +703,7 @@ def regress(X,Y,mode='ols',**kwargs): This function can compute regression in 3 ways: 1) Standard OLS 2) OLS with robust sandwich estimators for standard errors. 3 robust types of estimators exist: - 1) 'hc1' - classic huber-white estimator robust to heteroscedasticity (default) + 1) 'hc0' - classic huber-white estimator robust to heteroscedasticity (default) 2) 'hc3' - a variant on the huber-white estimator that is slightly more conservative when sample sizes are small 3) 'hac' - an estimator robust to both heteroscedasticity and auto-correlation; the auto-correlation lag can be controlled with the 'nlags' keyword argument; default is 1 3) ARMA (auto-regressive moving-average) model. This model is fit through statsmodels.tsa.arima_model.ARMA, so more information about options can be found there. Any settings can be passed in as kwargs. By default fits a (1,1) model with starting lags of 2. This mode is **computationally intensive** and can take quite a while if Y has many columns. If Y is a 2d array joblib.Parallel is used for faster fitting by parallelizing fits across columns of Y. Parallelization can be controlled by passing in kwargs. Defaults to multi-threading using 10 separate threads, as threads don't require large arrays to be duplicated in memory. Defaults are also set to enable memory-mapping for very large arrays if backend='multiprocessing' to prevent crashes and hangs. Various levels of progress can be monitored using the 'disp' (statsmodels) and 'verbose' (joblib) keyword arguments with integer values > 0. @@ -797,3 +822,231 @@ def _arma_func(idx=None,**kwargs): b,t,p,df,res = _arma_func() return b, t, p, df, res + + +def align(data, method='deterministic_srm', n_features=None, axis=0, + *args, **kwargs): + ''' Align subject data into a common response model. + + Can be used to hyperalign source data to target data using + Hyperalignment from Dartmouth (i.e., procrustes transformation; see + nltools.stats.procrustes) or the Shared Response Model from Princeton (see + nltools.external.srm). (See nltools.data.Brain_Data.align for aligning + a single Brain_Data object to another.) The common model is either the shared + response model or the centered target data. Transformed data can be back-projected to + the original data using the transformation matrix. + + Examples: + Hyperalign using procrustes transform: + out = align(data, method='procrustes') + + Align using shared response model: + out = align(data, method='probabilistic_srm', n_features=None) + + Project aligned data into original data: + original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] + + Args: + data: (list) A list of Brain_Data objects + method: (str) alignment method to use + ['probabilistic_srm','deterministic_srm','procrustes'] + n_features: (int) number of features to align to common space. + If None, defaults to the number of voxels. + axis: (int) axis to align on + + Returns: + out: (dict) a dictionary containing a list of transformed subject + matrices, a list of transformation matrices, and the shared + response matrix + + ''' + + from nltools.data import Brain_Data + + assert isinstance(data, list), 'Make sure you are inputting a list of data.'
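# --- Illustrative usage sketch (not part of the diff): calling the new align() ---
# Assumes a version of nltools containing this patch is installed. For numpy input,
# each subject matrix is assumed to be voxels x observations (as in the tests below);
# the random data and shapes are purely illustrative.
import numpy as np
from nltools.stats import align

np.random.seed(0)
data = [np.random.randn(50, 200) for _ in range(3)]  # 3 subjects, 50 voxels x 200 TRs

out = align(data, method='deterministic_srm')
# Each transformed subject equals its own data projected through its transformation
# matrix, which is the identity the new tests check:
recon = np.dot(data[0].T, out['transformation_matrix'][0]).T
print(np.allclose(out['transformed'][0], recon))

# method='procrustes' is called the same way, additionally returning 'disparity'
# and 'scale' entries in the output dictionary.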
+ assert all([type(x) for x in data]), 'Make sure all objects in the list are the same type.' + assert method in ['probabilistic_srm','deterministic_srm','procrustes'], "Method must be ['probabilistic_srm','deterministic_srm','procrustes']" + + data = deepcopy(data) + + if isinstance(data[0], Brain_Data): + data_type = 'Brain_Data' + data_out = [x.copy() for x in data] + data = [x.data.T for x in data] + elif isinstance(data[0], np.ndarray): + data_type = 'numpy' + else: + raise ValueError('Type %s is not implemented yet.' % type(data[0])) + + # Align over time or voxels + if axis == 1: + data = [x.T for x in data] + elif axis != 0: + raise ValueError('axis must be 0 or 1.') + + out = dict() + if method in ['deterministic_srm', 'probabilistic_srm']: + if n_features is None: + n_features = int(data[0].shape[0]) + if method == 'deterministic_srm': + srm = DetSRM(features=n_features, *args, **kwargs) + elif method == 'probabilistic_srm': + srm = SRM(features=n_features, *args, **kwargs) + srm.fit(data) + out['transformed'] = [x for x in srm.transform(data)] + out['common_model'] = srm.s_ + out['transformation_matrix'] = srm.w_ + + elif method == 'procrustes': + if n_features != None: + raise NotImplementedError('Currently must use all voxels.' + 'Eventually will add a PCA reduction,' + 'must do this manually for now.') + + ##STEP 0: STANDARDIZE SIZE AND SHAPE## + sizes_0 = [x.shape[0] for x in data] + sizes_1 = [x.shape[1] for x in data] + + #find the smallest number of rows + R = min(sizes_0) + C = max(sizes_1) + + m = [np.empty((R, C), dtype=np.ndarray)] * len(data) + + #Pad rows with different sizes with zeros + for i, x in enumerate(data): + y = x[0:R, :] + missing = C - y.shape[1] + add = np.zeros((y.shape[0], missing)) + y = np.append(y, add, axis=1) + m[i] = y + + ##STEP 1: CREATE INITIAL AVERAGE TEMPLATE## + for i, x in enumerate(m): + if i == 0: + # use first data as template + template = np.copy(x.T) + else: + _, trans, _, _, _ = procrustes(template/i, x.T) + template += trans + template /= len(m) + + ##STEP 2: CREATE NEW COMMON TEMPLATE## + #align each subj to the template from STEP 1 + #and create a new common template based on avg + common = np.zeros(template.shape) + for i, x in enumerate(m): + _, trans, _, _, _ = procrustes(template, x.T) + common += trans + common /= len(m) + + #STEP 3 (below): ALIGN TO NEW TEMPLATE + aligned = []; transformation_matrix = []; disparity = []; scale = [] + for i, x in enumerate(m): + _, transformed, d, t, s = procrustes(common, x.T) + aligned.append(transformed.T) + transformation_matrix.append(t) + disparity.append(d) + scale.append(s) + out['transformed'] = aligned + out['common_model'] = common.T + out['transformation_matrix'] = transformation_matrix + out['disparity'] = disparity + out['scale'] = scale + + if axis == 1: + out['transformed'] = [x.T for x in out['transformed']] + out['common_model'] = out['common_model'].T + + if data_type == 'Brain_Data': + for i, x in enumerate(out['transformed']): + data_out[i].data = x.T + out['transformed'] = data_out + common = data_out[0].copy() + common.data = out['common_model'].T + out['common_model'] = common + return out + +def procrustes(data1, data2): + '''Procrustes analysis, a similarity test for two data sets. + + Each input matrix is a set of points or vectors (the rows of the matrix). + The dimension of the space is the number of columns of each matrix. Given + two identically sized matrices, procrustes standardizes both such that: + - :math:`tr(AA^{T}) = 1`. 
+ - Both sets of points are centered around the origin. + Procrustes ([1]_, [2]_) then applies the optimal transform to the second + matrix (including scaling/dilation, rotations, and reflections) to minimize + :math:`M^{2}=\sum(data1-data2)^{2}`, or the sum of the squares of the + pointwise differences between the two input datasets. + This function was not designed to handle datasets with different numbers of + datapoints (rows). If two data sets have different dimensionality + (different number of columns), this function will add columns of zeros to + the smaller of the two. + + Args: + data1 : array_like + Matrix, n rows represent points in k (columns) space `data1` is the + reference data, after it is standardised, the data from `data2` + will be transformed to fit the pattern in `data1` (must have >1 + unique points). + data2 : array_like + n rows of data in k space to be fit to `data1`. Must be the same + shape ``(numrows, numcols)`` as data1 (must have >1 unique points). + + Returns: + mtx1 : array_like + A standardized version of `data1`. + mtx2 : array_like + The orientation of `data2` that best fits `data1`. Centered, but not + necessarily :math:`tr(AA^{T}) = 1`. + disparity : float + :math:`M^{2}` as defined above. + R : (N, N) ndarray + The matrix solution of the orthogonal Procrustes problem. + Minimizes the Frobenius norm of dot(data1, R) - data2, subject to + dot(R.T, R) == I. + scale : float + Sum of the singular values of ``dot(data1.T, data2)``. + + + ''' + + mtx1 = np.array(data1, dtype=np.double, copy=True) + mtx2 = np.array(data2, dtype=np.double, copy=True) + + if mtx1.ndim != 2 or mtx2.ndim != 2: + raise ValueError("Input matrices must be two-dimensional") + if mtx1.shape[0] != mtx2.shape[0]: + raise ValueError("Input matrices must have same number of rows.") + if mtx1.size == 0: + raise ValueError("Input matrices must be >0 rows and >0 cols") + if mtx1.shape[1] != mtx2.shape[1]: + # Pad with zeros + if mtx1.shape[1] > mtx2.shape[1]: + mtx2 = np.append(mtx2, np.zeros((mtx1.shape[0], mtx1.shape[1] - mtx2.shape[1])), axis=1) + else: + mtx1 = np.append(mtx1, np.zeros((mtx1.shape[0], mtx2.shape[1] - mtx1.shape[1])), axis=1) + + # translate all the data to the origin + mtx1 -= np.mean(mtx1, 0) + mtx2 -= np.mean(mtx2, 0) + + norm1 = np.linalg.norm(mtx1) + norm2 = np.linalg.norm(mtx2) + + if norm1 == 0 or norm2 == 0: + raise ValueError("Input matrices must contain >1 unique points") + + # change scaling of data (in rows) such that trace(mtx*mtx') = 1 + mtx1 /= norm1 + mtx2 /= norm2 + + # transform mtx2 to minimize disparity + R, s = orthogonal_procrustes(mtx1, mtx2) + mtx2 = np.dot(mtx2, R.T) * s + + # measure the dissimilarity between the two datasets + disparity = np.sum(np.square(mtx1 - mtx2)) + + return mtx1, mtx2, disparity, R, s diff --git a/nltools/tests/conftest.py b/nltools/tests/conftest.py index 4ccb82e2..fddd8f7f 100644 --- a/nltools/tests/conftest.py +++ b/nltools/tests/conftest.py @@ -1,7 +1,7 @@ import pytest -@pytest.fixture(scope='function') -def sim(): - from nltools import simulator - return simulator.Simulator() +# @pytest.fixture(scope='function') +# def sim(): +# from nltools import simulator +# return simulator.Simulator() diff --git a/nltools/tests/test_data.py b/nltools/tests/test_data.py index a638b901..47d241f4 100644 --- a/nltools/tests/test_data.py +++ b/nltools/tests/test_data.py @@ -8,7 +8,7 @@ Adjacency, Groupby, Design_Matrix) -from nltools.stats import threshold +from nltools.stats import threshold, align from nltools.mask import 
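# --- Illustrative usage sketch (not part of the diff): the new procrustes() ---
# A toy check of what procrustes() returns, assuming nltools with this patch is
# importable: b is a rotated and rescaled copy of a, so after the optimal
# orthogonal transform the disparity should be near zero.
import numpy as np
from nltools.stats import procrustes

rng = np.random.RandomState(0)
a = rng.randn(10, 3)
q = np.linalg.qr(rng.randn(3, 3))[0]      # random orthogonal (rotation/reflection) matrix
b = 2.5 * a.dot(q)                        # rotate, possibly reflect, and rescale a

mtx1, mtx2, disparity, R, scale = procrustes(a, b)
print(disparity)                          # ~0: b can be mapped back onto a
# R and scale describe the mapping explicitly:
centered = b - b.mean(0)
print(np.allclose(mtx2, centered / np.linalg.norm(centered) @ R.T * scale))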
create_sphere from sklearn.metrics import pairwise_distances import matplotlib @@ -18,8 +18,8 @@ matplotlib.use('TkAgg') -def test_brain_data_3mm(tmpdir): - MNI_Template["resolution"] = '3mm' +def test_brain_data_2mm(tmpdir): + MNI_Template["resolution"] = '2mm' sim = Simulator() r = 10 sigma = 1 @@ -28,8 +28,13 @@ def test_brain_data_3mm(tmpdir): output_dir = str(tmpdir) dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir) - shape_3d = (60, 72, 60) - shape_2d = (6, 71020) + if MNI_Template["resolution"] == '2mm': + shape_3d = (91, 109, 91) + shape_2d = (6, 238955) + elif MNI_Template["resolution"] == '3mm': + shape_3d = (60, 72, 60) + shape_2d = (6, 71020) + y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))),header=None, index_col=None) holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))),header=None,index_col=None) @@ -167,6 +172,16 @@ def test_brain_data_3mm(tmpdir): assert len(r) == shape_2d[0] r2 = dat.similarity(stats['weight_map'].to_nifti()) assert len(r2) == shape_2d[0] + r = dat.similarity(stats['weight_map'], method='dot_product') + assert len(r) == shape_2d[0] + r = dat.similarity(stats['weight_map'], method='cosine') + assert len(r) == shape_2d[0] + r = dat.similarity(dat, method='correlation') + assert r.shape == (dat.shape()[0],dat.shape()[0]) + r = dat.similarity(dat, method='dot_product') + assert r.shape == (dat.shape()[0],dat.shape()[0]) + r = dat.similarity(dat, method='cosine') + assert r.shape == (dat.shape()[0],dat.shape()[0]) # Test apply_mask - might move part of this to test mask suite s1 = create_sphere([12, 10, -8], radius=10) @@ -301,280 +316,93 @@ def test_brain_data_3mm(tmpdir): assert n_components == len(stats['components']) assert stats['weights'].shape == (len(dat), n_components) -def test_brain_data_2mm(tmpdir): - MNI_Template["resolution"] = '2mm' + # Test Hyperalignment Method sim = Simulator() - r = 10 - sigma = 1 y = [0, 1] - n_reps = 3 - output_dir = str(tmpdir) - dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir) - - shape_3d = (91, 109, 91) - shape_2d = (6, 238955) - y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))),header=None, index_col=None) - holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))),header=None,index_col=None) - - # Test load list - dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y) - - # Test concatenate - out = Brain_Data([x for x in dat]) - assert isinstance(out, Brain_Data) - assert len(out)==len(dat) - - # Test to_nifti - d = dat.to_nifti() - assert d.shape[0:3] == shape_3d - - # Test load nibabel - assert Brain_Data(d) - - # Test shape - assert dat.shape() == shape_2d - - # Test Mean - assert dat.mean().shape()[0] == shape_2d[1] - - # Test Std - assert dat.std().shape()[0] == shape_2d[1] - - # Test add - new = dat + dat - assert new.shape() == shape_2d - - # Test subtract - new = dat - dat - assert new.shape() == shape_2d - - # Test multiply - new = dat * dat - assert new.shape() == shape_2d - - # Test Indexing - index = [0, 3, 1] - assert len(dat[index]) == len(index) - index = range(4) - assert len(dat[index]) == len(index) - index = dat.Y == 1 - - assert len(dat[index.values.flatten()]) == index.values.sum() - - assert len(dat[index]) == index.values.sum() - assert len(dat[:3]) == 3 - - # Test Iterator - x = [x for x in dat] - assert len(x) == len(dat) - assert len(x[0].data.shape) == 1 - - # # Test T-test - out = dat.ttest() - assert out['t'].shape()[0] == shape_2d[1] - - # # # Test T-test - permutation method - # out = 
dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1}) - # assert out['t'].shape()[0]==shape_2d[1] - - # Test Regress - dat.X = pd.DataFrame({'Intercept':np.ones(len(dat.Y)), 'X1':np.array(dat.Y).flatten()},index=None) - out = dat.regress() - - assert type(out['beta'].data) == np.ndarray - assert type(out['t'].data) == np.ndarray - assert type(out['p'].data) == np.ndarray - assert type(out['residual'].data) == np.ndarray - assert type(out['df'].data) == np.ndarray - - assert out['beta'].shape() == (2,shape_2d[1]) - - # Test indexing - assert out['t'][1].shape()[0] == shape_2d[1] - - # Test threshold - i=1 - tt = threshold(out['t'][i], out['p'][i], .05) - assert isinstance(tt,Brain_Data) - - # Test write - dat.write(os.path.join(str(tmpdir.join('test_write.nii')))) - assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii')))) - - # Test append - assert dat.append(dat).shape()[0]==shape_2d[0]*2 - - # Test distance - distance = dat.distance(method='euclidean') - assert isinstance(distance,Adjacency) - assert distance.square_shape()[0]==shape_2d[0] - - # Test predict - stats = dat.predict(algorithm='svm', cv_dict={'type': 'kfolds','n_folds': 2}, plot=False,**{'kernel':"linear"}) - - # Support Vector Regression, with 5 fold cross-validation with Platt Scaling - # This will output probabilities of each class - stats = dat.predict(algorithm='svm', cv_dict=None, plot=False,**{'kernel':'linear', 'probability':True}) - assert isinstance(stats['weight_map'],Brain_Data) - - # Logistic classificiation, with 2 fold cross-validation. - stats = dat.predict(algorithm='logistic', cv_dict={'type': 'kfolds', 'n_folds': 2}, plot=False) - assert isinstance(stats['weight_map'],Brain_Data) - - # Ridge classificiation, - stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None,plot=False) - assert isinstance(stats['weight_map'],Brain_Data) - - # Ridge - stats = dat.predict(algorithm='ridge', cv_dict={'type': 'kfolds', 'n_folds': 2,'subject_id':holdout}, plot=False,**{'alpha':.1}) - - # Lasso - stats = dat.predict(algorithm='lasso', cv_dict={'type': 'kfolds', 'n_folds': 2,'stratified':dat.Y}, plot=False,**{'alpha':.1}) - - # PCR - stats = dat.predict(algorithm='pcr', cv_dict=None, plot=False) - - # Test Similarity - r = dat.similarity(stats['weight_map']) - assert len(r) == shape_2d[0] - r2 = dat.similarity(stats['weight_map'].to_nifti()) - assert len(r2) == shape_2d[0] - - # Test apply_mask - might move part of this to test mask suite - s1 = create_sphere([12, 10, -8], radius=10) - assert isinstance(s1, nb.Nifti1Image) - s2 = Brain_Data(s1) - masked_dat = dat.apply_mask(s1) - assert masked_dat.shape()[1] == np.sum(s2.data != 0) - - # Test extract_roi - mask = create_sphere([12, 10, -8], radius=10) - assert len(dat.extract_roi(mask)) == shape_2d[0] - - # Test r_to_z - z = dat.r_to_z() - assert z.shape() == dat.shape() - - # Test copy - d_copy = dat.copy() - assert d_copy.shape() == dat.shape() - - # Test detrend - detrend = dat.detrend() - assert detrend.shape() == dat.shape() - - # Test standardize - s = dat.standardize() - assert s.shape() == dat.shape() - assert np.isclose(np.sum(s.mean().data), 0, atol=.1) - s = dat.standardize(method='zscore') - assert s.shape() == dat.shape() - assert np.isclose(np.sum(s.mean().data), 0, atol=.1) - - # Test Sum - s = dat.sum() - assert s.shape() == dat[1].shape() - - # Test Groupby - s1 = create_sphere([12, 10, -8], radius=10) - s2 = create_sphere([22, -2, -22], radius=10) - mask = Brain_Data([s1, s2]) - d = dat.groupby(mask) - assert 
isinstance(d, Groupby) - - # Test Aggregate - mn = dat.aggregate(mask, 'mean') - assert isinstance(mn, Brain_Data) - assert len(mn.shape()) == 1 - - # Test Threshold - s1 = create_sphere([12, 10, -8], radius=10) - s2 = create_sphere([22, -2, -22], radius=10) - mask = Brain_Data(s1)*5 - mask = mask + Brain_Data(s2) - - m1 = mask.threshold(upper=.5) - m2 = mask.threshold(upper=3) - m3 = mask.threshold(upper='98%') - m4 = Brain_Data(s1)*5 + Brain_Data(s2)*-.5 - m4 = mask.threshold(upper=.5,lower=-.3) - assert np.sum(m1.data > 0) > np.sum(m2.data > 0) - assert np.sum(m1.data > 0) == np.sum(m3.data > 0) - assert np.sum(m4.data[(m4.data > -.3) & (m4.data <.5)]) == 0 - assert np.sum(m4.data[(m4.data < -.3) | (m4.data >.5)]) > 0 - - # Test Regions - r = mask.regions(min_region_size=10) - m1 = Brain_Data(s1) - m2 = r.threshold(1, binarize=True) - # assert len(r)==2 - assert len(np.unique(r.to_nifti().get_data())) == 2 # JC edit: I think this is what you were trying to do - diff = m2-m1 - assert np.sum(diff.data) == 0 - # # Test Plot - # dat.plot() - - # Test Bootstrap - masked = dat.apply_mask(create_sphere(radius=10, coordinates=[0, 0, 0])) - n_samples = 3 - b = masked.bootstrap('mean', n_samples=n_samples) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('std', n_samples=n_samples) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, plot=False) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, - plot=False, cv_dict={'type':'kfolds','n_folds':3}) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, - save_weights=True, plot=False) - assert len(b['samples'])==n_samples - - # Test decompose - n_components = 3 - stats = dat.decompose(algorithm='pca', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - stats = dat.decompose(algorithm='ica', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - dat.data = dat.data + 2 - dat.data[dat.data<0] = 0 - stats = dat.decompose(algorithm='nnmf', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - stats = dat.decompose(algorithm='fa', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - stats = dat.decompose(algorithm='pca', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - stats = dat.decompose(algorithm='ica', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) - - dat.data = dat.data + 2 - dat.data[dat.data<0] = 0 - stats = dat.decompose(algorithm='nnmf', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) + n_reps = 10 + s1 = create_sphere([0, 0, 0], radius=3) + d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1) + d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1) + d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) + + # Test procrustes 
using align + data = [d1, d2, d3] + out = align(data, method='procrustes') + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape() == out['common_model'].shape() + transformed = np.dot(d1.data, out['transformation_matrix'][0]) + centered = d1.data - np.mean(d1.data, 0) + transformed = (np.dot(centered/np.linalg.norm(centered), out['transformation_matrix'][0])*out['scale'][0]) + np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - transformed), decimal=5) + + # Test deterministic brain_data + bout = d1.align(out['common_model'], method='deterministic_srm') + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[1] == bout['transformation_matrix'].shape[0] + btransformed = np.dot(d1.data, bout['transformation_matrix']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed)) + + # Test deterministic brain_data + bout = d1.align(out['common_model'], method='probabilistic_srm') + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[1] == bout['transformation_matrix'].shape[0] + btransformed = np.dot(d1.data, bout['transformation_matrix']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed)) + + # Test procrustes brain_data + bout = d1.align(out['common_model'], method='procrustes') + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[1] == bout['transformation_matrix'].shape[0] + centered = d1.data - np.mean(d1.data, 0) + btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'])*bout['scale']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed), decimal=5) + np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - bout['transformed'].data)) + + # Test hyperalignment on Brain_Data over time (axis=1) + sim = Simulator() + y = [0, 1] + n_reps = 10 + s1 = create_sphere([0, 0, 0], radius=5) + d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1) + d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1) + d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) + data = [d1, d2, d3] + + out = align(data, method='procrustes', axis=1) + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape() == out['common_model'].shape() + centered = data[0].data.T-np.mean(data[0].data.T, 0) + transformed = (np.dot(centered/np.linalg.norm(centered), out['transformation_matrix'][0])*out['scale'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed.T), decimal=5) + + bout = d1.align(out['common_model'], method='deterministic_srm', axis=1) + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[0] == bout['transformation_matrix'].shape[0] + btransformed = np.dot(d1.data.T, bout['transformation_matrix']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T)) + + bout = d1.align(out['common_model'], method='probabilistic_srm', axis=1) + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[0] == bout['transformation_matrix'].shape[0] + btransformed = np.dot(d1.data.T, 
bout['transformation_matrix']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T)) + + bout = d1.align(out['common_model'], method='procrustes', axis=1) + assert d1.shape() == bout['transformed'].shape() + assert d1.shape() == bout['common_model'].shape() + assert d1.shape()[0] == bout['transformation_matrix'].shape[0] + centered = d1.data.T-np.mean(d1.data.T, 0) + btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'])*bout['scale']) + np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T), decimal=5) + np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data-bout['transformed'].data)) - stats = dat.decompose(algorithm='fa', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(dat), n_components) def test_adjacency(tmpdir): n = 10 diff --git a/nltools/tests/test_stats.py b/nltools/tests/test_stats.py index 848b9520..607c4612 100644 --- a/nltools/tests/test_stats.py +++ b/nltools/tests/test_stats.py @@ -5,7 +5,10 @@ correlation_permutation, downsample, upsample, - winsorize) + winsorize, + align) +from nltools.simulator import Simulator +from nltools.mask import create_sphere def test_permutation(): dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000) @@ -70,3 +73,128 @@ def test_winsorize(): 78., 10., 13., -40., 101., 86., 85., 15., 89., 89., 28., -5., 41.]) assert(np.round(np.mean(out)) == np.round(np.mean(correct_result))) + +def test_align(): + # Test hyperalignment matrix + sim = Simulator() + y = [0, 1] + n_reps = 10 + s1 = create_sphere([0, 0, 0], radius=3) + d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1) + d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1) + d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) + + data = [d1.data.T,d2.data.T,d3.data.T] + out = align(data, method='deterministic_srm') + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape == out['common_model'].shape + transformed = np.dot(data[0].T,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0]-transformed.T)) + + out = align(data, method='probabilistic_srm') + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape == out['common_model'].shape + transformed = np.dot(data[0].T,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0]-transformed.T)) + + out2 = align(data, method='procrustes') + assert len(data) == len(out2['transformed']) + assert data[0].shape == out2['common_model'].shape + assert len(data) == len(out2['transformation_matrix']) + assert len(data) == len(out2['disparity']) + centered = data[0].T-np.mean(data[0].T,0) + transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0])*out2['scale'][0]) + np.testing.assert_almost_equal(0,np.sum(out2['transformed'][0]-transformed.T)) + assert out['transformed'][0].shape == out2['transformed'][0].shape + assert out['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape + + # Test hyperalignment on Brain_Data + data = [d1,d2,d3] + out = align(data, method='deterministic_srm') + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape() == 
out['common_model'].shape() + transformed = np.dot(d1.data,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed)) + + out = align(data, method='probabilistic_srm') + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape() == out['common_model'].shape() + transformed = np.dot(d1.data,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed)) + + out2 = align(data, method='procrustes') + assert len(data) == len(out2['transformed']) + assert data[0].shape() == out2['common_model'].shape() + assert len(data) == len(out2['transformation_matrix']) + assert len(data) == len(out2['disparity']) + centered = data[0].data-np.mean(data[0].data,0) + transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0])*out2['scale'][0]) + np.testing.assert_almost_equal(0,np.sum(out2['transformed'][0].data-transformed)) + assert out['transformed'][0].shape() == out2['transformed'][0].shape() + assert out['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape + + # Test hyperalignment on matrix over time (axis=1) + sim = Simulator() + y = [0, 1] + n_reps = 10 + s1 = create_sphere([0, 0, 0], radius=5) + d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1) + d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1) + d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) + data = [d1.data.T,d2.data.T,d3.data.T] + out = align(data, method='deterministic_srm', axis=1) + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape == out['common_model'].shape + transformed = np.dot(data[0],out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0]-transformed)) + + out = align(data, method='probabilistic_srm', axis=1) + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape == out['common_model'].shape + transformed = np.dot(data[0],out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0]-transformed)) + + out2 = align(data, method='procrustes', axis=1) + assert len(data) == len(out2['transformed']) + assert data[0].shape == out2['common_model'].shape + assert len(data) == len(out2['transformation_matrix']) + assert len(data) == len(out2['disparity']) + centered = data[0]-np.mean(data[0],0) + transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0])*out2['scale'][0]) + np.testing.assert_almost_equal(0,np.sum(out2['transformed'][0]-transformed)) + assert out['transformed'][0].shape == out2['transformed'][0].shape + assert out['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape + + # Test hyperalignment on Brain_Data over time (axis=1) + data = [d1, d2, d3] + out = align(data, method='deterministic_srm', axis=1) + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + assert data[0].shape() == out['common_model'].shape() + transformed = np.dot(d1.data.T,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed.T)) + + out = align(data, method='probabilistic_srm', axis=1) + assert len(data) == len(out['transformed']) + assert len(data) == len(out['transformation_matrix']) + 
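# --- Illustrative note (not part of the diff): what axis=1 means in align() ---
# From the align() implementation added above, axis=1 simply transposes each subject
# matrix before alignment and transposes 'transformed' and 'common_model' back
# afterwards, so aligning "over time" is equivalent to aligning the transposed
# matrices over voxels. A toy check with the deterministic procrustes path
# (shapes are illustrative only):
import numpy as np
from nltools.stats import align

rng = np.random.RandomState(0)
data = [rng.randn(20, 40) for _ in range(3)]

out_time = align(data, method='procrustes', axis=1)
out_vox = align([x.T for x in data], method='procrustes', axis=0)

print(np.allclose(out_time['transformed'][0], out_vox['transformed'][0].T))
print(np.allclose(out_time['common_model'], out_vox['common_model'].T))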
assert data[0].shape() == out['common_model'].shape() + transformed = np.dot(d1.data.T,out['transformation_matrix'][0]) + np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed.T)) + + out2 = align(data, method='procrustes', axis=1) + assert len(data) == len(out2['transformed']) + assert data[0].shape() == out2['common_model'].shape() + assert len(data) == len(out2['transformation_matrix']) + assert len(data) == len(out2['disparity']) + centered = data[0].data.T-np.mean(data[0].data.T,0) + transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0])*out2['scale'][0]) + np.testing.assert_almost_equal(0,np.sum(out2['transformed'][0].data-transformed.T)) + assert out['transformed'][0].shape() == out2['transformed'][0].shape() + assert out['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape + diff --git a/nltools/utils.py b/nltools/utils.py index 6dfca728..539521b5 100644 --- a/nltools/utils.py +++ b/nltools/utils.py @@ -8,11 +8,6 @@ __all__ = ['get_resource_path', 'get_anatomical', 'set_algorithm', - 'spm_hrf', - 'glover_hrf', - 'spm_time_derivative', - 'glover_time_derivative', - 'spm_dispersion_derivative', 'attempt_to_import', 'all_same', 'concatenate', @@ -27,10 +22,12 @@ import importlib import os from sklearn.pipeline import Pipeline +from sklearn.utils import check_random_state from scipy.stats import gamma import numpy as np import collections from types import GeneratorType +from sklearn.utils import check_random_state def get_resource_path(): """ Get path to nltools resource directory. """ @@ -161,133 +158,6 @@ def load_class(import_string): Valid options are 'pca','ica', 'nnmf', 'fa'""") return alg -# The following are nipy source code implementations of the hemodynamic response function HRF -# See the included nipy license file for use permission. - -def _gamma_difference_hrf(tr, oversampling=16, time_length=32., onset=0., - delay=6, undershoot=16., dispersion=1., - u_dispersion=1., ratio=0.167): - """ Compute an hrf as the difference of two gamma functions - Parameters - ---------- - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the hrf - Returns - ------- - hrf: array of shape(length / tr * oversampling, float), - hrf sampling on the oversampled time grid - """ - dt = tr / oversampling - time_stamps = np.linspace(0, time_length, float(time_length) / dt) - time_stamps -= onset / dt - hrf = gamma.pdf(time_stamps, delay / dispersion, dt / dispersion) - \ - ratio * gamma.pdf( - time_stamps, undershoot / u_dispersion, dt / u_dispersion) - hrf /= hrf.sum() - return hrf - - -def spm_hrf(tr, oversampling=16, time_length=32., onset=0.): - """ Implementation of the SPM hrf model. - - Args: - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the response - - Returns: - hrf: array of shape(length / tr * oversampling, float), - hrf sampling on the oversampled time grid - - """ - - return _gamma_difference_hrf(tr, oversampling, time_length, onset) - - -def glover_hrf(tr, oversampling=16, time_length=32., onset=0.): - """ Implementation of the Glover hrf model. 
- - Args: - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the response - - Returns: - hrf: array of shape(length / tr * oversampling, float), - hrf sampling on the oversampled time grid - - """ - - return _gamma_difference_hrf(tr, oversampling, time_length, onset, - delay=6, undershoot=12., dispersion=.9, - u_dispersion=.9, ratio=.35) - - -def spm_time_derivative(tr, oversampling=16, time_length=32., onset=0.): - """ Implementation of the SPM time derivative hrf (dhrf) model. - - Args: - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the response - - Returns: - dhrf: array of shape(length / tr, float), - dhrf sampling on the provided grid - - """ - - do = .1 - dhrf = 1. / do * (spm_hrf(tr, oversampling, time_length, onset + do) - - spm_hrf(tr, oversampling, time_length, onset)) - return dhrf - -def glover_time_derivative(tr, oversampling=16, time_length=32., onset=0.): - """Implementation of the flover time derivative hrf (dhrf) model. - - Args: - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the response - - Returns: - dhrf: array of shape(length / tr, float), - dhrf sampling on the provided grid - - """ - - do = .1 - dhrf = 1. / do * (glover_hrf(tr, oversampling, time_length, onset + do) - - glover_hrf(tr, oversampling, time_length, onset)) - return dhrf - -def spm_dispersion_derivative(tr, oversampling=16, time_length=32., onset=0.): - """Implementation of the SPM dispersion derivative hrf model. - - Args: - tr: float, scan repeat time, in seconds - oversampling: int, temporal oversampling factor, optional - time_length: float, hrf kernel length, in seconds - onset: float, onset of the response - - Returns: - dhrf: array of shape(length / tr * oversampling, float), - dhrf sampling on the oversampled time grid - - """ - - dd = .01 - dhrf = 1. / dd * (_gamma_difference_hrf(tr, oversampling, time_length, - onset, dispersion=1. + dd) - - spm_hrf(tr, oversampling, time_length, onset)) - return dhrf - def isiterable(obj): ''' Returns True if the object is one of allowable iterable types. ''' return isinstance(obj, (list, tuple, GeneratorType)) @@ -327,10 +197,11 @@ def concatenate(data): raise ValueError('Make sure all objects in the list are the same type.') return out -def _bootstrap_apply_func(data, function, *args, **kwargs): +def _bootstrap_apply_func(data, function, random_state=None, *args, **kwargs): '''Bootstrap helper function. 
Sample with replacement and apply function''' + random_state = check_random_state(random_state) data_row_id = range(data.shape()[0]) - new_dat = data[np.random.choice(data_row_id, + new_dat = data[random_state.choice(data_row_id, size=len(data_row_id), replace=True)] return getattr(new_dat, function)( *args, **kwargs) diff --git a/nltools/version.py b/nltools/version.py index 539ce5ff..5365f613 100644 --- a/nltools/version.py +++ b/nltools/version.py @@ -1,4 +1,4 @@ """Specifies current version of nltools to be used by setup.py and __init__.py """ -__version__ = '0.3.5' +__version__ = '0.3.6' diff --git a/setup.py b/setup.py index 3d5e67d8..3255d542 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,8 @@ 'analysis engine powering www.neuro-learn.org.', keywords = ['neuroimaging', 'preprocessing', 'analysis','machine-learning'], classifiers = [ - "Programming Language :: Python", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.6", "Operating System :: OS Independent", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License"
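# --- Illustrative sketch (not part of the diff): the seeded bootstrap helper pattern ---
# The utils.py change above threads a random_state through _bootstrap_apply_func via
# sklearn's check_random_state. A minimal standalone version of that resampling
# pattern (names here are illustrative, not the library's API):
import numpy as np
from sklearn.utils import check_random_state

def bootstrap_mean_ci(values, n_samples=1000, random_state=None):
    random_state = check_random_state(random_state)
    values = np.asarray(values)
    idx = np.arange(len(values))
    # sample rows with replacement; fixing random_state makes the interval reproducible
    boots = [values[random_state.choice(idx, size=len(idx), replace=True)].mean()
             for _ in range(n_samples)]
    return np.percentile(boots, [2.5, 97.5])

# bootstrap_mean_ci(np.random.randn(100), random_state=42) -> reproducible 95% CI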