brainiak · mihaic · Sep 22, 2016 · Sep 2, 2016 · Sep 2, 2016 · Sep 2, 2016
diff --git a/brainiak/reprsimil/__init__.py b/brainiak/reprsimil/__init__.py
@@ -0,0 +1,15 @@
+#  Copyright 2016 Mingbo Cai, Princeton Neuroscience Institute,
+#  Princeton University
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""A Bayesian method to perform Representational Similarity Analysis"""
diff --git a/brainiak/reprsimil/brsa.py b/brainiak/reprsimil/brsa.py
diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py
@@ -1,4 +1,4 @@
-#  Copyright 2016 Intel Corporation
+#  Copyright 2016 Intel Corporation, Princeton University
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,6 +13,9 @@
 #  limitations under the License.
 import numpy as np
 import logging
+import re
+import warnings
+import os.path
 
 """
 Some utility functions that can be used by different algorithms
@@ -49,7 +52,6 @@ def from_sym_2_tri(symm):
     """convert a 2D symmetric matrix to an upper
        triangular matrix in 1D format
 
-
     Parameters
     ----------
 
@@ -71,6 +73,7 @@ def from_sym_2_tri(symm):
 
 def fast_inv(a):
     """to invert a 2D matrix
+
     Parameters
     ----------
 
@@ -100,3 +103,164 @@ def fast_inv(a):
     except np.linalg.linalg.LinAlgError:
         logging.exception('Error from np.linalg.solve')
         raise
+
+
+def cov2corr(cov):
+    """Convert a covariance matrix into the matching correlation matrix.
+
+    Parameters
+    ----------
+
+    cov: 2D array or array-like
+        covariance matrix; its diagonal holds the variances
+
+    Returns
+    -------
+
+    corr: 2D array
+        cov with entry (i, j) divided by sqrt(cov[i, i] * cov[j, j])
+
+    """
+    # accept nested lists too; ndarrays and subclasses pass through as is
+    cov = np.asanyarray(cov)
+    assert cov.ndim == 2, 'covariance matrix should be 2D array'
+    inv_sd = 1 / np.sqrt(np.diag(cov))
+    return cov * inv_sd[None, :] * inv_sd[:, None]
+
+
+class read_design:
+    """Read a design matrix from a .1D file written by AFNI's 3dDeconvolve.
+
+        Parameters
+        ----------
+
+        fname: string, the address of the file to read. It must end in
+            .1D, .1d or .txt, otherwise ValueError is raised. With None,
+            an empty design is created instead.
+
+        include_orth: Boolean, whether the columns of type 0 ("orthogonal"
+            regressors, usually head motion parameters) are listed in
+            cols_used. All columns are read into design regardless.
+
+        include_pols: Boolean, whether the columns of type -1 (polynomial
+            regressors capturing slow drift of signals) are listed in
+            cols_used.
+
+        Attributes
+        ----------
+
+        design: 2d array. The design matrix read in from the file.
+        n_col: number of total columns in the design matrix.
+
+        column_types: 1d array. the types of each column in the design matrix.
+            0 for orthogonal regressors (usually head motion parameters),
+            -1 for polynomial basis (capturing slow drift of signals),
+            values > 0 for stimulus conditions
+
+        n_basis, n_stim, n_orth: scalars. The numbers of columns of type -1,
+            > 0 and 0 respectively; all stay 0 when the file carries no
+            ColumnGroups header.
+        StimLabels: list. The entries of the StimLabels header field.
+
+        cols_used: 1d array. Sorted indices of the stimulus columns plus
+            those enabled by include_orth / include_pols.
+        design_used: 2d array. design[:, cols_used]; a leading column of
+            1's is added as baseline when include_pols is False.
+        n_TR: scalar. The number of rows of design_used.
+    """
+    def __init__(self, fname=None, include_orth=False, include_pols=False):
+        if fname is None:
+            # no file given: start from an empty design
+            self.design = np.zeros([0, 0])
+            self.n_col = 0
+            self.column_types = np.ones(0)
+            self.n_basis = 0
+            self.n_stim = 0
+            self.n_orth = 0
+            self.StimLabels = []
+        else:
+            ext = os.path.splitext(fname)[1]
+            # We assume all AFNI 1D files have extension of 1D or 1d or txt
+            if ext not in ['.1D', '.1d', '.txt']:
+                # fail early: no attribute would be set for the code below
+                raise ValueError(
+                    'Unsupported design file extension: %r' % ext)
+            self.readAFNI(fname=fname)
+
+        self.include_orth = include_orth
+        self.include_pols = include_pols
+        # every positive type marks a stimulus column, as n_stim counts them
+        self.cols_used = np.where(self.column_types > 0)[0]
+        if self.include_orth:
+            self.cols_used = np.sort(
+                np.append(self.cols_used, np.where(self.column_types == 0)[0]))
+        if self.include_pols:
+            self.cols_used = np.sort(np.append(
+                self.cols_used, np.where(self.column_types == -1)[0]))
+        self.design_used = self.design[:, self.cols_used]
+        if not self.include_pols:
+            # baseline is not included, then we add a column of all 1's
+            self.design_used = np.insert(self.design_used, 0, 1, axis=1)
+        self.n_TR = np.size(self.design_used, axis=0)
+
+    def readAFNI(self, fname):
+        """Parse the matrix and the header fields of an AFNI design file."""
+        self.n_basis = 0
+        self.n_stim = 0
+        self.n_orth = 0
+        self.StimLabels = []
+        self.design = np.loadtxt(fname, ndmin=2)
+        with open(fname) as f:
+            all_text = f.read()
+
+        find_n_column = re.compile(
+            r'^#[ ]+ni_type[ ]+=[ ]+"(?P<n_col>\d+)[*]', re.MULTILINE)
+        n_col_found = find_n_column.search(all_text)
+        # the matrix actually read is authoritative for the column count
+        self.n_col = np.size(self.design, axis=1)
+        if n_col_found and int(n_col_found.group('n_col')) != self.n_col:
+            warnings.warn(
+                'The number of columns in the design matrix '
+                'does not match the header information')
+
+        self.column_types = np.ones(self.n_col)
+        # default that all columns are conditions of interest
+
+        find_ColumnGroups = re.compile(
+            r'^#[ ]+ColumnGroups[ ]+=[ ]+"(?P<CGtext>.+)"', re.MULTILINE)
+        CG_found = find_ColumnGroups.search(all_text)
+        if CG_found:
+            CG_text = CG_found.group('CGtext').split(',')
+            curr_idx = 0
+            for CG in CG_text:
+                split_by_at = CG.split('@')
+                if len(split_by_at) == 2:
+                    # the first tells the number of columns in this condition
+                    # the second tells the condition type
+                    n_this_cond = int(split_by_at[0])
+                    self.column_types[curr_idx:curr_idx + n_this_cond] = \
+                        int(split_by_at[1])
+                    curr_idx += n_this_cond
+                elif len(split_by_at) == 1 and '..' not in CG:
+                    # Just a number, and not the type like '1..4'. A plain
+                    # substring test is needed: as a regex '..' matches any
+                    # two characters, e.g. '-1' or '12'
+                    self.column_types[curr_idx] = int(CG)
+                    curr_idx += 1
+                else:  # a range 'a..b' with one column per listed value
+                    first, last = (int(v) for v in CG.split('..'))
+                    n_this_cond = last - first + 1
+                    self.column_types[curr_idx:curr_idx + n_this_cond] = 1
+                    curr_idx += n_this_cond
+            self.n_basis = np.sum(self.column_types == -1)
+            self.n_stim = np.sum(self.column_types > 0)
+            self.n_orth = np.sum(self.column_types == 0)
+
+        find_StimLabels = re.compile(
+            r'^#[ ]+StimLabels[ ]+=[ ]+"(?P<SLtext>.+)"', re.MULTILINE)
+        StimLabels_found = find_StimLabels.search(all_text)
+        if StimLabels_found:
+            self.StimLabels = \
+                re.split(r'[ ;]+', StimLabels_found.group('SLtext'))
+        else:
+            self.StimLabels = []
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,6 +1,7 @@
 coverage
 flake8
 flake8-print
+numdifftools
 pytest
 pytest-cython
 restructuredtext-lint