Merge pull request #1342 from flatironinstitute/dev
Dev -> Main for release of 1.11.0
pgunn committed May 3, 2024
2 parents 59708bd + 63c1932 commit a30d1b1
Showing 42 changed files with 1,237 additions and 1,349 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yml
@@ -26,7 +26,7 @@ jobs:
with:
auto-update-conda: true
python-version: ${{ matrix.python-version }}
-environment-file: environment.yml
+environment-file: environment-minimal.yml
activate-environment: caiman

- name: Install OS Dependencies
2 changes: 2 additions & 0 deletions README.md
@@ -62,6 +62,8 @@ The main use cases and notebooks are listed in the following table:

A comprehensive list of references, where you can find detailed discussion of the methods and their development, can be found [here](https://caiman.readthedocs.io/en/master/CaImAn_features_and_references.html#references).

+# CLI demos
+Caiman also provides command-line demos, similar to the notebooks, that demonstrate how to work with the codebase outside of Jupyter. They take their configuration primarily from JSON files (which you will want to modify for your data and its specifics) and should be reasonably easy to adapt if they don't already do what you want (in particular, saving results; a standard output format for Caiman is planned for a future release). To run them, activate your environment and find the demos in demos/general under your caiman data directory; you can run them like any other Python application, or edit them in your code editor. Each demo comes with a JSON configuration file that you can customise. A README in the demos directory covers some of this.

# How to get help
- [Online documentation](https://caiman.readthedocs.io/en/latest/) contains a lot of general information about Caiman, the parameters, how to interpret its outputs, and more.
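As a sketch of the workflow the new README text describes, the snippet below copies and edits a demo's JSON configuration before a run. The filename demo_pipeline.json, the "fnames" key, and the --configfile flag are assumptions for illustration, not a documented schema.

```python
# Hypothetical sketch: point a CLI demo's JSON config at your own data.
import json

with open("demo_pipeline.json") as f:       # config shipped alongside the demo (name assumed)
    cfg = json.load(f)

cfg["fnames"] = ["/path/to/my_movie.tif"]   # key name assumed; adjust for your data

with open("my_pipeline.json", "w") as f:
    json.dump(cfg, f, indent=2)

# Then run the demo with the edited config, e.g.:
#   python demo_pipeline.py --configfile my_pipeline.json   (flag name assumed)
```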
121 changes: 28 additions & 93 deletions caiman/base/movies.py
@@ -673,69 +673,6 @@ def NonnegativeMatrixFactorization(self,

return space_components, time_components

-def online_NMF(self,
-n_components: int = 30,
-method: str = 'nnsc',
-lambda1: int = 100,
-iterations: int = -5,
-model=None,
-**kwargs) -> tuple[np.ndarray, np.ndarray]:
-""" Method performing online matrix factorization and using the spams
-(http://spams-devel.gforge.inria.fr/doc-python/html/index.html) package from Inria.
-Implements both the nmf and nnsc methods
-Args:
-n_components: int
-method: 'nnsc' or 'nmf' (see http://spams-devel.gforge.inria.fr/doc-python/html/index.html)
-lambda1: see http://spams-devel.gforge.inria.fr/doc-python/html/index.html
-iterations: see http://spams-devel.gforge.inria.fr/doc-python/html/index.html
-batchsize: see http://spams-devel.gforge.inria.fr/doc-python/html/index.html
-model: see http://spams-devel.gforge.inria.fr/doc-python/html/index.html
-**kwargs: more arguments to be passed to nmf or nnsc
-Returns:
-time_comps
-space_comps
-"""
-try:
-import spams # XXX consider moving this to the head of the file
-except:
-logging.error("You need to install the SPAMS package")
-raise
-
-T, d1, d2 = np.shape(self)
-d = d1 * d2
-X = np.asfortranarray(np.reshape(self, [T, d], order='F'))
-
-if method == 'nmf':
-(time_comps, V) = spams.nmf(X, return_lasso=True, K=n_components, numThreads=4, iter=iterations, **kwargs)
-
-elif method == 'nnsc':
-(time_comps, V) = spams.nnsc(X,
-return_lasso=True,
-K=n_components,
-lambda1=lambda1,
-iter=iterations,
-model=model,
-**kwargs)
-else:
-raise Exception('Method unknown')
-
-space_comps = []
-
-for _, mm in enumerate(V):
-space_comps.append(np.reshape(mm.todense(), (d1, d2), order='F'))
-
-return time_comps, np.array(space_comps)

def IPCA(self, components: int = 50, batch: int = 1000) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Iterative Principal Component analysis, see sklearn.decomposition.incremental_pca
@@ -1244,7 +1181,8 @@ def load(file_name: Union[str, list[str]],
dimension of the movie along x and y if loading from a two dimensional numpy array
var_name_hdf5: str
-if loading from hdf5/n5 name of the dataset inside the file to load (ignored if the file only has one dataset)
+if loading from hdf5/n5 name of the dataset inside the file to load (ignored if the file only has one dataset).
+This is also used for (new-style) mat files
in_memory: bool=False
This changes the behaviour of the function for npy files to be a readwrite rather than readonly memmap,
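A minimal sketch of the clarified parameter in use, assuming a recording stored in a dataset named 'mov' inside a new-style (HDF5-backed, v7.3) .mat file; the filename is invented:

```python
# Hedged example: var_name_hdf5 selects the dataset inside hdf5/n5 files,
# and (with this change) inside new-style .mat files as well.
import caiman

m = caiman.load('recording.mat', var_name_hdf5='mov')  # filename/dataset assumed
print(m.shape)  # movies are (T, x, y)
```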
@@ -1314,17 +1252,6 @@ def load(file_name: Union[str, list[str]],
basename, extension = os.path.splitext(file_name)

extension = extension.lower()
-if extension == '.mat':
-logging.warning('Loading a *.mat file. x- and y- dimensions ' +
-'might have been swapped.')
-try: # scipy >= 1.8
-byte_stream, file_opened = scipy.io.matlab._mio._open_file(file_name, appendmat=False)
-mjv, mnv = scipy.io.matlab.miobase.get_matfile_version(byte_stream)
-except: # scipy <= 1.7
-byte_stream, file_opened = scipy.io.matlab.mio._open_file(file_name, appendmat=False)
-mjv, mnv = scipy.io.matlab.mio.get_matfile_version(byte_stream)
-if mjv == 2:
-extension = '.h5'

if extension in ['.tif', '.tiff', '.btf']: # load tif file
with tifffile.TiffFile(file_name) as tffl:
@@ -1512,23 +1439,23 @@ def load(file_name: Union[str, list[str]],
else:
input_arr = input_arr[np.newaxis, :, :]

-elif extension == '.mat': # load npy file
-input_arr = scipy.io.loadmat(file_name)['data']
-input_arr = np.rollaxis(input_arr, 2, -3)
-if subindices is not None:
-input_arr = input_arr[subindices]

elif extension == '.npz': # load movie from saved file
if subindices is not None:
raise Exception('Subindices not implemented')
with np.load(file_name) as f:
return movie(**f).astype(outtype)

-elif extension in ('.hdf5', '.h5', '.nwb', 'n5', 'zarr'):
+elif extension in ('.hdf5', '.h5', '.mat', '.nwb', 'n5', 'zarr'):
if extension in ('n5', 'zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
-f = h5py.File(file_name, "r")
+try:
+f = h5py.File(file_name, "r")
+except:
+if extension == '.mat':
+raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
+else:
+raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed. Sync with get_file_size() !!
fkeys = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(fkeys) == 1: # If the file we're parsing has only one dataset inside it,
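For files that hit the new error path, a plausible one-off fix is to rewrite the old-style (pre-v7.3) .mat movie as HDF5. A hedged sketch, assuming the movie lives in a variable named 'data' in the source file:

```python
# Sketch of the conversion the new error message asks for; names assumed.
import h5py
import scipy.io

arr = scipy.io.loadmat('old_style.mat')['data']  # old-style mat files load via scipy
with h5py.File('converted.h5', 'w') as f:
    f.create_dataset('mov', data=arr)            # 'mov' is Caiman's default dataset name
```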
@@ -1951,11 +1878,17 @@ def load_iter(file_name: Union[str, list[str]], subindices=None, var_name_hdf5:
yield frame # was frame[..., 0].astype(outtype)
return

-elif extension in ('.hdf5', '.h5', '.nwb', '.mat', 'n5', 'zarr'):
-if extension in ('n5', 'zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
+elif extension in ('.hdf5', '.h5', '.nwb', '.mat', '.n5', '.zarr'):
+if extension in ('.n5', '.zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
-f = h5py.File(file_name, "r")
+try:
+f = h5py.File(file_name, "r")
+except:
+if extension == '.mat':
+raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
+else:
+raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed.
fkeys = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(fkeys) == 1: # If the hdf5 file we're parsing has only one dataset inside it,
@@ -2010,11 +1943,7 @@ def get_file_size(file_name, var_name_hdf5:str='mov') -> tuple[tuple, Union[int,
if os.path.exists(file_name):
_, extension = os.path.splitext(file_name)[:2]
extension = extension.lower()
-if extension == '.mat':
-byte_stream, file_opened = scipy.io.matlab.mio._open_file(file_name, appendmat=False)
-mjv, mnv = scipy.io.matlab.mio.get_matfile_version(byte_stream)
-if mjv == 2:
-extension = '.h5'

if extension in ['.tif', '.tiff', '.btf']:
tffl = tifffile.TiffFile(file_name)
siz = tffl.series[0].shape
@@ -2042,12 +1971,18 @@ def get_file_size(file_name, var_name_hdf5:str='mov') -> tuple[tuple, Union[int,
filename = os.path.split(file_name)[-1]
Yr, dims, T = caiman.mmapping.load_memmap(os.path.join(
os.path.split(file_name)[0], filename))
-elif extension in ('.h5', '.hdf5', '.nwb', 'n5', 'zarr'):
+elif extension in ('.h5', '.hdf5', '.mat', '.nwb', 'n5', 'zarr'):
# FIXME this doesn't match the logic in load()
if extension in ('n5', 'zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
-f = h5py.File(file_name, "r")
+try:
+f = h5py.File(file_name, "r")
+except:
+if extension == '.mat':
+raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
+else:
+raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed. Sync with movies.py:load() !!
kk = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(kk) == 1: # TODO: Consider recursing into a group to find a dataset
11 changes: 11 additions & 0 deletions caiman/base/timeseries.py
@@ -147,6 +147,7 @@ def save(self,
Args:
file_name: str
name of file. Possible formats are tif, avi, npz, mmap and hdf5
+If a path is not part of the filename, it will be saved into a temporary directory under caiman_data
to32: Bool
whether to transform to 32 bits
@@ -165,6 +166,9 @@
if saving as .tif, specifies the compression level
if saving as .avi or .mkv, compress=0 uses the IYUV codec, otherwise the FFV1 codec is used
+Returns:
+generated_filename: The full filename, path included, where the data was saved
Raises:
Exception 'Extension Unknown'
@@ -197,6 +201,8 @@ def foo(i):
if to32 and not ('float32' in str(self.dtype)):
curfr = curfr.astype(np.float32)
tif.save(curfr, compress=compress)
+return file_name

elif extension == '.npz':
if to32 and not ('float32' in str(self.dtype)):
input_arr = self.astype(np.float32)
@@ -209,6 +215,8 @@
fr=self.fr,
meta_data=self.meta_data,
file_name=self.file_name)
+return file_name

elif extension in ('.avi', '.mkv'):
codec = None
if compress == 0:
@@ -241,6 +249,7 @@ def foo(i):
for d in data:
vw.write(cv2.cvtColor(d, cv2.COLOR_GRAY2BGR))
vw.release()
+return file_name

elif extension == '.mat':
if self.file_name[0] is not None:
@@ -271,6 +280,7 @@ def foo(i):
'meta_data': self.meta_data,
'file_name': f_name
})
+return file_name

elif extension in ('.hdf5', '.h5'):
with h5py.File(file_name, "w") as f:
@@ -289,6 +299,7 @@ def foo(i):
if self.meta_data[0] is not None:
logging.debug("Metadata for saved file: " + str(self.meta_data))
dset.attrs["meta_data"] = cpk.dumps(self.meta_data)
+return file_name
elif extension == '.mmap':
base_name = name

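The practical effect of the new return statements, sketched below. The input filename is an assumption, and per the docstring change above, a bare output filename may be relocated under the caiman_data temporary directory:

```python
# Hedged example: save() now returns the full path it actually wrote to,
# so callers no longer need to reconstruct it.
import caiman

m = caiman.load('example_movies/demoMovie.tif')  # path assumed
saved_path = m.save('demo_out.hdf5')             # bare filename: may be relocated
print(saved_path)                                # full path, directory included
```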
27 changes: 9 additions & 18 deletions caiman/behavior/behavior.py
@@ -37,6 +37,10 @@ def select_roi(img: np.ndarray, n_rois: int = 1) -> list:
each element is a mask considered an ROI
"""

+# FIXME This function depends on particular builds of OpenCV
+# and may be difficult to support moving forward; would be good to
+# move this kind of code out of the core and find more portable ways
+# to do it
masks = []
for _ in range(n_rois):
fig = plt.figure()
@@ -130,8 +134,8 @@ def extract_magnitude_and_angle_from_OF(spatial_filter_,
x, y = scipy.signal.medfilt(time_trace, kernel_size=[1, 1]).T
x = scipy.signal.savgol_filter(x.squeeze(), sav_filter_size, 1)
y = scipy.signal.savgol_filter(y.squeeze(), sav_filter_size, 1)
-mag, dirct = to_polar(x - caiman.components_evaluation.mode_robust(x),
-y - caiman.components_evaluation.mode_robust(y))
+mag, dirct = to_polar(x - caiman.utils.stats.mode_robust(x),
+y - caiman.utils.stats.mode_robust(y))
dirct = scipy.signal.medfilt(dirct.squeeze(), kernel_size=1).T

# normalize to pixel units
@@ -325,25 +329,12 @@ def extract_components(mov_tot,

if method_factorization == 'nmf':
nmf = NMF(n_components=n_components, **kwargs)

time_trace = nmf.fit_transform(newm)
spatial_filter = nmf.components_
spatial_filter = np.concatenate([np.reshape(sp, (d1, d2))[np.newaxis, :, :] for sp in spatial_filter], axis=0)

-elif method_factorization == 'dict_learn':
-import spams
-newm = np.asfortranarray(newm, dtype=np.float32)
-time_trace = spams.trainDL(newm, K=n_components, mode=0, lambda1=1, posAlpha=True, iter=max_iter_DL)
-
-spatial_filter = spams.lasso(newm,
-D=time_trace,
-return_reg_path=False,
-lambda1=0.01,
-mode=spams.spams_wrap.PENALTY,
-pos=True)
-
-spatial_filter = np.concatenate([np.reshape(sp, (d1, d2))[np.newaxis, :, :] for sp in spatial_filter.toarray()],
-axis=0)
else:
+# Caiman used to support a method_factorization called dict_learn, implemented using spams.lasso
+raise Exception(f"Unknown or unsupported method_factorization: {method_factorization}")

time_trace = [np.reshape(ttr, (c, T)).T for ttr in time_trace.T]

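With dict_learn gone, 'nmf' is the remaining factorization path. A self-contained sketch of what that branch computes, using invented toy dimensions in place of the real movie:

```python
# Hedged sketch of the surviving 'nmf' branch: factorize a flattened movie
# into nonnegative time traces and spatial filters.
import numpy as np
from sklearn.decomposition import NMF

T, d1, d2 = 100, 16, 16
newm = np.random.rand(T, d1 * d2)                    # stand-in for the real movie data
nmf = NMF(n_components=3, max_iter=500)
time_trace = nmf.fit_transform(newm)                 # shape (T, n_components)
spatial_filter = nmf.components_.reshape(3, d1, d2)  # one spatial filter per component
```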
10 changes: 5 additions & 5 deletions caiman/caimanmanager.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python

import argparse
-import distutils.dir_util
import filecmp
import glob
import os
@@ -53,20 +52,21 @@

def do_install_to(targdir: str, inplace: bool = False, force: bool = False) -> None:
global sourcedir_base
+ignore_pycache=shutil.ignore_patterns('__pycache__')
if os.path.isdir(targdir) and not force:
raise Exception(targdir + " already exists. You may move it out of the way, remove it, or use --force")
if not inplace: # In this case we rely on what setup.py put in the share directory for the module
if not force:
-shutil.copytree(sourcedir_base, targdir)
+shutil.copytree(sourcedir_base, targdir, ignore=ignore_pycache)
else:
-distutils.dir_util.copy_tree(sourcedir_base, targdir)
+shutil.copytree(sourcedir_base, targdir, ignore=ignore_pycache, dirs_exist_ok=True)
os.makedirs(os.path.join(targdir, 'temp' ), exist_ok=True)
else: # here we recreate the other logical path here. Maintenance concern: Keep these reasonably in sync with what's in setup.py
for copydir in extra_dirs:
if not force:
-shutil.copytree(copydir, os.path.join(targdir, copydir))
+shutil.copytree(copydir, os.path.join(targdir, copydir), ignore=ignore_pycache)
else:
-distutils.dir_util.copy_tree(copydir, os.path.join(targdir, copydir))
+shutil.copytree(copydir, os.path.join(targdir, copydir), ignore=ignore_pycache, dirs_exist_ok=True)
os.makedirs(os.path.join(targdir, 'example_movies'), exist_ok=True)
os.makedirs(os.path.join(targdir, 'temp' ), exist_ok=True)
for stdmovie in standard_movies:
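The replacement pattern above swaps distutils.dir_util.copy_tree (distutils was removed in Python 3.12) for shutil.copytree, in one self-contained sketch; the source and destination paths are assumptions:

```python
# Hedged sketch: shutil.copytree covers the copy-into-existing-directory
# case via dirs_exist_ok (Python 3.8+), and skips __pycache__ while copying.
import shutil

ignore_pycache = shutil.ignore_patterns('__pycache__')
shutil.copytree('demos', '/tmp/caiman_data/demos',   # paths assumed
                ignore=ignore_pycache, dirs_exist_ok=True)
```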
4 changes: 2 additions & 2 deletions caiman/mmapping.py
@@ -405,7 +405,7 @@ def save_memmap(filenames:list[str],
recompute_each_memmap = True


-if recompute_each_memmap or (remove_init>0) or (idx_xy is not None)\
+if recompute_each_memmap or (remove_init > 0) or (idx_xy is not None)\
or (xy_shifts is not None) or (add_to_movie != 0) or (border_to_0>0)\
or slices is not None:

@@ -527,7 +527,7 @@ def save_memmap(filenames:list[str],
sys.stdout.flush()
Ttot = Ttot + T

-fname_new = caiman.paths.fn_relocated(fname_tot + f'_frames_{Ttot}.mmap')
+fname_new = os.path.join(caiman.paths.get_tempdir(), caiman.paths.fn_relocated(f'{fname_tot}_frames_{Ttot}.mmap'))
try:
# need to explicitly remove destination on windows
os.unlink(fname_new)
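The behavioral consequence of the relocated fname_new, sketched with an invented input file. caiman.save_memmap and caiman.paths.get_tempdir appear in this diff, but treat the exact directory layout as an assumption:

```python
# Hedged example: memmap outputs now land in Caiman's temp directory
# rather than next to the input file.
import caiman
import caiman.paths

print(caiman.paths.get_tempdir())                  # e.g. ~/caiman_data/temp
fname_new = caiman.save_memmap(['demoMovie.tif'],  # input name assumed
                               base_name='Yr', order='C')
print(fname_new)                                   # full path inside the temp dir
```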

