From 73b3eddc35fbe5cb55b6a946bb3449c0fe448f6c Mon Sep 17 00:00:00 2001
From: olivier
Date: Mon, 20 Apr 2020 15:01:37 +0100
Subject: [PATCH 01/11] save a sample of waveforms

---
 phylib/io/alf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 73ebcea..74c743f 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -28,6 +28,7 @@
 #------------------------------------------------------------------------------
 
 NCH_WAVEFORMS = 32  # number of channels to be saved in templates.waveforms and channels.waveforms
+NSAMPLE_WAVEFORMS = 500  # number of waveforms sampled out of the raw data
 
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
@@ -116,7 +117,7 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
         if not self.out_path.exists():
             self.out_path.mkdir()
 
-        with tqdm(desc="Converting to ALF", total=95) as bar:
+        with tqdm(desc="Converting to ALF", total=125) as bar:
             self.copy_files(force=force)
             bar.update(10)
             self.make_spike_times_amplitudes()
@@ -125,6 +126,8 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             bar.update(10)
             self.make_channel_objects()
             bar.update(5)
+            self.model.save_spikes_subset_waveforms(NSAMPLE_WAVEFORMS, NCH_WAVEFORMS)
+            bar.update(30)
             self.make_depths()
             bar.update(20)
             self.make_template_object()

From d652d7fc2937043b33d5c26cfd46735b15ca85c4 Mon Sep 17 00:00:00 2001
From: olivier
Date: Mon, 1 Jun 2020 19:00:27 +0100
Subject: [PATCH 02/11] add waveforms samples to copy script

---
 phylib/io/alf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 74c743f..f44f255 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -118,8 +118,6 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.out_path.mkdir()
 
         with tqdm(desc="Converting to ALF", total=125) as bar:
-            self.copy_files(force=force)
-            bar.update(10)
             self.make_spike_times_amplitudes()
             bar.update(10)
             self.make_cluster_objects()
@@ -134,6 +132,8 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             bar.update(30)
             self.rm_files()
             bar.update(10)
+            self.copy_files(force=force)
+            bar.update(10)
             self.rename_with_label()
 
         # Return the TemplateModel of the converted ALF dataset if the params.py file exists.
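
Editor's note: patches 01 and 02 introduce NSAMPLE_WAVEFORMS and schedule
model.save_spikes_subset_waveforms() inside the conversion loop (patch 02 also
defers copy_files() until after rm_files()). The underlying idea, cutting a
capped sample of fixed-length snippets around spike samples out of the raw
data, looks roughly like the sketch below. This is a hedged illustration with
made-up names (sample_spike_waveforms, traces), not phylib's implementation:

    import numpy as np

    def sample_spike_waveforms(traces, spike_samples, n_samples=82, n_spikes=500, seed=0):
        """Cut fixed-length snippets around a random subset of spike samples
        from a (n_samples_total, n_channels) raw data array."""
        rng = np.random.default_rng(seed)
        subset = rng.choice(spike_samples, size=min(n_spikes, len(spike_samples)), replace=False)
        half = n_samples // 2
        # keep only spikes whose window fits entirely inside the recording
        subset = np.sort(subset[(subset > half) & (subset < traces.shape[0] - half)])
        return np.stack([traces[s - half:s + half, :] for s in subset])
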
From 2444635fd77bdcbe023831840c392778a89de008 Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 3 Jun 2020 00:36:58 +0100
Subject: [PATCH 03/11] fix physical unit amplitudes for waveforms and spikes -
 fix spike depths

---
 phylib/io/alf.py    | 42 +++++++++++++++++++++------------
 phylib/io/model.py  | 51 ++++++++++++++++++++++++++++---------------
 phylib/io/traces.py |  9 ++++----
 3 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index f44f255..f057f2a 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -118,18 +118,18 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.out_path.mkdir()
 
         with tqdm(desc="Converting to ALF", total=125) as bar:
-            self.make_spike_times_amplitudes()
             bar.update(10)
             self.make_cluster_objects()
             bar.update(10)
             self.make_channel_objects()
             bar.update(5)
-            self.model.save_spikes_subset_waveforms(NSAMPLE_WAVEFORMS, NCH_WAVEFORMS)
+            self.make_template_and_spikes_objects()
             bar.update(30)
+            self.model.save_spikes_subset_waveforms(
+                NSAMPLE_WAVEFORMS, NCH_WAVEFORMS, sample2unit=self.ampfactor)
+            bar.update(50)
             self.make_depths()
             bar.update(20)
-            self.make_template_object()
-            bar.update(30)
             self.rm_files()
             bar.update(10)
             self.copy_files(force=force)
             bar.update(10)
@@ -168,13 +168,6 @@ def _save_npy(self, filename, arr):
         """Save an array into a .npy file."""
         np.save(self.out_path / filename, arr)
 
-    def make_spike_times_amplitudes(self):
-        """We cannot just rename/copy spike_times.npy because it is in unit of
-        *samples*, and not in seconds."""
-        self._save_npy('spikes.times.npy', self.model.spike_times)
-        self._save_npy('spikes.samples.npy', self.model.spike_samples)
-        self._save_npy('spikes.amps.npy', self.model.get_amplitudes_true() * self.ampfactor)
-
     def make_cluster_objects(self):
         """Create clusters.channels, clusters.waveformsDuration and clusters.amps"""
 
@@ -229,14 +222,37 @@ def make_depths(self):
         else:
             spikes_depths = self.model.get_depths()
         # if PC features are provided, compute the depth as the weighted sum of coordinates
-
+        nbatch = 50000
+        c = 0
+        spikes_depths = np.zeros_like(self.model.spike_times) * np.nan
+        nspi = spikes_depths.shape[0]
+        while True:
+            ispi = np.arange(c, min(c + nbatch, nspi))
+            # take only first component
+            features = self.model.sparse_features.data[ispi, :, 0]
+            features = np.maximum(features, 0) ** 2  # takes only positive values into account
+            ichannels = self.model.sparse_features.cols[self.model.spike_clusters[ispi]]
+            ypos = self.model.channel_positions[ichannels, 1]
+            with np.errstate(divide='ignore'):
+                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
+                                              np.sum(features, axis=1), axis=0))
+            c += nbatch
+            if c >= nspi:
+                break
         self._save_npy('spikes.depths.npy', spikes_depths)
         self._save_npy('clusters.depths.npy', clusters_depths)
 
-    def make_template_object(self):
+    def make_template_and_spikes_objects(self):
         """Creates the template waveforms sparse object
         Without manual curation, it also corresponds to clusters waveforms objects.
""" + # "We cannot just rename/copy spike_times.npy because it is in unit of samples, + # and not seconds + self._save_npy('spikes.times.npy', self.model.spike_times) + self._save_npy('spikes.samples.npy', self.model.spike_samples) + spike_amps, templates_volts = self.model.get_amplitudes_true(self.ampfactor) + self._save_npy('spikes.amps.npy', spike_amps) + if self.model.sparse_templates.cols: raise(NotImplementedError("Sparse template export to ALF not implemented yet")) else: diff --git a/phylib/io/model.py b/phylib/io/model.py index b74c464..74470b5 100644 --- a/phylib/io/model.py +++ b/phylib/io/model.py @@ -956,23 +956,39 @@ def get_depths(self): return spike_depths - def get_amplitudes_true(self): + def get_amplitudes_true(self, sample2unit=1.): """Convert spike amplitude values to input amplitudes units - via scaling by unwhitened template waveform.""" - # unwhiten template waveforms on their channels of max amplitude - templates_chs = self.templates_channels - templates_wfs = self.sparse_templates.data[np.arange(self.n_templates), :, templates_chs] - templates_wfs_unw = templates_wfs.T * self.wmi[templates_chs, templates_chs] - templates_amps = np.abs( - np.max(templates_wfs_unw, axis=0) - np.min(templates_wfs_unw, axis=0)) - - # scale the spike amplitude values by the template amplitude values - amplitudes_v = np.zeros_like(self.amplitudes) - for t in range(self.n_templates): - idxs = self.get_template_spikes(t) - amplitudes_v[idxs] = self.amplitudes[idxs] * templates_amps[t] + via scaling by unwhitened template waveform. + :param sample2unit float: factor to convert the raw data to a physical unit (defaults 1.) + :returns: spike_amplitudes: np.array [nspikes] spike amplitudes in raw data units + :returns: templates_physical_unit: np.array[ntemplates, nsamples, nchannels]: templates + scaled by their raw data units amplitude estimate""" - return amplitudes_v + # unwhiten template waveforms on their channels of max amplitude + if self.sparse_templates.cols: + raise NotImplementedError + # apply the inverse whitening matrix to the template + templates_wfs = np.zeros_like(self.sparse_templates.data) # nt, ns, nc + for n in np.arange(self.n_templates): + templates_wfs[n, :, :] = np.matmul(self.sparse_templates.data[n, :, :], self.wmi) + + # The amplitude on each channel is the positive peak minus the negative + templates_ch_amps = np.abs(np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1)) + + # The template amplitude is the amplitude of its largest channel + # (but see below for true tempAmps) + templates_amps_unscaled = np.max(templates_ch_amps, axis=1) + spike_amps = templates_amps_unscaled[self.spike_templates] * self.amplitudes + + with np.errstate(divide='ignore'): + # take the average per template + templates_amps = (np.bincount(self.spike_templates, weights=spike_amps) / + np.bincount(self.spike_templates)) + # scale back the template according to the spikes units + templates_physical_unit = templates_wfs / (templates_amps_unscaled * templates_amps + )[:, np.newaxis, np.newaxis] + + return spike_amps * sample2unit, templates_physical_unit * sample2unit #-------------------------------------------------------------------------- # Internal helper methods for public high-level methods @@ -1132,7 +1148,8 @@ def save_spike_clusters(self, spike_clusters): logger.debug("Save spike clusters to `%s`.", path) np.save(path, spike_clusters) - def save_spikes_subset_waveforms(self, max_n_spikes_per_template=None, max_n_channels=None): + def save_spikes_subset_waveforms(self, 
diff --git a/phylib/io/traces.py b/phylib/io/traces.py
index 9fdf2ad..d7fd322 100644
--- a/phylib/io/traces.py
+++ b/phylib/io/traces.py
@@ -641,20 +641,21 @@ def iter_waveforms(traces, spike_samples, spike_channels, n_samples_waveforms=No
 
 
 def export_waveforms(
-        path, traces, spike_samples, spike_channels, n_samples_waveforms=None, cache=False):
+        path, traces, spike_samples, spike_channels, n_samples_waveforms=None, cache=False,
+        sample2unit=1):
    """Export a selection of spike waveforms to a npy file by iterating over the data
    on a chunk by chunk basis."""
    n_spikes = len(spike_samples)
    spike_channels = np.asarray(spike_channels, dtype=np.int32)
    n_channels_loc = spike_channels.shape[1]
    shape = (n_spikes, n_samples_waveforms, n_channels_loc)
-
-    writer = NpyWriter(path, shape, traces.dtype)
+    dtype = traces.dtype if sample2unit is None else np.float
+    writer = NpyWriter(path, shape, dtype)
    size_written = 0
    for waveforms in iter_waveforms(
            traces, spike_samples, spike_channels,
            n_samples_waveforms=n_samples_waveforms, cache=cache):
-        writer.append(waveforms)
+        writer.append(waveforms * sample2unit)
        size_written += waveforms.size
    writer.close()
    assert prod(shape) == size_written
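
Editor's note: the key step in patch 03 is undoing the sorter's whitening
before measuring amplitudes: each template is multiplied by the inverse
whitening matrix wmi, the per-channel amplitude is the peak-to-peak of the
unwhitened waveform, and each spike's amplitude is its template's peak-to-peak
scaled by that spike's private scaling factor. A minimal self-contained sketch
of that arithmetic on toy arrays (illustrative names, not phylib's API):

    import numpy as np

    rng = np.random.default_rng(42)
    n_templates, n_samples, n_channels, n_spikes = 4, 82, 16, 1000

    templates = rng.normal(size=(n_templates, n_samples, n_channels))  # whitened templates
    wmi = np.linalg.inv(np.eye(n_channels) + 0.01 * rng.normal(size=(n_channels, n_channels)))
    spike_templates = rng.integers(0, n_templates, n_spikes)           # template id per spike
    amplitudes = rng.uniform(0.5, 1.5, n_spikes)                       # per-spike scaling factor

    # unwhiten: (n_samples, n_channels) @ (n_channels, n_channels), batched over templates
    templates_unw = templates @ wmi
    # peak-to-peak amplitude per channel, then the loudest channel per template
    ch_amps = templates_unw.max(axis=1) - templates_unw.min(axis=1)    # (n_templates, n_channels)
    template_amps = ch_amps.max(axis=1)                                # (n_templates,)
    # spike amplitudes in raw-data units (multiply by ampfactor to get volts)
    spike_amps = template_amps[spike_templates] * amplitudes
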
From 4fdb1fa31013e39d4f1f91da051536b6cd916009 Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 17 Jun 2020 09:31:05 +0100
Subject: [PATCH 04/11] template amplitudes object

---
 phylib/io/alf.py   | 11 ++++++-----
 phylib/io/model.py | 33 +++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index f057f2a..dc17a3f 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -250,13 +250,14 @@ def make_template_and_spikes_objects(self):
         # and not seconds
         self._save_npy('spikes.times.npy', self.model.spike_times)
         self._save_npy('spikes.samples.npy', self.model.spike_samples)
-        spike_amps, templates_volts = self.model.get_amplitudes_true(self.ampfactor)
+        spike_amps, templates_v, template_amps = self.model.get_amplitudes_true(self.ampfactor)
         self._save_npy('spikes.amps.npy', spike_amps)
+        self._save_npy('templates.amps.npy', template_amps)
 
         if self.model.sparse_templates.cols:
             raise(NotImplementedError("Sparse template export to ALF not implemented yet"))
         else:
-            n_templates, n_wavsamps, nchall = self.model.sparse_templates.data.shape
+            n_templates, n_wavsamps, nchall = templates_v.shape
             ncw = min(NCH_WAVEFORMS, nchall)  # for some datasets, 32 may be too much
             assert(n_templates == self.model.n_templates)
             templates = np.zeros((n_templates, n_wavsamps, ncw), dtype=np.float32)
@@ -269,10 +270,10 @@ def make_template_and_spikes_objects(self):
                     self.model.channel_positions[self.model.templates_channels[t]]), axis=1)
                 channel_distance[self.model.channel_probes != current_probe] += np.inf
                 templates_inds[t, :] = np.argsort(channel_distance)[:ncw]
-                templates[t, ...] = self.model.sparse_templates.data[t, :][:, templates_inds[t, :]]
-            np.save(self.out_path.joinpath('templates.waveforms'), templates * self.ampfactor)
+                templates[t, ...] = templates_v[t, :][:, templates_inds[t, :]]
+            np.save(self.out_path.joinpath('templates.waveforms'), templates)
             np.save(self.out_path.joinpath('templates.waveformsChannels'), templates_inds)
-            np.save(self.out_path.joinpath('clusters.waveforms'), templates * self.ampfactor)
+            np.save(self.out_path.joinpath('clusters.waveforms'), templates)
             np.save(self.out_path.joinpath('clusters.waveformsChannels'), templates_inds)
diff --git a/phylib/io/model.py b/phylib/io/model.py
index 74470b5..7237fab 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -960,35 +960,44 @@ def get_amplitudes_true(self, sample2unit=1.):
         """Convert spike amplitude values to input amplitudes units
         via scaling by unwhitened template waveform.
         :param sample2unit float: factor to convert the raw data to a physical unit (defaults 1.)
-        :returns: spike_amplitudes: np.array [nspikes] spike amplitudes in raw data units
-        :returns: templates_physical_unit: np.array[ntemplates, nsamples, nchannels]: templates
-            scaled by their raw data units amplitude estimate"""
+        :returns: spike_amplitudes_volts: np.array [nspikes] spike amplitudes in raw data units
+        :returns: templates_volts: np.array[ntemplates, nsamples, nchannels]: templates
+            in raw data units
+        :returns: template_amps_volts: np.array[ntemplates]: average templates amplitudes
+            in raw data units
+        To scale the template for template matching,
+        raw_data_volts = templates_volts * spike_amplitudes_volts / template_amps_volts
+        """
+        # spike_amp = ks2_spike_amps * maxmin(inv_whitening(ks2_template_amps))
+        # to rescale the template, apply the formula in the docstring above
 
         # unwhiten template waveforms on their channels of max amplitude
         if self.sparse_templates.cols:
             raise NotImplementedError
         # apply the inverse whitening matrix to the template
         templates_wfs = np.zeros_like(self.sparse_templates.data)  # nt, ns, nc
         for n in np.arange(self.n_templates):
             templates_wfs[n, :, :] = np.matmul(self.sparse_templates.data[n, :, :], self.wmi)
 
         # The amplitude on each channel is the positive peak minus the negative
-        templates_ch_amps = np.abs(np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1))
+        templates_ch_amps = np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1)
 
-        # The template amplitude is the amplitude of its largest channel
+        # The template arbitrary unit amplitude is the amplitude of its largest channel
         # (but see below for true tempAmps)
-        templates_amps_unscaled = np.max(templates_ch_amps, axis=1)
-        spike_amps = templates_amps_unscaled[self.spike_templates] * self.amplitudes
+        templates_amps_au = np.max(templates_ch_amps, axis=1)
+        spike_amps = templates_amps_au[self.spike_templates] * self.amplitudes
 
         with np.errstate(divide='ignore'):
-            # take the average per template
-            templates_amps = (np.bincount(self.spike_templates, weights=spike_amps) /
-                              np.bincount(self.spike_templates))
+            # take the average spike amplitude per template
+            templates_amps_v = (np.bincount(self.spike_templates, weights=spike_amps) /
+                                np.bincount(self.spike_templates))
         # scale back the template according to the spikes units
-        templates_physical_unit = templates_wfs / (templates_amps_unscaled * templates_amps
-                                                   )[:, np.newaxis, np.newaxis]
+        templates_physical_unit = templates_wfs * (templates_amps_v / templates_amps_au
+                                                   )[:, np.newaxis, np.newaxis]
 
-        return spike_amps * sample2unit, templates_physical_unit * sample2unit
+        return (spike_amps * sample2unit,
+                templates_physical_unit * sample2unit,
+                templates_amps_v * sample2unit)
 
     #--------------------------------------------------------------------------
     # Internal helper methods for public high-level methods
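
Editor's note: patch 04 makes get_amplitudes_true() also return per-template
average amplitudes, and rescales each template so its peak-to-peak equals the
average amplitude of its own spikes. The docstring's reconstruction rule can
be sanity-checked numerically; a hedged sketch with toy values (illustrative
names, not phylib's API):

    import numpy as np

    # toy outputs mimicking get_amplitudes_true(): one template and one spike
    template_volts = np.array([0.0, 4.0, -6.0])   # peak-to-peak = 10 (raw data units)
    template_amp = 10.0                           # average spike amplitude for this template
    spike_amp = 12.0                              # this spike is 20% louder than average

    # per the docstring: raw_data_volts = templates_volts * spike_amp / template_amp
    reconstructed = template_volts * spike_amp / template_amp
    assert np.isclose(reconstructed.max() - reconstructed.min(), spike_amp)
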
From f5c5f2764b7f3a6f5f2b19a0a4b54a85b8732a57 Mon Sep 17 00:00:00 2001
From: olivier
Date: Fri, 19 Jun 2020 07:35:22 +0100
Subject: [PATCH 05/11] number of channels exported from model.n_closest_channels

---
 phylib/io/alf.py   | 24 +++---------------------
 phylib/io/model.py | 29 ++++++++++++++++-------------
 2 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index dc17a3f..08be3b0 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -27,7 +27,6 @@
 # File utils
 #------------------------------------------------------------------------------
 
-NCH_WAVEFORMS = 32  # number of channels to be saved in templates.waveforms and channels.waveforms
 NSAMPLE_WAVEFORMS = 500  # number of waveforms sampled out of the raw data
 
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
@@ -126,7 +125,7 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.make_template_and_spikes_objects()
             bar.update(30)
             self.model.save_spikes_subset_waveforms(
-                NSAMPLE_WAVEFORMS, NCH_WAVEFORMS, sample2unit=self.ampfactor)
+                NSAMPLE_WAVEFORMS, sample2unit=self.ampfactor)
             bar.update(50)
             self.make_depths()
             bar.update(20)
@@ -221,24 +220,6 @@ def make_depths(self):
             spikes_depths = clusters_depths[spike_clusters]
         else:
             spikes_depths = self.model.get_depths()
-        # if PC features are provided, compute the depth as the weighted sum of coordinates
-        nbatch = 50000
-        c = 0
-        spikes_depths = np.zeros_like(self.model.spike_times) * np.nan
-        nspi = spikes_depths.shape[0]
-        while True:
-            ispi = np.arange(c, min(c + nbatch, nspi))
-            # take only first component
-            features = self.model.sparse_features.data[ispi, :, 0]
-            features = np.maximum(features, 0) ** 2  # takes only positive values into account
-            ichannels = self.model.sparse_features.cols[self.model.spike_clusters[ispi]]
-            ypos = self.model.channel_positions[ichannels, 1]
-            with np.errstate(divide='ignore'):
-                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
-                                              np.sum(features, axis=1), axis=0))
-            c += nbatch
-            if c >= nspi:
-                break
         self._save_npy('spikes.depths.npy', spikes_depths)
         self._save_npy('clusters.depths.npy', clusters_depths)
 
@@ -258,7 +239,8 @@ def make_template_and_spikes_objects(self):
             raise(NotImplementedError("Sparse template export to ALF not implemented yet"))
         else:
             n_templates, n_wavsamps, nchall = templates_v.shape
-            ncw = min(NCH_WAVEFORMS, nchall)  # for some datasets, 32 may be too much
+            # for some datasets, 32 may be too much
+            ncw = min(self.model.n_closest_channels, nchall)
             assert(n_templates == self.model.n_templates)
             templates = np.zeros((n_templates, n_wavsamps, ncw), dtype=np.float32)
             templates_inds = np.zeros((n_templates, ncw), dtype=np.int32)
diff --git a/phylib/io/model.py b/phylib/io/model.py
index 7237fab..dd12d6e 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -937,24 +937,25 @@ def get_template_features(self, spike_ids):
     def get_depths(self):
         """Compute spike depths based on spike pc features and probe depths."""
         # compute the depth as the weighted sum of coordinates
-        batch_sz = 50000  # number of spikes per batch
+        # if PC features are provided, compute the depth as the weighted sum of coordinates
+        nbatch = 50000
         c = 0
-        spike_depths = np.zeros_like(self.spike_times)
-        nspi = spike_depths.shape[0]
+        spikes_depths = np.zeros_like(self.spike_times) * np.nan
+        nspi = spikes_depths.shape[0]
         while True:
-            ispi = np.arange(c, min(c + batch_sz, nspi))
+            ispi = np.arange(c, min(c + nbatch, nspi))
             # take only first component
-            features = np.square(self.sparse_features.data[ispi, :, 0])
-            ichannels = self.sparse_features.cols[self.spike_clusters[ispi]].astype(np.int64)
+            features = self.sparse_features.data[ispi, :, 0]
+            features = np.maximum(features, 0) ** 2  # takes only positive values into account
+            ichannels = self.sparse_features.cols[self.spike_clusters[ispi]].astype(np.uint32)
             ypos = self.channel_positions[ichannels, 1]
-
-            spike_depths[ispi] = np.sum(np.transpose(ypos * features) /
-                                        np.sum(features, axis=1), axis=0)
-            c += batch_sz
+            with np.errstate(divide='ignore'):
+                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
+                                              np.sum(features, axis=1), axis=0))
+            c += nbatch
             if c >= nspi:
                 break
-
-        return spike_depths
+        return spikes_depths
 
     def get_amplitudes_true(self, sample2unit=1.):
         """Convert spike amplitude values to input amplitudes units
@@ -1166,7 +1167,9 @@ def save_spikes_subset_waveforms(self, max_n_spikes_per_template=None, max_n_cha
         n_chunks_kept = 20  # TODO: better choice
 
         nst = max_n_spikes_per_template
-        nc = max_n_channels
+        nc = max_n_channels or self.n_closest_channels
+        nc = max(nc, self.n_closest_channels)
+
         assert nst > 0
         assert nc > 0
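
Editor's note: patch 05 deduplicates the depth computation into
TemplateModel.get_depths() and derives the exported channel count from
model.n_closest_channels instead of a module constant. The depth estimate
itself is a center of mass: squared, positivity-clipped first-component PC
features weight the y-positions of each spike's feature channels. A standalone
sketch of that estimate on toy arrays (illustrative names, not phylib's API):

    import numpy as np

    rng = np.random.default_rng(1)
    n_spikes, n_channels_loc = 5, 4

    pc1 = rng.normal(size=(n_spikes, n_channels_loc))             # first PC loading per channel
    ypos = rng.uniform(0, 3840, size=(n_spikes, n_channels_loc))  # channel depths (um)

    weights = np.maximum(pc1, 0) ** 2        # keep positive loadings only, then square
    with np.errstate(divide='ignore', invalid='ignore'):
        depths = np.sum(ypos * weights, axis=1) / np.sum(weights, axis=1)
    # depths is NaN for spikes without any positive loading, as in the batched loop above
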
From 0715f848b79c588995d2c01168f89861f1c8d40d Mon Sep 17 00:00:00 2001
From: Cyrille Rossant
Date: Tue, 4 Aug 2020 13:49:55 +0200
Subject: [PATCH 06/11] On-the-fly computation of features from spike waveform
 subset (cherry-picked from dev)

---
 phylib/io/model.py | 98 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/phylib/io/model.py b/phylib/io/model.py
index dd12d6e..beae9aa 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -162,6 +162,84 @@ def get_closest_channels(channel_positions, channel_index, n=None):
 
 
 #------------------------------------------------------------------------------
+# PC computation
+#------------------------------------------------------------------------------
+
+def _compute_pcs(x, npcs):
+    """Compute the PCs of an array x, where each row is an observation.
+    x is a 3D array (nspikes, nsamples, nchannels); the PCs are computed
+    per channel and concatenated along the last axis."""
+
+    # Ensure x is a 3D array.
+    assert x.ndim == 3
+    # Ensure double precision
+    x = x.astype(np.float64)
+
+    nspikes, nsamples, nchannels = x.shape
+
+    cov_reg = np.eye(nsamples)
+    assert cov_reg.shape == (nsamples, nsamples)
+
+    pcs_list = []
+    # Loop over channels
+    for channel in range(nchannels):
+        x_channel = x[:, :, channel]
+        # Compute cov matrix for the channel
+        assert x_channel.ndim == 2
+        # Don't compute the cov matrix if there are no unmasked spikes
+        # on that channel.
+        alpha = 1. / nspikes
+        if x_channel.shape[0] <= 1:
+            cov = alpha * cov_reg
+        else:
+            cov_channel = np.cov(x_channel, rowvar=0)
+            assert cov_channel.shape == (nsamples, nsamples)
+            cov = alpha * cov_reg + cov_channel
+        # Compute the eigenelements
+        vals, vecs = np.linalg.eigh(cov)
+        pcs = vecs.T.astype(np.float32)[np.argsort(vals)[::-1]]
+        # Take the first npcs components.
+        pcs_list.append(pcs[:npcs, ...])
+    # Concatenate the PCs of all channels along the third axis; the result
+    # always has shape (npcs, nsamples, nchannels).
+    pcs = np.dstack(pcs_list)
+    assert pcs.ndim == 3
+    return pcs
+
+
+def _project_pcs(x, pcs):
+    """Project data points onto principal components.
+    Arguments:
+    * x: a 3D array (nspikes, nsamples, nchannels).
+    * pcs: the PCs as returned by `_compute_pcs`, shape (npcs, nsamples, nchannels).
+    """
+    assert x.ndim == 3
+    assert pcs.ndim == 3
+    # Batched equivalent of np.einsum('ijk,jk->ki', pcs, waveform) applied to
+    # each waveform in x: dot every channel of every waveform with that
+    # channel's principal components.
+    features = np.einsum('ijk,ljk->lki', pcs, x)
+    assert features.ndim == 3
+    return features
+
+
+def compute_features(waveforms):
+    """Compute 3 PC features per channel from a (nspikes, nsamples, nchannels)
+    waveform array."""
+    assert waveforms.ndim == 3
+    nspk, nsmp, nc = waveforms.shape
+    pcs = _compute_pcs(waveforms, 3)
+    assert pcs.ndim == 3
+    features = _project_pcs(waveforms, pcs)
+    assert features.ndim == 3
+    assert features.shape == (nspk, nc, 3)
+    return features
+
+
+#------------------------------------------------------------------------------
 # I/O util functions
 #------------------------------------------------------------------------------
@@ -872,7 +950,23 @@ def get_waveforms(self, spike_ids, channel_ids=None):
     def get_features(self, spike_ids, channel_ids):
         """Return sparse features for given spikes."""
         sf = self.sparse_features
-        if sf is None:
+        if sf is None and self.spike_waveforms is not None:
+            ns = len(spike_ids)
+            nc = len(channel_ids)
+            n_pcs = 3
+            features = np.zeros((ns, nc, n_pcs), dtype=np.float32)
+            spike_ids_exist = np.intersect1d(spike_ids, self.spike_waveforms.spike_ids)
+            # Compute PCs from the waveforms for the spikes that are in spike_waveforms.spike_ids.
+            waveforms = self.get_waveforms(spike_ids_exist, channel_ids)
+            features_existing = compute_features(waveforms)
+            assert features_existing.shape[1:] == (nc, n_pcs)
+            # Now we need to integrate the computed features into the output array, knowing
+            # that some spikes may be missing if they were requested here in spike_ids, but
+            # were absent in spike_waveforms.spike_ids.
+            ind = _index_of(spike_ids_exist, spike_ids)
+            features[ind, ...] = features_existing
+            return features
+        elif sf is None:
             return
         _, n_channels_loc, n_pcs = sf.data.shape
         ns = len(spike_ids)
@@ -942,6 +1036,8 @@ def get_depths(self):
         c = 0
         spikes_depths = np.zeros_like(self.spike_times) * np.nan
         nspi = spikes_depths.shape[0]
+        if self.sparse_features is None or self.sparse_features.data.shape[0] != self.n_spikes:
+            return None
         while True:
             ispi = np.arange(c, min(c + nbatch, nspi))
             # take only first component
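
Editor's note: with patch 06, when template PC features are absent, phy can
recompute them from the saved waveform subset: per-channel covariance matrices
are regularized and eigendecomposed, and the waveforms are projected on the
top 3 eigenvectors via einsum. The einsum in _project_pcs is just a batched
per-channel dot product; a sketch checking it against the naive loop on toy
data (the code below is an illustration, not part of the patch):

    import numpy as np

    rng = np.random.default_rng(7)
    npcs, nsamples, nchannels, nspikes = 3, 82, 12, 40

    pcs = rng.normal(size=(npcs, nsamples, nchannels))
    waveforms = rng.normal(size=(nspikes, nsamples, nchannels))

    # vectorized projection, as in _project_pcs
    feats = np.einsum('ijk,ljk->lki', pcs, waveforms)   # (nspikes, nchannels, npcs)

    # naive equivalent: dot each channel's waveform with that channel's PCs
    naive = np.zeros((nspikes, nchannels, npcs))
    for l in range(nspikes):
        for k in range(nchannels):
            for i in range(npcs):
                naive[l, k, i] = waveforms[l, :, k] @ pcs[i, :, k]
    assert np.allclose(feats, naive)
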
From abd7bca6a52eee2ecc0abc4cff8526ba8f579d3b Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 16 Dec 2020 19:43:22 +0000
Subject: [PATCH 07/11] :bug: alf: fix cluster objects size when first cluster
 empty :bug:

---
 phylib/io/alf.py            |  5 ++---
 phylib/io/tests/test_alf.py | 14 ++++++++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 08be3b0..60e1827 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -169,7 +169,6 @@ def _save_npy(self, filename, arr):
 
     def make_cluster_objects(self):
         """Create clusters.channels, clusters.waveformsDuration and clusters.amps"""
-
         peak_channel_path = self.dir_path / 'clusters.channels.npy'
         if not peak_channel_path.exists():
             self._save_npy(peak_channel_path.name, self.model.templates_channels)
@@ -179,8 +178,8 @@ def make_cluster_objects(self):
             self._save_npy(waveform_duration_path.name, self.model.templates_waveforms_durations)
 
         # group by average over cluster number
-        camps = np.zeros(np.max(self.cluster_ids) - np.min(self.cluster_ids) + 1,) * np.nan
-        camps[self.cluster_ids - np.min(self.cluster_ids)] = self.model.templates_amplitudes
+        camps = np.zeros(self.model.templates_channels.shape[0],) * np.nan
+        camps[self.cluster_ids] = self.model.templates_amplitudes
         amps_path = self.dir_path / 'clusters.amps.npy'
         self._save_npy(amps_path.name, camps * self.ampfactor)
diff --git a/phylib/io/tests/test_alf.py b/phylib/io/tests/test_alf.py
index 0eefcd6..6384291 100644
--- a/phylib/io/tests/test_alf.py
+++ b/phylib/io/tests/test_alf.py
@@ -36,7 +36,7 @@ def __init__(self, tempdir):
         self.nt = 5
         self.ncd = 1000
         np.save(p / 'spike_times.npy', .01 * np.cumsum(nr.exponential(size=self.ns)))
-        np.save(p / 'spike_clusters.npy', nr.randint(low=0, high=self.nt, size=self.ns))
+        np.save(p / 'spike_clusters.npy', nr.randint(low=1, high=self.nt, size=self.ns))
         shutil.copy(p / 'spike_clusters.npy', p / 'spike_templates.npy')
         np.save(p / 'amplitudes.npy', nr.uniform(low=0.5, high=1.5, size=self.ns))
         np.save(p / 'channel_positions.npy', np.c_[np.arange(self.nc), np.zeros(self.nc)])
@@ -174,16 +174,22 @@ def check_conversion_output():
             assert f.exists()
 
         # makes sure the output dimensions match (especially clusters which should be 4)
-        cl_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('clusters.') and
-                    f.name.endswith('.npy')]
+        cl_shape = []
+        for f in new_files:
+            if f.name.startswith('clusters.') and f.name.endswith('.npy'):
+                cl_shape.append(np.load(f).shape[0])
+            elif f.name.startswith('clusters.') and f.name.endswith('.csv'):
+                with open(f) as fid:
+                    cl_shape.append(len(fid.readlines()) - 1)
         sp_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('spikes.')]
         ch_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('channels.')]
+
         assert len(set(cl_shape)) == 1
         assert len(set(sp_shape)) == 1
         assert len(set(ch_shape)) == 1
 
         dur = np.load(next(out_path.glob('clusters.peakToTrough*.npy')))
-        assert np.all(dur == np.array([18., -1., 9.5, 2.5, -2.]))
+        assert np.all(dur == np.array([-9.5, 3., 13., -4.5, -2.5]))
 
     def read_after_write():
         model = TemplateModel(dir_path=out_path, dat_path=dataset.dat_path,
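
Editor's note: the bug fixed here came from sizing clusters.amps by the span
of cluster ids actually present; when the first cluster is empty, per-cluster
arrays fall out of alignment with the other clusters.* objects. The fixed
scheme allocates one slot per template and leaves absent clusters as NaN. A
toy illustration of the indexing pattern (values are made up):

    import numpy as np

    n_templates = 5
    cluster_ids = np.array([1, 2, 4])            # clusters 0 and 3 are empty
    template_amps = np.array([0.8, 1.1, 0.9])    # one value per existing cluster

    camps = np.zeros(n_templates) * np.nan       # NaN-filled, one slot per template
    camps[cluster_ids] = template_amps           # index directly by cluster id
    # camps -> [nan, 0.8, 1.1, nan, 0.9]: aligned with every other clusters.* object
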
From 3f533500d592ee1c128547b0b8ad318ee7dcd831 Mon Sep 17 00:00:00 2001
From: olivier
Date: Thu, 24 Dec 2020 14:18:15 +0000
Subject: [PATCH 08/11] remove the cluster metrics from alf conversion - will
 be recomputed downstream

---
 phylib/io/alf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 60e1827..540b433 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -31,7 +31,6 @@
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
-    ('cluster_metrics.csv', 'clusters.metrics.csv', None),
     ('spike_clusters.npy', 'spikes.clusters.npy', True),
     ('spike_templates.npy', 'spikes.templates.npy', True),
     ('channel_positions.npy', 'channels.localCoordinates.npy', False),

From 384ce2c21c08db27c9e5dc4ffd20722d68a38835 Mon Sep 17 00:00:00 2001
From: olivier
Date: Thu, 24 Dec 2020 17:30:13 +0000
Subject: [PATCH 09/11] unit qcs in ephys pipeline

---
 phylib/io/alf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 540b433..6f93210 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -31,6 +31,7 @@
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
+    ('cluster_KSLabel.tsv', 'cluster_KSLabel.tsv', None),
     ('spike_clusters.npy', 'spikes.clusters.npy', True),
     ('spike_templates.npy', 'spikes.templates.npy', True),
     ('channel_positions.npy', 'channels.localCoordinates.npy', False),
From b4d121ad02987a4dadb32cd6a31bfdd3a6891c0e Mon Sep 17 00:00:00 2001
From: Niccolo Bonacchi
Date: Thu, 18 Mar 2021 14:26:35 +0000
Subject: [PATCH 10/11] numpy deprecations

---
 phylib/io/alf.py                 |  2 +-
 phylib/io/array.py               |  4 ++--
 phylib/io/traces.py              |  2 +-
 phylib/stats/ccg.py              |  2 +-
 phylib/utils/_types.py           |  4 ++--
 phylib/utils/tests/test_types.py | 10 +++++-----
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 6f93210..51316df 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -192,7 +192,7 @@ def make_cluster_objects(self):
     def make_channel_objects(self):
         """If there is no rawInd file, create it"""
         rawInd_path = self.dir_path / 'channels.rawInd.npy'
-        rawInd = np.zeros_like(self.model.channel_probes).astype(np.int)
+        rawInd = np.zeros_like(self.model.channel_probes).astype(int)
         channel_offset = 0
         for probe in np.unique(self.model.channel_probes):
             ind = self.model.channel_probes == probe
diff --git a/phylib/io/array.py b/phylib/io/array.py
index 29a8d3d..4ad9829 100644
--- a/phylib/io/array.py
+++ b/phylib/io/array.py
@@ -115,7 +115,7 @@ def _index_of(arr, lookup):
     # values
     lookup = np.asarray(lookup, dtype=np.int32)
     m = (lookup.max() if len(lookup) else 0) + 1
-    tmp = np.zeros(m + 1, dtype=np.int)
+    tmp = np.zeros(m + 1, dtype=int)
     # Ensure that -1 values are kept.
     tmp[-1] = -1
     if len(lookup):
@@ -327,7 +327,7 @@ def get_excerpts(data, n_excerpts=None, excerpt_size=None):
 def _spikes_in_clusters(spike_clusters, clusters):
     """Return the ids of all spikes belonging to the specified clusters."""
     if len(spike_clusters) == 0 or len(clusters) == 0:
-        return np.array([], dtype=np.int)
+        return np.array([], dtype=int)
     return np.nonzero(np.in1d(spike_clusters, clusters))[0]
 
diff --git a/phylib/io/traces.py b/phylib/io/traces.py
index d7fd322..320837a 100644
--- a/phylib/io/traces.py
+++ b/phylib/io/traces.py
@@ -649,7 +649,7 @@ def export_waveforms(
     spike_channels = np.asarray(spike_channels, dtype=np.int32)
     n_channels_loc = spike_channels.shape[1]
     shape = (n_spikes, n_samples_waveforms, n_channels_loc)
-    dtype = traces.dtype if sample2unit is None else np.float
+    dtype = traces.dtype if sample2unit is None else float
     writer = NpyWriter(path, shape, dtype)
     size_written = 0
     for waveforms in iter_waveforms(
diff --git a/phylib/stats/ccg.py b/phylib/stats/ccg.py
index 2c4a912..08febc7 100644
--- a/phylib/stats/ccg.py
+++ b/phylib/stats/ccg.py
@@ -148,7 +148,7 @@ def correlograms(
 
     # At a given shift, the mask precises which spikes have matching spikes
     # within the correlogram time window.
-    mask = np.ones_like(spike_samples, dtype=np.bool)
+    mask = np.ones_like(spike_samples, dtype=bool)
 
     correlograms = _create_correlograms_array(n_clusters, winsize_bins)
 
diff --git a/phylib/utils/_types.py b/phylib/utils/_types.py
index ee28962..3881d6b 100644
--- a/phylib/utils/_types.py
+++ b/phylib/utils/_types.py
@@ -15,8 +15,8 @@
 #------------------------------------------------------------------------------
 
 _ACCEPTED_ARRAY_DTYPES = (
-    np.float, np.float32, np.float64, np.int, np.int8, np.int16, np.uint8, np.uint16,
-    np.int32, np.int64, np.uint32, np.uint64, np.bool)
+    float, np.float32, np.float64, int, np.int8, np.int16, np.uint8, np.uint16,
+    np.int32, np.int64, np.uint32, np.uint64, bool)
 
 
 class Bunch(dict):
diff --git a/phylib/utils/tests/test_types.py b/phylib/utils/tests/test_types.py
index 35c0273..4bf9a17 100644
--- a/phylib/utils/tests/test_types.py
+++ b/phylib/utils/tests/test_types.py
@@ -88,13 +88,13 @@ def _check(arr):
     _check(_as_array(3.))
     _check(_as_array([3]))
 
-    _check(_as_array(3, np.float))
-    _check(_as_array(3., np.float))
-    _check(_as_array([3], np.float))
+    _check(_as_array(3, float))
+    _check(_as_array(3., float))
+    _check(_as_array([3], float))
     _check(_as_array(np.array([3])))
     with raises(ValueError):
-        _check(_as_array(np.array([3]), dtype=np.object))
-    _check(_as_array(np.array([3]), np.float))
+        _check(_as_array(np.array([3]), dtype=object))
+    _check(_as_array(np.array([3]), float))
 
     assert _as_array(None) is None
     assert not _is_array_like(None)
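
Editor's note: NumPy 1.20 deprecated the aliases np.int, np.float, np.bool and
np.object, which were only re-exports of the Python builtins; patch 10 swaps
them for int, float, bool and object, leaving sized dtypes (np.int32,
np.float64, ...) untouched. The resulting dtypes are identical, for instance:

    import numpy as np

    # identical dtypes before and after the patch: the aliases only shadowed builtins
    assert np.zeros(3, dtype=int).dtype == np.dtype(int)
    assert np.ones(3, dtype=float).dtype == np.float64   # float maps to the 64-bit dtype
    assert np.ones(3, dtype=bool).dtype == np.bool_
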
From f72849c55d77607c90ebcc34a411d8884b9caf22 Mon Sep 17 00:00:00 2001
From: owinter
Date: Sun, 8 Aug 2021 17:49:54 +0200
Subject: [PATCH 11/11] add drift registration datasets

---
 phylib/io/alf.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 51316df..ce2b9a4 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -42,6 +42,9 @@
     ('_phy_spikes_subset.channels.npy', '_phy_spikes_subset.channels.npy', False),
     ('_phy_spikes_subset.spikes.npy', '_phy_spikes_subset.spikes.npy', False),
     ('_phy_spikes_subset.waveforms.npy', '_phy_spikes_subset.waveforms.npy', False),
+    ('drift.depth_scale.npy', 'drift.depth_scale.npy', False),
+    ('drift.time_scale.npy', 'drift.time_scale.npy', False),
+    ('drift.um.npy', 'drift.um.npy', False),
     # ('cluster_group.tsv', 'ks2/clusters.phyAnnotation.tsv', False),  # todo check indexing, add2QC
 ]
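
Editor's note: across patches 08, 09 and 11, the _FILE_RENAMES table is the
single switchboard for what gets copied into the ALF folder. Each entry is
(file_in, file_out, squeeze), where squeeze collapses MATLAB-style (n, 1)
vectors to (n,) on the way out. A hedged sketch of how such a table can be
applied (simplified illustration; not the exact phylib copy_files code):

    import shutil
    from pathlib import Path
    import numpy as np

    def apply_renames(renames, dir_in, dir_out):
        """Copy each (file_in, file_out, squeeze) entry that exists in dir_in."""
        dir_in, dir_out = Path(dir_in), Path(dir_out)
        for file_in, file_out, squeeze in renames:
            src, dst = dir_in / file_in, dir_out / file_out
            if not src.exists():
                continue
            if squeeze and src.suffix == '.npy':
                np.save(dst, np.load(src).squeeze())  # (n, 1) matlab vector -> (n,)
            else:
                shutil.copy(src, dst)
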