From 73b3eddc35fbe5cb55b6a946bb3449c0fe448f6c Mon Sep 17 00:00:00 2001
From: olivier
Date: Mon, 20 Apr 2020 15:01:37 +0100
Subject: [PATCH 01/11] save a sample of waveforms

---
 phylib/io/alf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 73ebcea..74c743f 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -28,6 +28,7 @@
 #------------------------------------------------------------------------------
 
 NCH_WAVEFORMS = 32  # number of channels to be saved in templates.waveforms and channels.waveforms
+NSAMPLE_WAVEFORMS = 500  # number of waveforms sampled out of the raw data
 
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
@@ -116,7 +117,7 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
         if not self.out_path.exists():
             self.out_path.mkdir()
 
-        with tqdm(desc="Converting to ALF", total=95) as bar:
+        with tqdm(desc="Converting to ALF", total=125) as bar:
             self.copy_files(force=force)
             bar.update(10)
             self.make_spike_times_amplitudes()
@@ -125,6 +126,8 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             bar.update(10)
             self.make_channel_objects()
             bar.update(5)
+            self.model.save_spikes_subset_waveforms(NSAMPLE_WAVEFORMS, NCH_WAVEFORMS)
+            bar.update(30)
             self.make_depths()
             bar.update(20)
             self.make_template_object()

From d652d7fc2937043b33d5c26cfd46735b15ca85c4 Mon Sep 17 00:00:00 2001
From: olivier
Date: Mon, 1 Jun 2020 19:00:27 +0100
Subject: [PATCH 02/11] add waveforms samples to copy script

---
 phylib/io/alf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 74c743f..f44f255 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -118,8 +118,6 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.out_path.mkdir()
 
         with tqdm(desc="Converting to ALF", total=125) as bar:
-            self.copy_files(force=force)
-            bar.update(10)
             self.make_spike_times_amplitudes()
             bar.update(10)
             self.make_cluster_objects()
@@ -134,6 +132,8 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             bar.update(30)
             self.rm_files()
             bar.update(10)
+            self.copy_files(force=force)
+            bar.update(10)
             self.rename_with_label()
 
         # Return the TemplateModel of the converted ALF dataset if the params.py file exists.
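
Editor's note: patches 01 and 02 introduce NSAMPLE_WAVEFORMS and schedule
model.save_spikes_subset_waveforms() inside the conversion loop (patch 02 also
defers copy_files() until after rm_files()). The underlying idea, cutting a
capped sample of fixed-length snippets around spike samples out of the raw
data, looks roughly like the sketch below. This is a hedged illustration with
made-up names (sample_spike_waveforms, traces), not phylib's implementation:

    import numpy as np

    def sample_spike_waveforms(traces, spike_samples, n_samples=82, n_spikes=500, seed=0):
        """Cut fixed-length snippets around a random subset of spike samples
        from a (n_samples_total, n_channels) raw data array."""
        rng = np.random.default_rng(seed)
        subset = rng.choice(spike_samples, size=min(n_spikes, len(spike_samples)), replace=False)
        half = n_samples // 2
        # keep only spikes whose window fits entirely inside the recording
        subset = np.sort(subset[(subset > half) & (subset < traces.shape[0] - half)])
        return np.stack([traces[s - half:s + half, :] for s in subset])
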
From 2444635fd77bdcbe023831840c392778a89de008 Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 3 Jun 2020 00:36:58 +0100
Subject: [PATCH 03/11] fix physical unit amplitudes for waveforms and spikes -
 fix spike depths

---
 phylib/io/alf.py    | 42 +++++++++++++++++++++------------
 phylib/io/model.py  | 51 ++++++++++++++++++++++++++++---------------
 phylib/io/traces.py |  9 ++++----
 3 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index f44f255..f057f2a 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -118,18 +118,18 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.out_path.mkdir()
 
         with tqdm(desc="Converting to ALF", total=125) as bar:
-            self.make_spike_times_amplitudes()
             bar.update(10)
             self.make_cluster_objects()
             bar.update(10)
             self.make_channel_objects()
             bar.update(5)
-            self.model.save_spikes_subset_waveforms(NSAMPLE_WAVEFORMS, NCH_WAVEFORMS)
+            self.make_template_and_spikes_objects()
             bar.update(30)
+            self.model.save_spikes_subset_waveforms(
+                NSAMPLE_WAVEFORMS, NCH_WAVEFORMS, sample2unit=self.ampfactor)
+            bar.update(50)
             self.make_depths()
             bar.update(20)
-            self.make_template_object()
-            bar.update(30)
             self.rm_files()
             bar.update(10)
             self.copy_files(force=force)
             bar.update(10)
@@ -168,13 +168,6 @@ def _save_npy(self, filename, arr):
         """Save an array into a .npy file."""
         np.save(self.out_path / filename, arr)
 
-    def make_spike_times_amplitudes(self):
-        """We cannot just rename/copy spike_times.npy because it is in unit of
-        *samples*, and not in seconds."""
-        self._save_npy('spikes.times.npy', self.model.spike_times)
-        self._save_npy('spikes.samples.npy', self.model.spike_samples)
-        self._save_npy('spikes.amps.npy', self.model.get_amplitudes_true() * self.ampfactor)
-
     def make_cluster_objects(self):
         """Create clusters.channels, clusters.waveformsDuration and clusters.amps"""
 
@@ -229,14 +222,37 @@ def make_depths(self):
         else:
             spikes_depths = self.model.get_depths()
         # if PC features are provided, compute the depth as the weighted sum of coordinates
-
+        nbatch = 50000
+        c = 0
+        spikes_depths = np.zeros_like(self.model.spike_times) * np.nan
+        nspi = spikes_depths.shape[0]
+        while True:
+            ispi = np.arange(c, min(c + nbatch, nspi))
+            # take only first component
+            features = self.model.sparse_features.data[ispi, :, 0]
+            features = np.maximum(features, 0) ** 2  # takes only positive values into account
+            ichannels = self.model.sparse_features.cols[self.model.spike_clusters[ispi]]
+            ypos = self.model.channel_positions[ichannels, 1]
+            with np.errstate(divide='ignore'):
+                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
+                                              np.sum(features, axis=1), axis=0))
+            c += nbatch
+            if c >= nspi:
+                break
         self._save_npy('spikes.depths.npy', spikes_depths)
         self._save_npy('clusters.depths.npy', clusters_depths)
 
-    def make_template_object(self):
+    def make_template_and_spikes_objects(self):
         """Creates the template waveforms sparse object
         Without manual curation, it also corresponds to clusters waveforms objects.
""" + # "We cannot just rename/copy spike_times.npy because it is in unit of samples, + # and not seconds + self._save_npy('spikes.times.npy', self.model.spike_times) + self._save_npy('spikes.samples.npy', self.model.spike_samples) + spike_amps, templates_volts = self.model.get_amplitudes_true(self.ampfactor) + self._save_npy('spikes.amps.npy', spike_amps) + if self.model.sparse_templates.cols: raise(NotImplementedError("Sparse template export to ALF not implemented yet")) else: diff --git a/phylib/io/model.py b/phylib/io/model.py index b74c464..74470b5 100644 --- a/phylib/io/model.py +++ b/phylib/io/model.py @@ -956,23 +956,39 @@ def get_depths(self): return spike_depths - def get_amplitudes_true(self): + def get_amplitudes_true(self, sample2unit=1.): """Convert spike amplitude values to input amplitudes units - via scaling by unwhitened template waveform.""" - # unwhiten template waveforms on their channels of max amplitude - templates_chs = self.templates_channels - templates_wfs = self.sparse_templates.data[np.arange(self.n_templates), :, templates_chs] - templates_wfs_unw = templates_wfs.T * self.wmi[templates_chs, templates_chs] - templates_amps = np.abs( - np.max(templates_wfs_unw, axis=0) - np.min(templates_wfs_unw, axis=0)) - - # scale the spike amplitude values by the template amplitude values - amplitudes_v = np.zeros_like(self.amplitudes) - for t in range(self.n_templates): - idxs = self.get_template_spikes(t) - amplitudes_v[idxs] = self.amplitudes[idxs] * templates_amps[t] + via scaling by unwhitened template waveform. + :param sample2unit float: factor to convert the raw data to a physical unit (defaults 1.) + :returns: spike_amplitudes: np.array [nspikes] spike amplitudes in raw data units + :returns: templates_physical_unit: np.array[ntemplates, nsamples, nchannels]: templates + scaled by their raw data units amplitude estimate""" - return amplitudes_v + # unwhiten template waveforms on their channels of max amplitude + if self.sparse_templates.cols: + raise NotImplementedError + # apply the inverse whitening matrix to the template + templates_wfs = np.zeros_like(self.sparse_templates.data) # nt, ns, nc + for n in np.arange(self.n_templates): + templates_wfs[n, :, :] = np.matmul(self.sparse_templates.data[n, :, :], self.wmi) + + # The amplitude on each channel is the positive peak minus the negative + templates_ch_amps = np.abs(np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1)) + + # The template amplitude is the amplitude of its largest channel + # (but see below for true tempAmps) + templates_amps_unscaled = np.max(templates_ch_amps, axis=1) + spike_amps = templates_amps_unscaled[self.spike_templates] * self.amplitudes + + with np.errstate(divide='ignore'): + # take the average per template + templates_amps = (np.bincount(self.spike_templates, weights=spike_amps) / + np.bincount(self.spike_templates)) + # scale back the template according to the spikes units + templates_physical_unit = templates_wfs / (templates_amps_unscaled * templates_amps + )[:, np.newaxis, np.newaxis] + + return spike_amps * sample2unit, templates_physical_unit * sample2unit #-------------------------------------------------------------------------- # Internal helper methods for public high-level methods @@ -1132,7 +1148,8 @@ def save_spike_clusters(self, spike_clusters): logger.debug("Save spike clusters to `%s`.", path) np.save(path, spike_clusters) - def save_spikes_subset_waveforms(self, max_n_spikes_per_template=None, max_n_channels=None): + def save_spikes_subset_waveforms(self, 
diff --git a/phylib/io/traces.py b/phylib/io/traces.py
index 9fdf2ad..d7fd322 100644
--- a/phylib/io/traces.py
+++ b/phylib/io/traces.py
@@ -641,20 +641,21 @@ def iter_waveforms(traces, spike_samples, spike_channels, n_samples_waveforms=No
 
 
 def export_waveforms(
-        path, traces, spike_samples, spike_channels, n_samples_waveforms=None, cache=False):
+        path, traces, spike_samples, spike_channels, n_samples_waveforms=None, cache=False,
+        sample2unit=1):
    """Export a selection of spike waveforms to a npy file by iterating over the data
    on a chunk by chunk basis."""
    n_spikes = len(spike_samples)
    spike_channels = np.asarray(spike_channels, dtype=np.int32)
    n_channels_loc = spike_channels.shape[1]
    shape = (n_spikes, n_samples_waveforms, n_channels_loc)
-
-    writer = NpyWriter(path, shape, traces.dtype)
+    dtype = traces.dtype if sample2unit is None else np.float
+    writer = NpyWriter(path, shape, dtype)
    size_written = 0
    for waveforms in iter_waveforms(
            traces, spike_samples, spike_channels,
            n_samples_waveforms=n_samples_waveforms, cache=cache):
-        writer.append(waveforms)
+        writer.append(waveforms * sample2unit)
        size_written += waveforms.size
    writer.close()
    assert prod(shape) == size_written
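
Editor's note: the key step in patch 03 is undoing the sorter's whitening
before measuring amplitudes: each template is multiplied by the inverse
whitening matrix wmi, the per-channel amplitude is the peak-to-peak of the
unwhitened waveform, and each spike's amplitude is its template's peak-to-peak
scaled by that spike's private scaling factor. A minimal self-contained sketch
of that arithmetic on toy arrays (illustrative names, not phylib's API):

    import numpy as np

    rng = np.random.default_rng(42)
    n_templates, n_samples, n_channels, n_spikes = 4, 82, 16, 1000

    templates = rng.normal(size=(n_templates, n_samples, n_channels))  # whitened templates
    wmi = np.linalg.inv(np.eye(n_channels) + 0.01 * rng.normal(size=(n_channels, n_channels)))
    spike_templates = rng.integers(0, n_templates, n_spikes)           # template id per spike
    amplitudes = rng.uniform(0.5, 1.5, n_spikes)                       # per-spike scaling factor

    # unwhiten: (n_samples, n_channels) @ (n_channels, n_channels), batched over templates
    templates_unw = templates @ wmi
    # peak-to-peak amplitude per channel, then the loudest channel per template
    ch_amps = templates_unw.max(axis=1) - templates_unw.min(axis=1)    # (n_templates, n_channels)
    template_amps = ch_amps.max(axis=1)                                # (n_templates,)
    # spike amplitudes in raw-data units (multiply by ampfactor to get volts)
    spike_amps = template_amps[spike_templates] * amplitudes
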
From 4fdb1fa31013e39d4f1f91da051536b6cd916009 Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 17 Jun 2020 09:31:05 +0100
Subject: [PATCH 04/11] template amplitudes object

---
 phylib/io/alf.py   | 11 ++++++-----
 phylib/io/model.py | 33 +++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index f057f2a..dc17a3f 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -250,13 +250,14 @@ def make_template_and_spikes_objects(self):
         # and not seconds
         self._save_npy('spikes.times.npy', self.model.spike_times)
         self._save_npy('spikes.samples.npy', self.model.spike_samples)
-        spike_amps, templates_volts = self.model.get_amplitudes_true(self.ampfactor)
+        spike_amps, templates_v, template_amps = self.model.get_amplitudes_true(self.ampfactor)
         self._save_npy('spikes.amps.npy', spike_amps)
+        self._save_npy('templates.amps.npy', template_amps)
 
         if self.model.sparse_templates.cols:
             raise(NotImplementedError("Sparse template export to ALF not implemented yet"))
         else:
-            n_templates, n_wavsamps, nchall = self.model.sparse_templates.data.shape
+            n_templates, n_wavsamps, nchall = templates_v.shape
             ncw = min(NCH_WAVEFORMS, nchall)  # for some datasets, 32 may be too much
             assert(n_templates == self.model.n_templates)
             templates = np.zeros((n_templates, n_wavsamps, ncw), dtype=np.float32)
@@ -269,10 +270,10 @@ def make_template_and_spikes_objects(self):
                     self.model.channel_positions[self.model.templates_channels[t]]), axis=1)
                 channel_distance[self.model.channel_probes != current_probe] += np.inf
                 templates_inds[t, :] = np.argsort(channel_distance)[:ncw]
-                templates[t, ...] = self.model.sparse_templates.data[t, :][:, templates_inds[t, :]]
-            np.save(self.out_path.joinpath('templates.waveforms'), templates * self.ampfactor)
+                templates[t, ...] = templates_v[t, :][:, templates_inds[t, :]]
+            np.save(self.out_path.joinpath('templates.waveforms'), templates)
             np.save(self.out_path.joinpath('templates.waveformsChannels'), templates_inds)
-            np.save(self.out_path.joinpath('clusters.waveforms'), templates * self.ampfactor)
+            np.save(self.out_path.joinpath('clusters.waveforms'), templates)
             np.save(self.out_path.joinpath('clusters.waveformsChannels'), templates_inds)
diff --git a/phylib/io/model.py b/phylib/io/model.py
index 74470b5..7237fab 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -960,35 +960,44 @@ def get_amplitudes_true(self, sample2unit=1.):
         """Convert spike amplitude values to input amplitudes units
         via scaling by unwhitened template waveform.
         :param sample2unit float: factor to convert the raw data to a physical unit (defaults 1.)
-        :returns: spike_amplitudes: np.array [nspikes] spike amplitudes in raw data units
-        :returns: templates_physical_unit: np.array[ntemplates, nsamples, nchannels]: templates
-            scaled by their raw data units amplitude estimate"""
+        :returns: spike_amplitudes_volts: np.array [nspikes] spike amplitudes in raw data units
+        :returns: templates_volts: np.array[ntemplates, nsamples, nchannels]: templates
+            in raw data units
+        :returns: template_amps_volts: np.array[ntemplates]: average templates amplitudes
+            in raw data units
+        To scale the template for template matching,
+        raw_data_volts = templates_volts * spike_amplitudes_volts / template_amps_volts
+        """
+        # spike_amp = ks2_spike_amps * maxmin(inv_whitening(ks2_template_amps))
+        # to rescale the template, apply the formula in the docstring above
 
         # unwhiten template waveforms on their channels of max amplitude
         if self.sparse_templates.cols:
             raise NotImplementedError
         # apply the inverse whitening matrix to the template
         templates_wfs = np.zeros_like(self.sparse_templates.data)  # nt, ns, nc
         for n in np.arange(self.n_templates):
             templates_wfs[n, :, :] = np.matmul(self.sparse_templates.data[n, :, :], self.wmi)
 
         # The amplitude on each channel is the positive peak minus the negative
-        templates_ch_amps = np.abs(np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1))
+        templates_ch_amps = np.max(templates_wfs, axis=1) - np.min(templates_wfs, axis=1)
 
-        # The template amplitude is the amplitude of its largest channel
+        # The template arbitrary unit amplitude is the amplitude of its largest channel
         # (but see below for true tempAmps)
-        templates_amps_unscaled = np.max(templates_ch_amps, axis=1)
-        spike_amps = templates_amps_unscaled[self.spike_templates] * self.amplitudes
+        templates_amps_au = np.max(templates_ch_amps, axis=1)
+        spike_amps = templates_amps_au[self.spike_templates] * self.amplitudes
 
         with np.errstate(divide='ignore'):
-            # take the average per template
-            templates_amps = (np.bincount(self.spike_templates, weights=spike_amps) /
-                              np.bincount(self.spike_templates))
+            # take the average spike amplitude per template
+            templates_amps_v = (np.bincount(self.spike_templates, weights=spike_amps) /
+                                np.bincount(self.spike_templates))
         # scale back the template according to the spikes units
-        templates_physical_unit = templates_wfs / (templates_amps_unscaled * templates_amps
-                                                   )[:, np.newaxis, np.newaxis]
+        templates_physical_unit = templates_wfs * (templates_amps_v / templates_amps_au
+                                                   )[:, np.newaxis, np.newaxis]
 
-        return spike_amps * sample2unit, templates_physical_unit * sample2unit
+        return (spike_amps * sample2unit,
+                templates_physical_unit * sample2unit,
+                templates_amps_v * sample2unit)
 
     #--------------------------------------------------------------------------
     # Internal helper methods for public high-level methods
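
Editor's note: patch 04 makes get_amplitudes_true() also return per-template
average amplitudes, and rescales each template so its peak-to-peak equals the
average amplitude of its own spikes. The docstring's reconstruction rule can
be sanity-checked numerically; a hedged sketch with toy values (illustrative
names, not phylib's API):

    import numpy as np

    # toy outputs mimicking get_amplitudes_true(): one template and one spike
    template_volts = np.array([0.0, 4.0, -6.0])   # peak-to-peak = 10 (raw data units)
    template_amp = 10.0                           # average spike amplitude for this template
    spike_amp = 12.0                              # this spike is 20% louder than average

    # per the docstring: raw_data_volts = templates_volts * spike_amp / template_amp
    reconstructed = template_volts * spike_amp / template_amp
    assert np.isclose(reconstructed.max() - reconstructed.min(), spike_amp)
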
From f5c5f2764b7f3a6f5f2b19a0a4b54a85b8732a57 Mon Sep 17 00:00:00 2001
From: olivier
Date: Fri, 19 Jun 2020 07:35:22 +0100
Subject: [PATCH 05/11] number of channels exported from model.n_closest_channels

---
 phylib/io/alf.py   | 24 +++---------------------
 phylib/io/model.py | 29 ++++++++++++++++-------------
 2 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index dc17a3f..08be3b0 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -27,7 +27,6 @@
 # File utils
 #------------------------------------------------------------------------------
 
-NCH_WAVEFORMS = 32  # number of channels to be saved in templates.waveforms and channels.waveforms
 NSAMPLE_WAVEFORMS = 500  # number of waveforms sampled out of the raw data
 
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
@@ -126,7 +125,7 @@ def convert(self, out_path, force=False, label='', ampfactor=1):
             self.make_template_and_spikes_objects()
             bar.update(30)
             self.model.save_spikes_subset_waveforms(
-                NSAMPLE_WAVEFORMS, NCH_WAVEFORMS, sample2unit=self.ampfactor)
+                NSAMPLE_WAVEFORMS, sample2unit=self.ampfactor)
             bar.update(50)
             self.make_depths()
             bar.update(20)
@@ -221,24 +220,6 @@ def make_depths(self):
             spikes_depths = clusters_depths[spike_clusters]
         else:
             spikes_depths = self.model.get_depths()
-        # if PC features are provided, compute the depth as the weighted sum of coordinates
-        nbatch = 50000
-        c = 0
-        spikes_depths = np.zeros_like(self.model.spike_times) * np.nan
-        nspi = spikes_depths.shape[0]
-        while True:
-            ispi = np.arange(c, min(c + nbatch, nspi))
-            # take only first component
-            features = self.model.sparse_features.data[ispi, :, 0]
-            features = np.maximum(features, 0) ** 2  # takes only positive values into account
-            ichannels = self.model.sparse_features.cols[self.model.spike_clusters[ispi]]
-            ypos = self.model.channel_positions[ichannels, 1]
-            with np.errstate(divide='ignore'):
-                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
-                                              np.sum(features, axis=1), axis=0))
-            c += nbatch
-            if c >= nspi:
-                break
         self._save_npy('spikes.depths.npy', spikes_depths)
         self._save_npy('clusters.depths.npy', clusters_depths)
 
@@ -258,7 +239,8 @@ def make_template_and_spikes_objects(self):
             raise(NotImplementedError("Sparse template export to ALF not implemented yet"))
         else:
             n_templates, n_wavsamps, nchall = templates_v.shape
-            ncw = min(NCH_WAVEFORMS, nchall)  # for some datasets, 32 may be too much
+            # for some datasets, 32 may be too much
+            ncw = min(self.model.n_closest_channels, nchall)
             assert(n_templates == self.model.n_templates)
             templates = np.zeros((n_templates, n_wavsamps, ncw), dtype=np.float32)
             templates_inds = np.zeros((n_templates, ncw), dtype=np.int32)
diff --git a/phylib/io/model.py b/phylib/io/model.py
index 7237fab..dd12d6e 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -937,24 +937,25 @@ def get_template_features(self, spike_ids):
     def get_depths(self):
         """Compute spike depths based on spike pc features and probe depths."""
         # compute the depth as the weighted sum of coordinates
-        batch_sz = 50000  # number of spikes per batch
+        # if PC features are provided, compute the depth as the weighted sum of coordinates
+        nbatch = 50000
         c = 0
-        spike_depths = np.zeros_like(self.spike_times)
-        nspi = spike_depths.shape[0]
+        spikes_depths = np.zeros_like(self.spike_times) * np.nan
+        nspi = spikes_depths.shape[0]
         while True:
-            ispi = np.arange(c, min(c + batch_sz, nspi))
+            ispi = np.arange(c, min(c + nbatch, nspi))
             # take only first component
-            features = np.square(self.sparse_features.data[ispi, :, 0])
-            ichannels = self.sparse_features.cols[self.spike_clusters[ispi]].astype(np.int64)
+            features = self.sparse_features.data[ispi, :, 0]
+            features = np.maximum(features, 0) ** 2  # takes only positive values into account
+            ichannels = self.sparse_features.cols[self.spike_clusters[ispi]].astype(np.uint32)
             ypos = self.channel_positions[ichannels, 1]
-
-            spike_depths[ispi] = np.sum(np.transpose(ypos * features) /
-                                        np.sum(features, axis=1), axis=0)
-            c += batch_sz
+            with np.errstate(divide='ignore'):
+                spikes_depths[ispi] = (np.sum(np.transpose(ypos * features) /
+                                              np.sum(features, axis=1), axis=0))
+            c += nbatch
             if c >= nspi:
                 break
-
-        return spike_depths
+        return spikes_depths
 
     def get_amplitudes_true(self, sample2unit=1.):
         """Convert spike amplitude values to input amplitudes units
@@ -1166,7 +1167,9 @@ def save_spikes_subset_waveforms(self, max_n_spikes_per_template=None, max_n_cha
         n_chunks_kept = 20  # TODO: better choice
 
         nst = max_n_spikes_per_template
-        nc = max_n_channels
+        nc = max_n_channels or self.n_closest_channels
+        nc = max(nc, self.n_closest_channels)
+
         assert nst > 0
         assert nc > 0
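
Editor's note: patch 05 deduplicates the depth computation into
TemplateModel.get_depths() and derives the exported channel count from
model.n_closest_channels instead of a module constant. The depth estimate
itself is a center of mass: squared, positivity-clipped first-component PC
features weight the y-positions of each spike's feature channels. A standalone
sketch of that estimate on toy arrays (illustrative names, not phylib's API):

    import numpy as np

    rng = np.random.default_rng(1)
    n_spikes, n_channels_loc = 5, 4

    pc1 = rng.normal(size=(n_spikes, n_channels_loc))             # first PC loading per channel
    ypos = rng.uniform(0, 3840, size=(n_spikes, n_channels_loc))  # channel depths (um)

    weights = np.maximum(pc1, 0) ** 2        # keep positive loadings only, then square
    with np.errstate(divide='ignore', invalid='ignore'):
        depths = np.sum(ypos * weights, axis=1) / np.sum(weights, axis=1)
    # depths is NaN for spikes without any positive loading, as in the batched loop above
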
From 0715f848b79c588995d2c01168f89861f1c8d40d Mon Sep 17 00:00:00 2001
From: Cyrille Rossant
Date: Tue, 4 Aug 2020 13:49:55 +0200
Subject: [PATCH 06/11] On-the-fly computation of features from spike waveform
 subset (cherry-picked from dev)

---
 phylib/io/model.py | 98 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/phylib/io/model.py b/phylib/io/model.py
index dd12d6e..beae9aa 100644
--- a/phylib/io/model.py
+++ b/phylib/io/model.py
@@ -162,6 +162,84 @@ def get_closest_channels(channel_positions, channel_index, n=None):
 
 
 #------------------------------------------------------------------------------
+# PC computation
+#------------------------------------------------------------------------------
+
+def _compute_pcs(x, npcs):
+    """Compute the PCs of an array x, where each row is an observation.
+    x is a 3D array (nspikes, nsamples, nchannels); the PCs are computed
+    per channel and concatenated along the last axis."""
+
+    # Ensure x is a 3D array.
+    assert x.ndim == 3
+    # Ensure double precision
+    x = x.astype(np.float64)
+
+    nspikes, nsamples, nchannels = x.shape
+
+    cov_reg = np.eye(nsamples)
+    assert cov_reg.shape == (nsamples, nsamples)
+
+    pcs_list = []
+    # Loop over channels
+    for channel in range(nchannels):
+        x_channel = x[:, :, channel]
+        # Compute cov matrix for the channel
+        assert x_channel.ndim == 2
+        # Don't compute the cov matrix if there are no unmasked spikes
+        # on that channel.
+        alpha = 1. / nspikes
+        if x_channel.shape[0] <= 1:
+            cov = alpha * cov_reg
+        else:
+            cov_channel = np.cov(x_channel, rowvar=0)
+            assert cov_channel.shape == (nsamples, nsamples)
+            cov = alpha * cov_reg + cov_channel
+        # Compute the eigenelements
+        vals, vecs = np.linalg.eigh(cov)
+        pcs = vecs.T.astype(np.float32)[np.argsort(vals)[::-1]]
+        # Take the first npcs components.
+        pcs_list.append(pcs[:npcs, ...])
+    # Concatenate the PCs of all channels along the third axis; the result
+    # always has shape (npcs, nsamples, nchannels).
+    pcs = np.dstack(pcs_list)
+    assert pcs.ndim == 3
+    return pcs
+
+
+def _project_pcs(x, pcs):
+    """Project data points onto principal components.
+    Arguments:
+    * x: a 3D array (nspikes, nsamples, nchannels).
+    * pcs: the PCs as returned by `_compute_pcs`, shape (npcs, nsamples, nchannels).
+    """
+    assert x.ndim == 3
+    assert pcs.ndim == 3
+    # Batched equivalent of np.einsum('ijk,jk->ki', pcs, waveform) applied to
+    # each waveform in x: dot every channel of every waveform with that
+    # channel's principal components.
+    features = np.einsum('ijk,ljk->lki', pcs, x)
+    assert features.ndim == 3
+    return features
+
+
+def compute_features(waveforms):
+    """Compute 3 PC features per channel from a (nspikes, nsamples, nchannels)
+    waveform array."""
+    assert waveforms.ndim == 3
+    nspk, nsmp, nc = waveforms.shape
+    pcs = _compute_pcs(waveforms, 3)
+    assert pcs.ndim == 3
+    features = _project_pcs(waveforms, pcs)
+    assert features.ndim == 3
+    assert features.shape == (nspk, nc, 3)
+    return features
+
+
+#------------------------------------------------------------------------------
 # I/O util functions
 #------------------------------------------------------------------------------
@@ -872,7 +950,23 @@ def get_waveforms(self, spike_ids, channel_ids=None):
     def get_features(self, spike_ids, channel_ids):
         """Return sparse features for given spikes."""
         sf = self.sparse_features
-        if sf is None:
+        if sf is None and self.spike_waveforms is not None:
+            ns = len(spike_ids)
+            nc = len(channel_ids)
+            n_pcs = 3
+            features = np.zeros((ns, nc, n_pcs), dtype=np.float32)
+            spike_ids_exist = np.intersect1d(spike_ids, self.spike_waveforms.spike_ids)
+            # Compute PCs from the waveforms for the spikes that are in spike_waveforms.spike_ids.
+            waveforms = self.get_waveforms(spike_ids_exist, channel_ids)
+            features_existing = compute_features(waveforms)
+            assert features_existing.shape[1:] == (nc, n_pcs)
+            # Now we need to integrate the computed features into the output array, knowing
+            # that some spikes may be missing if they were requested here in spike_ids, but
+            # were absent in spike_waveforms.spike_ids.
+            ind = _index_of(spike_ids_exist, spike_ids)
+            features[ind, ...] = features_existing
+            return features
+        elif sf is None:
             return
         _, n_channels_loc, n_pcs = sf.data.shape
         ns = len(spike_ids)
@@ -942,6 +1036,8 @@ def get_depths(self):
         c = 0
         spikes_depths = np.zeros_like(self.spike_times) * np.nan
         nspi = spikes_depths.shape[0]
+        if self.sparse_features is None or self.sparse_features.data.shape[0] != self.n_spikes:
+            return None
         while True:
             ispi = np.arange(c, min(c + nbatch, nspi))
             # take only first component
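
Editor's note: with patch 06, when template PC features are absent, phy can
recompute them from the saved waveform subset: per-channel covariance matrices
are regularized and eigendecomposed, and the waveforms are projected on the
top 3 eigenvectors via einsum. The einsum in _project_pcs is just a batched
per-channel dot product; a sketch checking it against the naive loop on toy
data (the code below is an illustration, not part of the patch):

    import numpy as np

    rng = np.random.default_rng(7)
    npcs, nsamples, nchannels, nspikes = 3, 82, 12, 40

    pcs = rng.normal(size=(npcs, nsamples, nchannels))
    waveforms = rng.normal(size=(nspikes, nsamples, nchannels))

    # vectorized projection, as in _project_pcs
    feats = np.einsum('ijk,ljk->lki', pcs, waveforms)   # (nspikes, nchannels, npcs)

    # naive equivalent: dot each channel's waveform with that channel's PCs
    naive = np.zeros((nspikes, nchannels, npcs))
    for l in range(nspikes):
        for k in range(nchannels):
            for i in range(npcs):
                naive[l, k, i] = waveforms[l, :, k] @ pcs[i, :, k]
    assert np.allclose(feats, naive)
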
From abd7bca6a52eee2ecc0abc4cff8526ba8f579d3b Mon Sep 17 00:00:00 2001
From: olivier
Date: Wed, 16 Dec 2020 19:43:22 +0000
Subject: [PATCH 07/11] :bug: alf: fix cluster objects size when first cluster
 empty :bug:

---
 phylib/io/alf.py            |  5 ++---
 phylib/io/tests/test_alf.py | 14 ++++++++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 08be3b0..60e1827 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -169,7 +169,6 @@ def _save_npy(self, filename, arr):
 
     def make_cluster_objects(self):
         """Create clusters.channels, clusters.waveformsDuration and clusters.amps"""
-
         peak_channel_path = self.dir_path / 'clusters.channels.npy'
         if not peak_channel_path.exists():
             self._save_npy(peak_channel_path.name, self.model.templates_channels)
@@ -179,8 +178,8 @@ def make_cluster_objects(self):
             self._save_npy(waveform_duration_path.name, self.model.templates_waveforms_durations)
 
         # group by average over cluster number
-        camps = np.zeros(np.max(self.cluster_ids) - np.min(self.cluster_ids) + 1,) * np.nan
-        camps[self.cluster_ids - np.min(self.cluster_ids)] = self.model.templates_amplitudes
+        camps = np.zeros(self.model.templates_channels.shape[0],) * np.nan
+        camps[self.cluster_ids] = self.model.templates_amplitudes
         amps_path = self.dir_path / 'clusters.amps.npy'
         self._save_npy(amps_path.name, camps * self.ampfactor)
diff --git a/phylib/io/tests/test_alf.py b/phylib/io/tests/test_alf.py
index 0eefcd6..6384291 100644
--- a/phylib/io/tests/test_alf.py
+++ b/phylib/io/tests/test_alf.py
@@ -36,7 +36,7 @@ def __init__(self, tempdir):
         self.nt = 5
         self.ncd = 1000
         np.save(p / 'spike_times.npy', .01 * np.cumsum(nr.exponential(size=self.ns)))
-        np.save(p / 'spike_clusters.npy', nr.randint(low=0, high=self.nt, size=self.ns))
+        np.save(p / 'spike_clusters.npy', nr.randint(low=1, high=self.nt, size=self.ns))
         shutil.copy(p / 'spike_clusters.npy', p / 'spike_templates.npy')
         np.save(p / 'amplitudes.npy', nr.uniform(low=0.5, high=1.5, size=self.ns))
         np.save(p / 'channel_positions.npy', np.c_[np.arange(self.nc), np.zeros(self.nc)])
@@ -174,16 +174,22 @@ def check_conversion_output():
             assert f.exists()
 
         # makes sure the output dimensions match (especially clusters which should be 4)
-        cl_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('clusters.') and
-                    f.name.endswith('.npy')]
+        cl_shape = []
+        for f in new_files:
+            if f.name.startswith('clusters.') and f.name.endswith('.npy'):
+                cl_shape.append(np.load(f).shape[0])
+            elif f.name.startswith('clusters.') and f.name.endswith('.csv'):
+                with open(f) as fid:
+                    cl_shape.append(len(fid.readlines()) - 1)
         sp_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('spikes.')]
         ch_shape = [np.load(f).shape[0] for f in new_files if f.name.startswith('channels.')]
+
         assert len(set(cl_shape)) == 1
         assert len(set(sp_shape)) == 1
         assert len(set(ch_shape)) == 1
 
         dur = np.load(next(out_path.glob('clusters.peakToTrough*.npy')))
-        assert np.all(dur == np.array([18., -1., 9.5, 2.5, -2.]))
+        assert np.all(dur == np.array([-9.5, 3., 13., -4.5, -2.5]))
 
     def read_after_write():
         model = TemplateModel(dir_path=out_path, dat_path=dataset.dat_path,
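
Editor's note: the bug fixed here came from sizing clusters.amps by the span
of cluster ids actually present; when the first cluster is empty, per-cluster
arrays fall out of alignment with the other clusters.* objects. The fixed
scheme allocates one slot per template and leaves absent clusters as NaN. A
toy illustration of the indexing pattern (values are made up):

    import numpy as np

    n_templates = 5
    cluster_ids = np.array([1, 2, 4])            # clusters 0 and 3 are empty
    template_amps = np.array([0.8, 1.1, 0.9])    # one value per existing cluster

    camps = np.zeros(n_templates) * np.nan       # NaN-filled, one slot per template
    camps[cluster_ids] = template_amps           # index directly by cluster id
    # camps -> [nan, 0.8, 1.1, nan, 0.9]: aligned with every other clusters.* object
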
From 3f533500d592ee1c128547b0b8ad318ee7dcd831 Mon Sep 17 00:00:00 2001
From: olivier
Date: Thu, 24 Dec 2020 14:18:15 +0000
Subject: [PATCH 08/11] remove the cluster metrics from alf conversion - will
 be recomputed downstream

---
 phylib/io/alf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 60e1827..540b433 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -31,7 +31,6 @@
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
-    ('cluster_metrics.csv', 'clusters.metrics.csv', None),
     ('spike_clusters.npy', 'spikes.clusters.npy', True),
     ('spike_templates.npy', 'spikes.templates.npy', True),
     ('channel_positions.npy', 'channels.localCoordinates.npy', False),

From 384ce2c21c08db27c9e5dc4ffd20722d68a38835 Mon Sep 17 00:00:00 2001
From: olivier
Date: Thu, 24 Dec 2020 17:30:13 +0000
Subject: [PATCH 09/11] unit qcs in ephys pipeline

---
 phylib/io/alf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 540b433..6f93210 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -31,6 +31,7 @@
 _FILE_RENAMES = [  # file_in, file_out, squeeze (bool to squeeze vector from matlab in npy)
     ('params.py', 'params.py', None),
+    ('cluster_KSLabel.tsv', 'cluster_KSLabel.tsv', None),
     ('spike_clusters.npy', 'spikes.clusters.npy', True),
     ('spike_templates.npy', 'spikes.templates.npy', True),
     ('channel_positions.npy', 'channels.localCoordinates.npy', False),
From b4d121ad02987a4dadb32cd6a31bfdd3a6891c0e Mon Sep 17 00:00:00 2001
From: Niccolo Bonacchi
Date: Thu, 18 Mar 2021 14:26:35 +0000
Subject: [PATCH 10/11] numpy deprecations

---
 phylib/io/alf.py                 |  2 +-
 phylib/io/array.py               |  4 ++--
 phylib/io/traces.py              |  2 +-
 phylib/stats/ccg.py              |  2 +-
 phylib/utils/_types.py           |  4 ++--
 phylib/utils/tests/test_types.py | 10 +++++-----
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 6f93210..51316df 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -192,7 +192,7 @@ def make_cluster_objects(self):
     def make_channel_objects(self):
         """If there is no rawInd file, create it"""
         rawInd_path = self.dir_path / 'channels.rawInd.npy'
-        rawInd = np.zeros_like(self.model.channel_probes).astype(np.int)
+        rawInd = np.zeros_like(self.model.channel_probes).astype(int)
         channel_offset = 0
         for probe in np.unique(self.model.channel_probes):
             ind = self.model.channel_probes == probe
diff --git a/phylib/io/array.py b/phylib/io/array.py
index 29a8d3d..4ad9829 100644
--- a/phylib/io/array.py
+++ b/phylib/io/array.py
@@ -115,7 +115,7 @@ def _index_of(arr, lookup):
     # values
     lookup = np.asarray(lookup, dtype=np.int32)
     m = (lookup.max() if len(lookup) else 0) + 1
-    tmp = np.zeros(m + 1, dtype=np.int)
+    tmp = np.zeros(m + 1, dtype=int)
     # Ensure that -1 values are kept.
     tmp[-1] = -1
     if len(lookup):
@@ -327,7 +327,7 @@ def get_excerpts(data, n_excerpts=None, excerpt_size=None):
 def _spikes_in_clusters(spike_clusters, clusters):
     """Return the ids of all spikes belonging to the specified clusters."""
     if len(spike_clusters) == 0 or len(clusters) == 0:
-        return np.array([], dtype=np.int)
+        return np.array([], dtype=int)
     return np.nonzero(np.in1d(spike_clusters, clusters))[0]
 
diff --git a/phylib/io/traces.py b/phylib/io/traces.py
index d7fd322..320837a 100644
--- a/phylib/io/traces.py
+++ b/phylib/io/traces.py
@@ -649,7 +649,7 @@ def export_waveforms(
     spike_channels = np.asarray(spike_channels, dtype=np.int32)
     n_channels_loc = spike_channels.shape[1]
     shape = (n_spikes, n_samples_waveforms, n_channels_loc)
-    dtype = traces.dtype if sample2unit is None else np.float
+    dtype = traces.dtype if sample2unit is None else float
     writer = NpyWriter(path, shape, dtype)
     size_written = 0
     for waveforms in iter_waveforms(
diff --git a/phylib/stats/ccg.py b/phylib/stats/ccg.py
index 2c4a912..08febc7 100644
--- a/phylib/stats/ccg.py
+++ b/phylib/stats/ccg.py
@@ -148,7 +148,7 @@ def correlograms(
 
     # At a given shift, the mask precises which spikes have matching spikes
     # within the correlogram time window.
-    mask = np.ones_like(spike_samples, dtype=np.bool)
+    mask = np.ones_like(spike_samples, dtype=bool)
 
     correlograms = _create_correlograms_array(n_clusters, winsize_bins)
 
diff --git a/phylib/utils/_types.py b/phylib/utils/_types.py
index ee28962..3881d6b 100644
--- a/phylib/utils/_types.py
+++ b/phylib/utils/_types.py
@@ -15,8 +15,8 @@
 #------------------------------------------------------------------------------
 
 _ACCEPTED_ARRAY_DTYPES = (
-    np.float, np.float32, np.float64, np.int, np.int8, np.int16, np.uint8, np.uint16,
-    np.int32, np.int64, np.uint32, np.uint64, np.bool)
+    float, np.float32, np.float64, int, np.int8, np.int16, np.uint8, np.uint16,
+    np.int32, np.int64, np.uint32, np.uint64, bool)
 
 
 class Bunch(dict):
diff --git a/phylib/utils/tests/test_types.py b/phylib/utils/tests/test_types.py
index 35c0273..4bf9a17 100644
--- a/phylib/utils/tests/test_types.py
+++ b/phylib/utils/tests/test_types.py
@@ -88,13 +88,13 @@ def _check(arr):
     _check(_as_array(3.))
     _check(_as_array([3]))
 
-    _check(_as_array(3, np.float))
-    _check(_as_array(3., np.float))
-    _check(_as_array([3], np.float))
+    _check(_as_array(3, float))
+    _check(_as_array(3., float))
+    _check(_as_array([3], float))
     _check(_as_array(np.array([3])))
     with raises(ValueError):
-        _check(_as_array(np.array([3]), dtype=np.object))
-    _check(_as_array(np.array([3]), np.float))
+        _check(_as_array(np.array([3]), dtype=object))
+    _check(_as_array(np.array([3]), float))
 
     assert _as_array(None) is None
     assert not _is_array_like(None)
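
Editor's note: NumPy 1.20 deprecated the aliases np.int, np.float, np.bool and
np.object, which were only re-exports of the Python builtins; patch 10 swaps
them for int, float, bool and object, leaving sized dtypes (np.int32,
np.float64, ...) untouched. The resulting dtypes are identical, for instance:

    import numpy as np

    # identical dtypes before and after the patch: the aliases only shadowed builtins
    assert np.zeros(3, dtype=int).dtype == np.dtype(int)
    assert np.ones(3, dtype=float).dtype == np.float64   # float maps to the 64-bit dtype
    assert np.ones(3, dtype=bool).dtype == np.bool_
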
From f72849c55d77607c90ebcc34a411d8884b9caf22 Mon Sep 17 00:00:00 2001
From: owinter
Date: Sun, 8 Aug 2021 17:49:54 +0200
Subject: [PATCH 11/11] add drift registration datasets

---
 phylib/io/alf.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/phylib/io/alf.py b/phylib/io/alf.py
index 51316df..ce2b9a4 100644
--- a/phylib/io/alf.py
+++ b/phylib/io/alf.py
@@ -42,6 +42,9 @@
     ('_phy_spikes_subset.channels.npy', '_phy_spikes_subset.channels.npy', False),
     ('_phy_spikes_subset.spikes.npy', '_phy_spikes_subset.spikes.npy', False),
     ('_phy_spikes_subset.waveforms.npy', '_phy_spikes_subset.waveforms.npy', False),
+    ('drift.depth_scale.npy', 'drift.depth_scale.npy', False),
+    ('drift.time_scale.npy', 'drift.time_scale.npy', False),
+    ('drift.um.npy', 'drift.um.npy', False),
     # ('cluster_group.tsv', 'ks2/clusters.phyAnnotation.tsv', False),  # todo check indexing, add2QC
 ]
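
Editor's note: across patches 08, 09 and 11, the _FILE_RENAMES table is the
single switchboard for what gets copied into the ALF folder. Each entry is
(file_in, file_out, squeeze), where squeeze collapses MATLAB-style (n, 1)
vectors to (n,) on the way out. A hedged sketch of how such a table can be
applied (simplified illustration; not the exact phylib copy_files code):

    import shutil
    from pathlib import Path
    import numpy as np

    def apply_renames(renames, dir_in, dir_out):
        """Copy each (file_in, file_out, squeeze) entry that exists in dir_in."""
        dir_in, dir_out = Path(dir_in), Path(dir_out)
        for file_in, file_out, squeeze in renames:
            src, dst = dir_in / file_in, dir_out / file_out
            if not src.exists():
                continue
            if squeeze and src.suffix == '.npy':
                np.save(dst, np.load(src).squeeze())  # (n, 1) matlab vector -> (n,)
            else:
                shutil.copy(src, dst)
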