/
synapses.py
668 lines (543 loc) · 30.9 KB
/
synapses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
import sys
import copy
from textwrap import dedent
import numpy as np
import pandas as pd
from ..client import inject_client
from ..utils import tqdm, iter_batches
from .neuroncriteria import NeuronCriteria, neuroncriteria_args
from .synapsecriteria import SynapseCriteria
from .connectivity import fetch_adjacencies
@inject_client
@neuroncriteria_args('neuron_criteria')
def fetch_synapses(neuron_criteria, synapse_criteria=None, batch_size=10, *, client=None):
"""
Fetch synapses from a neuron or selection of neurons.
Args:
neuron_criteria (bodyId(s), type/instance, or :py:class:`.NeuronCriteria`):
Determines which bodies to fetch synapses for.
Note:
Any ROI criteria specified in this argument does not affect
which synapses are returned, only which bodies are inspected.
synapse_criteria (SynapseCriteria):
Optional. Allows you to filter synapses by roi, type, confidence.
See :py:class:`.SynapseCriteria` for details.
If the criteria specifies ``primary_only=True`` only primary ROIs will be returned in the results.
If a synapse does not intersect any primary ROI, it will be listed with an roi of ``None``.
(Since 'primary' ROIs do not overlap, each synapse will be listed only once.)
Otherwise, all ROI names will be included in the results.
In that case, some synapses will be listed multiple times -- once per intersecting ROI.
If a synapse does not intersect any ROI, it will be listed with an roi of ``None``.
batch_size:
To improve performance and avoid timeouts, the synapses for multiple bodies
will be fetched in batches, where each batch corresponds to N bodies.
This argument sets the batch size N.
client:
If not provided, the global default :py:class:`.Client` will be used.
Returns:
DataFrame in which each row represent a single synapse.
Unless ``primary_only`` was specified, some synapses may be listed more than once,
if they reside in more than one overlapping ROI.
Example:
.. code-block:: ipython
In [1]: from neuprint import NeuronCriteria as NC, SynapseCriteria as SC, fetch_synapses
...: fetch_synapses(NC(type='ADL.*', rois=['FB']),
...: SC(rois=['LH(R)', 'SIP(R)'], primary_only=True))
Out[1]:
bodyId type roi x y z confidence
0 5812983094 pre SIP(R) 15300 25268 14043 0.992000
1 5812983094 pre SIP(R) 15300 25268 14043 0.992000
2 5812983094 pre SIP(R) 15300 25268 14043 0.992000
3 5812983094 pre SIP(R) 15300 25268 14043 0.992000
4 5812983094 pre LH(R) 5447 21281 19155 0.991000
5 5812983094 pre LH(R) 5434 21270 19201 0.995000
6 5812983094 pre LH(R) 5434 21270 19201 0.995000
7 5812983094 pre LH(R) 5434 21270 19201 0.995000
8 5812983094 pre LH(R) 5447 21281 19155 0.991000
9 5812983094 pre LH(R) 5447 21281 19155 0.991000
10 5812983094 pre LH(R) 5447 21281 19155 0.991000
11 5812983094 pre LH(R) 5434 21270 19201 0.995000
12 5812983094 pre LH(R) 5447 21281 19155 0.991000
13 5812983094 pre LH(R) 5447 21281 19155 0.991000
14 5812983094 pre LH(R) 5447 21281 19155 0.991000
15 5812983094 pre LH(R) 5447 21281 19155 0.991000
16 5812983094 pre LH(R) 5447 21281 19155 0.991000
17 5812983094 pre LH(R) 5434 21270 19201 0.995000
18 5812983094 pre LH(R) 5447 21281 19155 0.991000
19 5812983094 pre LH(R) 5434 21270 19201 0.995000
20 5812983094 pre LH(R) 5447 21281 19155 0.991000
21 5812983094 pre LH(R) 5434 21270 19201 0.995000
22 5812983094 pre LH(R) 5447 21281 19155 0.991000
23 5812983094 pre SIP(R) 15300 25268 14043 0.992000
24 5812983094 pre SIP(R) 15300 25268 14043 0.992000
25 5812983094 pre SIP(R) 15300 25268 14043 0.992000
26 5812983094 pre SIP(R) 15300 25268 14043 0.992000
27 5812983094 pre SIP(R) 15300 25268 14043 0.992000
28 5812983094 pre SIP(R) 15300 25268 14043 0.992000
29 764377593 post LH(R) 8043 21587 15146 0.804000
30 764377593 post LH(R) 8057 21493 15140 0.906482
31 859152522 pre SIP(R) 13275 25499 13629 0.997000
32 859152522 pre SIP(R) 13275 25499 13629 0.997000
33 859152522 post SIP(R) 13349 25337 13653 0.818386
34 859152522 post SIP(R) 12793 26362 14202 0.926918
35 859152522 pre SIP(R) 13275 25499 13629 0.997000
"""
neuron_criteria.matchvar = 'n'
q = f"""
{neuron_criteria.global_with(prefix=8)}
MATCH (n:{neuron_criteria.label})
{neuron_criteria.all_conditions(prefix=8)}
RETURN n.bodyId as bodyId
"""
bodies = client.fetch_custom(q)['bodyId'].values
batch_dfs = []
for batch_bodies in tqdm(iter_batches(bodies, batch_size)):
batch_criteria = copy.copy(neuron_criteria)
batch_criteria.bodyId = batch_bodies
batch_df = _fetch_synapses(batch_criteria, synapse_criteria, client)
batch_dfs.append( batch_df )
if batch_dfs:
return pd.concat( batch_dfs, ignore_index=True )
# Return empty results, but with correct dtypes
dtypes = {'bodyId': np.dtype('int64'),
'type': pd.CategoricalDtype(categories=['pre', 'post'], ordered=False),
'roi': np.dtype('O'),
'x': np.dtype('int32'),
'y': np.dtype('int32'),
'z': np.dtype('int32'),
'confidence': np.dtype('float32')}
return pd.DataFrame([], columns=dtypes.keys()).astype(dtypes)
def _fetch_synapses(neuron_criteria, synapse_criteria, client):
neuron_criteria.matchvar = 'n'
if synapse_criteria is None:
synapse_criteria = SynapseCriteria()
if synapse_criteria.primary_only:
return_rois = {*client.primary_rois}
else:
return_rois = {*client.all_rois}
# If the user specified rois to filter synapses by, but hasn't specified rois
# in the NeuronCriteria, add them to the NeuronCriteria to speed up the query.
if synapse_criteria.rois and not neuron_criteria.rois:
neuron_criteria.rois = {*synapse_criteria.rois}
neuron_criteria.roi_req = 'any'
# Fetch results
cypher = dedent(f"""\
{neuron_criteria.global_with(prefix=8)}
MATCH (n:{neuron_criteria.label})
{neuron_criteria.all_conditions('n', prefix=8)}
MATCH (n)-[:Contains]->(ss:SynapseSet),
(ss)-[:Contains]->(s:Synapse)
{synapse_criteria.condition('n', 's', prefix=8)}
// De-duplicate 's' because 'pre' synapses can appear in more than one SynapseSet
WITH DISTINCT n, s
RETURN n.bodyId as bodyId,
s.type as type,
s.confidence as confidence,
s.location.x as x,
s.location.y as y,
s.location.z as z,
apoc.map.removeKeys(s, ['location', 'confidence', 'type']) as syn_info
""")
data = client.fetch_custom(cypher, format='json')['data']
# Assemble DataFrame
syn_table = []
for body, syn_type, conf, x, y, z, syn_info in data:
# Exclude non-primary ROIs if necessary
syn_rois = return_rois & {*syn_info.keys()}
# Fixme: Filter for the user's ROIs (drop duplicates)
for roi in syn_rois:
syn_table.append((body, syn_type, roi, x, y, z, conf))
if not syn_rois:
syn_table.append((body, syn_type, None, x, y, z, conf))
syn_df = pd.DataFrame(syn_table, columns=['bodyId', 'type', 'roi', 'x', 'y', 'z', 'confidence'])
# Save RAM with smaller dtypes and interned strings
syn_df['type'] = pd.Categorical(syn_df['type'], ['pre', 'post'])
syn_df['roi'] = syn_df['roi'].apply(lambda s: sys.intern(s) if s else s)
syn_df['x'] = syn_df['x'].astype(np.int32)
syn_df['y'] = syn_df['y'].astype(np.int32)
syn_df['z'] = syn_df['z'].astype(np.int32)
syn_df['confidence'] = syn_df['confidence'].astype(np.float32)
return syn_df
@inject_client
@neuroncriteria_args('neuron_criteria')
def fetch_mean_synapses(neuron_criteria, synapse_criteria=None, batch_size=100, *, client=None):
"""
Fetch per-ROI average synapse position and confidence for a set of neurons.
Args:
neuron_criteria (bodyId(s), type/instance, or :py:class:`.NeuronCriteria`):
Determines which bodies to fetch synapses for.
Note:
Any ROI criteria specified in this argument does not affect
which synapses are returned, only which bodies are inspected.
synapse_criteria (SynapseCriteria):
Optional. Allows you to filter synapses by roi, type, confidence.
See :py:class:`.SynapseCriteria` for details.
If the criteria specifies ``primary_only=True`` only primary ROIs will be returned in the results.
If a synapse does not intersect any primary ROI, it will be ignored by this function.
batch_size:
To improve performance and avoid timeouts, the synapses for multiple bodies
will be processed in batches, where each batch corresponds to N bodies.
This argument sets the batch size N.
client:
If not provided, the global default :py:class:`.Client` will be used.
Returns:
DataFrame in which each row contains the average synapse position and average
confidence for a particular body, ROI, and synapse type (pre/post).
Example:
.. code-block:: ipython
In [1]: from neuprint import fetch_mean_synapses, SynapseCriteria as SC
...: fetch_mean_synapses('LC10', SC(type='pre', rois=['LO(R)', 'AOTU(R)']))
Out[1]:
bodyId roi type count x y z confidence
0 1017448980 AOTU(R) pre 141 10691.737305 30304.355469 15508.099609 0.956000
1 1017448980 LO(R) pre 2 8530.500000 18898.000000 34857.000000 0.954500
2 1017090133 AOTU(R) pre 49 10560.469727 31032.041016 14548.550781 0.956286
3 1017090133 LO(R) pre 52 3737.115479 19489.923828 29530.000000 0.949673
4 1017094185 AOTU(R) pre 157 10922.248047 30326.693359 15349.484375 0.959185
.. ... ... ... ... ... ... ... ...
772 1262602271 LO(R) pre 3 5178.666504 14584.666992 23227.333984 0.957333
773 1291573712 AOTU(R) pre 273 11285.106445 31097.925781 15987.640625 0.954418
774 1291573712 LO(R) pre 3 6482.000000 13845.333008 25962.666016 0.936333
775 5812993252 AOTU(R) pre 17 11110.706055 30892.470703 17220.175781 0.960471
776 5812993252 LO(R) pre 39 7758.897461 11723.154297 28176.794922 0.943128
"""
neuron_criteria.matchvar = 'n'
q = f"""
{neuron_criteria.global_with(prefix=8)}
MATCH (n:{neuron_criteria.label})
{neuron_criteria.all_conditions(prefix=8)}
RETURN n.bodyId as bodyId
"""
bodies = client.fetch_custom(q)['bodyId'].values
batch_dfs = []
for batch_bodies in tqdm(iter_batches(bodies, batch_size)):
batch_criteria = copy.copy(neuron_criteria)
batch_criteria.bodyId = batch_bodies
batch_df = _fetch_mean_synapses(batch_criteria, synapse_criteria, client)
batch_dfs.append( batch_df )
if batch_dfs:
return pd.concat( batch_dfs, ignore_index=True )
# Return empty results, but with correct dtypes
dtypes = {'bodyId': np.dtype('int64'),
'type': pd.CategoricalDtype(categories=['pre', 'post'], ordered=False),
'roi': np.dtype('O'),
'count': np.dtype('int32'),
'x': np.dtype('float32'),
'y': np.dtype('float32'),
'z': np.dtype('float32')}
return pd.DataFrame([], columns=dtypes.keys()).astype(dtypes)
def _fetch_mean_synapses(neuron_criteria, synapse_criteria, client):
neuron_criteria.matchvar = 'n'
if synapse_criteria is None:
synapse_criteria = SynapseCriteria()
if synapse_criteria.rois:
rois = synapse_criteria.rois
elif synapse_criteria.primary_only:
rois = client.primary_rois
else:
rois = client.all_rois
# If the user specified rois to filter synapses by, but hasn't specified rois
# in the NeuronCriteria, add them to the NeuronCriteria to speed up the query.
if synapse_criteria.rois and not neuron_criteria.rois:
neuron_criteria.rois = {*synapse_criteria.rois}
neuron_criteria.roi_req = 'any'
# Fetch results
cypher = dedent(f"""\
WITH {rois} as rois
{neuron_criteria.global_with('rois', prefix=8)}
MATCH (n:{neuron_criteria.label})
{neuron_criteria.all_conditions('n', prefix=8)}
MATCH (n)-[:Contains]->(ss:SynapseSet),
(ss)-[:Contains]->(s:Synapse)
{synapse_criteria.condition('rois', 'n', 's', prefix=8)}
// De-duplicate 's' because 'pre' synapses can appear in more than one SynapseSet
WITH DISTINCT rois, n, s
// Extract properties as rows
UNWIND KEYS(s) as roi
// Filter for the ROIs we need
WITH rois, n, s, roi
WHERE roi in {rois}
RETURN n.bodyId as bodyId,
roi,
s.type as type,
count(s) as count,
avg(s.location.x) as x,
avg(s.location.y) as y,
avg(s.location.z) as z,
avg(s.confidence) as confidence
""")
syn_df = client.fetch_custom(cypher)
# Save RAM with smaller dtypes and interned strings
syn_df['type'] = pd.Categorical(syn_df['type'], ['pre', 'post'])
syn_df['roi'] = syn_df['roi'].apply(lambda s: sys.intern(s) if s else s)
syn_df['count'] = syn_df['count'].astype(np.int32)
syn_df['x'] = syn_df['x'].astype(np.float32)
syn_df['y'] = syn_df['y'].astype(np.float32)
syn_df['z'] = syn_df['z'].astype(np.float32)
syn_df['confidence'] = syn_df['confidence'].astype(np.float32)
return syn_df
@inject_client
@neuroncriteria_args('source_criteria', 'target_criteria')
def fetch_synapse_connections(source_criteria=None, target_criteria=None, synapse_criteria=None, min_total_weight=1, batch_size=10_000, *, client=None):
"""
Fetch synaptic-level connections between source and target neurons.
Note:
Use this function if you need information about individual synapse connections,
such as their exact positions or confidence scores.
If you're just interested in aggregate neuron-to-neuron connection info
(including connection strengths and ROI intersections), see
:py:func:`fetch_simple_connections()` and :py:func:`fetch_adjacencies()`,
which are faster and have more condensed outputs than this function.
Note:
If you experience timeouts while running this function,
try reducing the ``batch_size`` setting.
Args:
source_criteria (bodyId(s), type/instance, or :py:class:`.NeuronCriteria`):
Criteria to by which to filter source (pre-synaptic) neurons.
If omitted, all Neurons will be considered as possible sources.
Note:
Any ROI criteria specified in this argument does not affect
which synapses are returned, only which bodies are inspected.
target_criteria (bodyId(s), type/instance, or :py:class:`.NeuronCriteria`):
Criteria to by which to filter target (post-synaptic) neurons.
If omitted, all Neurons will be considered as possible targets.
Note:
Any ROI criteria specified in this argument does not affect
which synapses are returned, only which bodies are inspected.
synapse_criteria (SynapseCriteria):
Optional. Allows you to filter synapses by roi or confidence.
The same criteria is used to filter both ``pre`` and ``post`` sides
of the connection, except for the ``rois`` -- see note below.
By default, ``SynapseCriteria(primary_only=True)`` is used,
with no additional filters.
If ``primary_only`` is specified in the criteria, then the resulting
``roi_pre`` and ``roi_post`` columns will contain a single
string (or ``None``) in every row.
Otherwise, the roi columns will contain a list of ROIs for every row.
(Primary ROIs do not overlap, so every synapse resides in only one
(or zero) primary ROI.)
See :py:class:`.SynapseCriteria` for details.
Note:
Any ``rois`` specified in your ``synapse_criteria`` will be used to filter
the target (post-synaptic) side of the synapse connection, but not
the pre-synaptic side. So in the rare cases where the pre and post synapses
reside on different sides of an ROI boundary, the connection is associated
with the post-synaptic ROI.
That's consistent with neuprint's conventions for ROI assignment in the
neuron-to-neuron ``ConnectsTo:`` relationship, and thus ensures that this function
returns synapse counts that are consistent with :py:func:`.fetch_adjacencies()`.
min_total_weight:
If the total weight of the connection between two bodies is not at least
this strong, don't include the synapses for that connection in the results.
Note:
This filters for total connection weight, regardless of the weight
within any particular ROI. So, if your ``SynapseCriteria`` limits
results to a particular ROI, but two bodies connect in multiple ROIs,
then the number of synapses returned for the two bodies may appear to
be less than ``min_total_weight``. That's because you filtered out
the synapses in other ROIs.
batch_size:
To avoid timeouts and improve performance, the synapse connections
will be fetched in batches. The batching strategy is to process
each body one at a time, and if it has lots of connection partners,
split the request across several batches to avoid timeouts that
could arise from a large request.
This argument specifies the maximum size of each batch in the inner loop.
Larger batches are more efficient to fetch, but increase the likelihood
of timeouts.
client:
If not provided, the global default :py:class:`.Client` will be used.
Returns:
DataFrame in which each row represents a single synaptic connection
between an upstream (pre-synaptic) body and downstream (post-synaptic) body.
Synapse locations are listed in columns ``[x_pre, y_pre, z_pre]`` and
``[x_post, y_post, z_post]`` for the upstream and downstream synapses,
respectively.
The ``roi_pre`` and ``roi_post`` columns will contain either strings
or lists-of-strings, depending on the ``primary_only`` synapse criteria as
described above.
Example:
.. code-block:: ipython
In [1]: from neuprint import fetch_synapse_connections, SynapseCriteria as SC
...: fetch_synapse_connections(792368888, None, SC(rois=['PED(R)', 'SMP(R)'], primary_only=True))
Out[1]:
bodyId_pre bodyId_post roi_pre roi_post x_pre y_pre z_pre x_post y_post z_post confidence_pre confidence_post
0 792368888 754547386 PED(R) PED(R) 14013 27747 19307 13992 27720 19313 0.996 0.401035
1 792368888 612742248 PED(R) PED(R) 14049 27681 19417 14044 27662 19408 0.921 0.881487
2 792368888 5901225361 PED(R) PED(R) 14049 27681 19417 14055 27653 19420 0.921 0.436177
3 792368888 5813117385 SMP(R) SMP(R) 23630 29443 16297 23634 29437 16279 0.984 0.970746
4 792368888 5813083733 SMP(R) SMP(R) 23630 29443 16297 23634 29419 16288 0.984 0.933871
5 792368888 5813058320 SMP(R) SMP(R) 18662 34144 12692 18655 34155 12697 0.853 0.995000
6 792368888 5812981989 PED(R) PED(R) 14331 27921 20099 14351 27928 20085 0.904 0.877373
7 792368888 5812981381 PED(R) PED(R) 14331 27921 20099 14301 27919 20109 0.904 0.567321
8 792368888 5812981381 PED(R) PED(R) 14013 27747 19307 14020 27747 19285 0.996 0.697836
9 792368888 5812979314 PED(R) PED(R) 14331 27921 20099 14329 27942 20109 0.904 0.638362
10 792368888 424767514 PED(R) PED(R) 14331 27921 20099 14324 27934 20085 0.904 0.985734
11 792368888 424767514 PED(R) PED(R) 14013 27747 19307 14020 27760 19294 0.996 0.942831
12 792368888 424767514 PED(R) PED(R) 14049 27681 19417 14040 27663 19420 0.921 0.993586
13 792368888 331662710 SMP(R) SMP(R) 23630 29443 16297 23644 29429 16302 0.984 0.996389
14 792368888 1196854070 PED(R) PED(R) 14331 27921 20099 14317 27935 20101 0.904 0.968408
15 792368888 1131831702 SMP(R) SMP(R) 23630 29443 16297 23651 29434 16316 0.984 0.362952
"""
assert source_criteria is not None or target_criteria is not None, \
"Please specify either source or target search criteria (or both)."
if synapse_criteria is None:
synapse_criteria = SynapseCriteria(primary_only=True)
def prepare_nc(nc, matchvar):
nc.matchvar = matchvar
# If the user specified rois to filter synapses by, but hasn't specified rois
# in the NeuronCriteria, add them to the NeuronCriteria to speed up the query.
if synapse_criteria.rois and not nc.rois:
nc.rois = {*synapse_criteria.rois}
nc.roi_req = 'any'
return nc
source_criteria = prepare_nc(source_criteria, 'n')
target_criteria = prepare_nc(target_criteria, 'm')
# Fetch the list of neuron-neuron pairs in advance so we can break into batches.
_neuron_df, roi_conn_df = fetch_adjacencies( source_criteria,
target_criteria,
synapse_criteria.rois,
1,
min_total_weight,
properties=[] )
if len(roi_conn_df) == 0:
# Return empty dataframe, but with the correct dtypes
dtypes = {
'bodyId_pre': np.dtype('int64'),
'bodyId_post': np.dtype('int64'),
'roi_pre': np.dtype('O'),
'roi_post': np.dtype('O'),
'x_pre': np.dtype('int32'),
'y_pre': np.dtype('int32'),
'z_pre': np.dtype('int32'),
'x_post': np.dtype('int32'),
'y_post': np.dtype('int32'),
'z_post': np.dtype('int32'),
'confidence_pre': np.dtype('float32'),
'confidence_post': np.dtype('float32')
}
return pd.DataFrame([], columns=dtypes.keys()).astype(dtypes)
conn_df = (roi_conn_df.drop_duplicates(['bodyId_pre', 'bodyId_post'])
.sort_values(['bodyId_pre', 'bodyId_post']))
# Pick either 'pre' or 'post' column to process bodies one at a time.
# Then batch across the connections in an inner loop.
num_pre = conn_df['bodyId_pre'].nunique()
num_post = conn_df['bodyId_post'].nunique()
if num_pre < num_post:
grouping_col = 'bodyId_pre'
else:
grouping_col = 'bodyId_post'
syn_dfs = []
with tqdm(total=roi_conn_df['weight'].sum()) as progress:
for _, group_df in conn_df.groupby(grouping_col):
batches = iter_batches(group_df, batch_size)
for batch_df in tqdm(batches, leave=False):
src_crit = copy.copy(source_criteria)
tgt_crit = copy.copy(target_criteria)
if grouping_col == 'bodyId_pre':
assert batch_df['bodyId_pre'].nunique() == 1
src_crit.bodyId = batch_df['bodyId_pre'].unique()
# Filter target criteria further only if connections
# are being fetched in multiple batches.
# Otherwise, the explicit body list is unnecessary and slows down the query.
if len(batches) > 1:
tgt_crit.bodyId = batch_df['bodyId_post'].unique()
else:
assert batch_df['bodyId_post'].nunique() == 1
tgt_crit.bodyId = batch_df['bodyId_post'].unique()
# Filter source criteria further only if connections
# are being fetched in multiple batches.
# Otherwise, the explicit body list is unnecessary and slows down the query.
if len(batches) > 1:
src_crit.bodyId = batch_df['bodyId_pre'].unique()
batch_syn_df = _fetch_synapse_connections( src_crit,
tgt_crit,
synapse_criteria,
min_total_weight,
client )
syn_dfs.append(batch_syn_df)
progress.update(len(batch_syn_df))
syn_df = pd.concat(syn_dfs, ignore_index=True)
hashable_cols = [col for col, dtype in syn_df.dtypes.items() if dtype != object]
assert syn_df.duplicated(hashable_cols).sum() == 0, \
"Somehow obtained duplicate synapse-synapse connections!"
return syn_df
def _fetch_synapse_connections(source_criteria, target_criteria, synapse_criteria, min_total_weight, client):
if synapse_criteria.primary_only:
return_rois = {*client.primary_rois}
else:
return_rois = {*client.all_rois}
source_syn_crit = copy.copy(synapse_criteria)
target_syn_crit = copy.copy(synapse_criteria)
source_syn_crit.matchvar = 'ns'
target_syn_crit.matchvar = 'ms'
source_syn_crit.type = 'pre'
target_syn_crit.type = 'post'
# We apply ROI filtering to the PSD side only,
# for consistency with the way neuprint assigns ROIs to
# connections in the neuron ``ConnectsTo:`` relationship.
# That way, this function and ``fetch_adjacencies()`` return consistent results.
source_syn_crit.rois = None
criteria_globals = [*source_criteria.global_vars().keys(), *target_criteria.global_vars().keys()]
combined_conditions = NeuronCriteria.combined_conditions(
[source_criteria, target_criteria],
['n', 'e', 'm', 'ns', 'ms', *criteria_globals],
prefix=8)
# Fetch results
cypher = dedent(f"""\
{NeuronCriteria.combined_global_with((source_criteria, target_criteria), prefix=8)}
MATCH (n:{source_criteria.label})-[e:ConnectsTo]->(m:{target_criteria.label}),
(n)-[:Contains]->(nss:SynapseSet)-[:ConnectsTo]->(mss:SynapseSet)<-[:Contains]-(m),
(nss)-[:Contains]->(ns:Synapse)-[:SynapsesTo]->(ms:Synapse)<-[:Contains]-(mss)
{combined_conditions}
// Note: Semantically, the word 'DISTINCT' is unnecessary here,
// but its presence makes this query run faster.
WITH DISTINCT n, m, ns, ms, e
WHERE e.weight >= {min_total_weight}
{source_syn_crit.condition('n', 'm', 'ns', 'ms', prefix=8)}
{target_syn_crit.condition('n', 'm', 'ns', 'ms', prefix=8)}
RETURN n.bodyId as bodyId_pre,
m.bodyId as bodyId_post,
ns.location.x as ux,
ns.location.y as uy,
ns.location.z as uz,
ms.location.x as dx,
ms.location.y as dy,
ms.location.z as dz,
ns.confidence as confidence_pre,
ms.confidence as confidence_post,
apoc.map.removeKeys(ns, ['location', 'confidence', 'type']) as info_pre,
apoc.map.removeKeys(ms, ['location', 'confidence', 'type']) as info_post
""")
data = client.fetch_custom(cypher, format='json')['data']
# Assemble DataFrame
syn_table = []
for bodyId_pre, bodyId_post, ux, uy, uz, dx, dy, dz, up_conf, dn_conf, info_pre, info_post in data:
# Exclude non-primary ROIs if necessary
pre_rois = return_rois & {*info_pre.keys()}
post_rois = return_rois & {*info_post.keys()}
# Intern the ROIs to save RAM
pre_rois = sorted(map(sys.intern, pre_rois))
post_rois = sorted(map(sys.intern, post_rois))
pre_rois = pre_rois or [None]
post_rois = post_rois or [None]
# Should be (at most) one ROI when primary_only=True,
# so only show that one (not a list)
if synapse_criteria.primary_only:
pre_rois = pre_rois[0]
post_rois = post_rois[0]
syn_table.append((bodyId_pre, bodyId_post, pre_rois, post_rois, ux, uy, uz, dx, dy, dz, up_conf, dn_conf))
syn_df = pd.DataFrame(syn_table, columns=['bodyId_pre', 'bodyId_post',
'roi_pre', 'roi_post',
'x_pre', 'y_pre', 'z_pre', 'x_post', 'y_post', 'z_post',
'confidence_pre', 'confidence_post'])
# Save RAM with smaller dtypes
syn_df['x_pre'] = syn_df['x_pre'].astype(np.int32)
syn_df['y_pre'] = syn_df['y_pre'].astype(np.int32)
syn_df['z_pre'] = syn_df['z_pre'].astype(np.int32)
syn_df['x_post'] = syn_df['x_post'].astype(np.int32)
syn_df['y_post'] = syn_df['y_post'].astype(np.int32)
syn_df['z_post'] = syn_df['z_post'].astype(np.int32)
syn_df['confidence_pre'] = syn_df['confidence_pre'].astype(np.float32)
syn_df['confidence_post'] = syn_df['confidence_post'].astype(np.float32)
return syn_df