In [1]:
from visual_behavior.data_access import loading
from visual_behavior import database as db
import numpy as np
import pandas as pd

# Raise the pandas column display limit to 500 so wide tables are rendered without column truncation.
pd.set_option('display.max_columns', 500)

In [2]:
# Import from the public IPython.display module: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Inject a CSS rule so elements with class `container` span the full browser width.
# NOTE(review): the `.container` selector presumably targets the classic Notebook UI;
# confirm whether it has any effect in the front end actually being used.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
from visual_behavior.data_access import loading
from visual_behavior.data_access import utilities as data_utilities

# Identifier handed to the loader below; "oeid" presumably abbreviates
# "ophys experiment id" — TODO confirm.
oeid = 993891850
# Load the dataset object for this identifier; later cells read its
# dff_traces, events, ophys_timestamps and cell_specimen_table attributes.
session = loading.get_ophys_dataset(oeid)
# Build the tidy cell dataframe with the packaged helper from data_access.utilities.
cell_df = data_utilities.build_tidy_cell_df(session)
# Bare last expression so the notebook renders the dataframe as the cell output.
cell_df

  self._check_line_labels()


Unnamed: 0,timestamps,cell_roi_id,cell_specimen_id,dff,events,filtered_events
0,9.43744,1080651142,1086563183,0.078235,0.0,0.0
1,9.53065,1080651142,1086563183,0.467809,0.0,0.0
2,9.62386,1080651142,1086563183,0.170756,0.0,0.0
3,9.71707,1080651142,1086563183,0.415712,0.0,0.0
4,9.81028,1080651142,1086563183,0.167799,0.0,0.0
...,...,...,...,...,...,...
7439119,4512.69204,1080651468,1086580413,0.382157,0.0,0.0
7439120,4512.78527,1080651468,1086580413,0.000000,0.0,0.0
7439121,4512.87851,1080651468,1086580413,0.289028,0.0,0.0
7439122,4512.97174,1080651468,1086580413,0.705847,0.0,0.0


In [6]:
# Display the cell specimen table (in the rendered output it is indexed by
# cell_specimen_id and carries cell_roi_id plus ROI geometry/mask columns).
session.cell_specimen_table

Unnamed: 0_level_0,cell_roi_id,height,mask_image_plane,max_correction_down,max_correction_left,max_correction_right,max_correction_up,roi_mask,valid_roi,width,x,y
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1086563183,1080651142,13,1,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,19,163,24
1086515186,1080651145,20,1,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,19,87,40
1086515385,1080651151,16,1,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,24,423,16
1086525553,1080651162,22,0,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,16,404,404
1086528417,1080651164,18,0,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,19,149,209
...,...,...,...,...,...,...,...,...,...,...,...,...
1086540902,1080651455,16,0,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,31,146,376
1086580050,1080651458,18,2,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,18,49,190
1086553056,1080651459,17,2,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,21,414,158
1086535818,1080651466,23,2,10.0,9.0,14.0,14.0,"[[False, False, False, False, False, False, Fa...",True,25,38,104


In [7]:
# Display the dF/F table: one row per cell_specimen_id, with the cell_roi_id and
# the full trace stored as a single array-like value in the `dff` column.
session.dff_traces

Unnamed: 0_level_0,cell_roi_id,dff
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1086563183,1080651142,"[0.0782351940870285, 0.46780869364738464, 0.17..."
1086515186,1080651145,"[0.20403796434402466, 0.2346646785736084, 0.32..."
1086515385,1080651151,"[0.0939764603972435, 0.14564429223537445, 0.10..."
1086525553,1080651162,"[0.5387605428695679, 0.7703697681427002, 0.239..."
1086528417,1080651164,"[0.4103006422519684, 0.332610547542572, 0.3828..."
...,...,...
1086540902,1080651455,"[0.4033322334289551, 0.22940944135189056, 0.49..."
1086580050,1080651458,"[3.453871488571167, 0.8920961022377014, 1.0256..."
1086553056,1080651459,"[0.29580041766166687, 0.17183586955070496, 0.2..."
1086535818,1080651466,"[0.5025877356529236, 0.16728198528289795, 0.24..."


In [75]:
import datetime
import time

In [178]:
def get_cell_timeseries_dict(session, cell_specimen_id):
    '''
    Collect the per-timestep data of one cell into a dictionary whose
    values all have one entry per ophys timestamp.

    Keys of the returned dictionary:
    * timestamps: the session's ophys timestamps
    * cell_roi_id: the cell's roi id, repeated once per timestamp
    * cell_specimen_id: the requested id, repeated once per timestamp
    * dff: the cell's entry in the `dff` column of session.dff_traces
    * events: the cell's entry in the `events` column of session.events
    * filtered_events: the cell's entry in the `filtered_events` column of session.events

    Passing the result to pd.DataFrame gives a tidy (one row per timestep)
    dataframe for this cell.

    arguments:
        session: object exposing ophys_timestamps, dff_traces and events
        cell_specimen_id: index label of the cell in session.dff_traces and session.events

    returns
        dict
    '''
    timestamps = session.ophys_timestamps
    n_timepoints = len(timestamps)

    # Look each table row up once and read the individual fields from it.
    dff_row = session.dff_traces.loc[cell_specimen_id]
    roi_id = dff_row['cell_roi_id']
    dff_trace = dff_row['dff']

    events_row = session.events.loc[cell_specimen_id]
    event_trace = events_row['events']
    filtered_event_trace = events_row['filtered_events']

    # The two scalar ids are broadcast to the length of the time axis so
    # every value in the dictionary lines up with `timestamps`.
    return {
        'timestamps': timestamps,
        'cell_roi_id': [roi_id] * n_timepoints,
        'cell_specimen_id': [cell_specimen_id] * n_timepoints,
        'dff': dff_trace,
        'events': event_trace,
        'filtered_events': filtered_event_trace,
    }


def build_tidy_cell_df(session):
    '''
    builds a tidy dataframe describing activity for every cell in session containing the following columns
    * timestamps: the ophys timestamps
    * cell_roi_id: the cell roi id
    * cell_specimen_id: the cell specimen id
    * dff: measured deltaF/F for every timestep
    * events: extracted events for every timestep
    * filtered_events: filtered events for every timestep

    One row is produced per (cell, timestep) pair: the per-cell dictionaries from
    get_cell_timeseries_dict are each turned into a dataframe and stacked, and the
    result gets a fresh 0..N-1 integer index.

    Takes a few seconds to build

    arguments:
        session: object exposing ophys_timestamps, dff_traces and events;
            the cells are taken from the index of session.dff_traces

    returns:
        pandas dataframe (empty, but with the columns above, when the session has no cells)
    '''
    column_names = ['timestamps', 'cell_roi_id', 'cell_specimen_id', 'dff', 'events', 'filtered_events']

    # One small dataframe per cell; the dff_traces index holds the cell_specimen_ids.
    per_cell_frames = [
        pd.DataFrame(get_cell_timeseries_dict(session, cell_specimen_id))
        for cell_specimen_id in session.dff_traces.index
    ]

    # pd.concat raises ValueError on an empty list, so handle a session without cells explicitly.
    if not per_cell_frames:
        return pd.DataFrame(columns=column_names)

    # Concatenate once (not incrementally) and renumber the rows.
    return pd.concat(per_cell_frames, ignore_index=True)
    

In [180]:
%%time
# Time the dictionary construction for one example cell.
cell_specimen_id = 1086563183
cell_dict = get_cell_timeseries_dict(session, cell_specimen_id)
# The DataFrame conversion is left disabled, so only the dict construction is timed.
# Note that enabling it would rebind `cell_df`, which an earlier cell assigns.
# cell_df = pd.DataFrame(cell_dict)

CPU times: user 25 ms, sys: 331 µs, total: 25.3 ms
Wall time: 23.2 ms


In [44]:
from multiprocessing import Pool
import itertools

In [191]:
%%time
# Parallel attempt: build the per-cell dicts with a pool of 15 worker processes.
# In the recorded run this took ~23 s wall time, compared with ~4 s for the serial
# pd.concat cell further down — presumably because `session` has to be shipped to
# the workers with the tasks; TODO confirm before relying on this approach.
with Pool(15) as pool:
    
    # itertools.product pairs the single session with every cell_specimen_id, and
    # starmap unpacks each (session, cell_specimen_id) pair into the function's arguments.
    cell_dict_list = pool.starmap(
        get_cell_timeseries_dict, 
        itertools.product([session], session.dff_traces.reset_index()['cell_specimen_id'])
    )
    

CPU times: user 4.52 s, sys: 11.7 s, total: 16.2 s
Wall time: 23.3 s


In [185]:
%%time
# Serial baseline: build the dicts one cell at a time, restricted to the first 30 cells.
# This rebinds `cell_dict_list` (also assigned by the Pool cell) and leaves the global
# `cell_specimen_id` set to the last id visited by the loop.
cell_dict_list = []
for cell_specimen_id in session.dff_traces.reset_index()['cell_specimen_id'][:30]:
    cell_dict_list.append(get_cell_timeseries_dict(session, cell_specimen_id))

CPU times: user 290 ms, sys: 0 ns, total: 290 ms
Wall time: 287 ms


In [206]:
%%time
# Build the full tidy dataframe serially: one DataFrame per cell from its timeseries dict,
# stacked with a single pd.concat, then re-indexed 0..N-1 (the per-cell indices are dropped).
df = pd.concat([pd.DataFrame(get_cell_timeseries_dict(session, cell_specimen_id)) for cell_specimen_id in session.dff_traces.reset_index()['cell_specimen_id']]).reset_index(drop=True)

CPU times: user 4.05 s, sys: 192 ms, total: 4.25 s
Wall time: 4.24 s


In [210]:
# Per-cell standard deviation of every column. In the rendered output cell_roi_id has
# std 0.0 for each cell (one roi id per cell) and the timestamps std is the same for all cells.
df.groupby('cell_specimen_id').agg('std')

Unnamed: 0_level_0,timestamps,cell_roi_id,dff,events,filtered_events
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1086514838,1300.131276,0.0,0.308320,0.108012,0.077145
1086515186,1300.131276,0.0,0.120714,0.023296,0.013696
1086515385,1300.131276,0.0,0.110089,0.034706,0.018765
1086515873,1300.131276,0.0,0.123475,0.032923,0.021439
1086516048,1300.131276,0.0,0.193492,0.049401,0.029263
...,...,...,...,...,...
1086579678,1300.131276,0.0,0.140025,0.014705,0.007338
1086579771,1300.131276,0.0,0.151896,0.027656,0.013788
1086579923,1300.131276,0.0,0.211093,0.068405,0.045644
1086580050,1300.131276,0.0,0.275045,0.038219,0.019633


In [200]:
# Toy example: two dicts whose values are equal-length lists.
dict1 = {'time':[1,2,3], 'val': [7,8,9]}
dict2 = {'time':[1,2,3], 'val': [12,13,14]}
# A *list of dicts* gives one row per dict, with each whole list stored in a single cell
# (see the output below) — not a tidy layout.
pd.DataFrame([dict1, dict2])

Unnamed: 0,time,val
0,"[1, 2, 3]","[7, 8, 9]"
1,"[1, 2, 3]","[12, 13, 14]"


In [201]:
# A single dict of equal-length lists expands to one row per list element
# (see the output below), which is the tidy layout.
pd.DataFrame(dict1)

Unnamed: 0,time,val
0,1,7
1,2,8
2,3,9
