In [54]:
import numpy as np
import pandas as pd

# *** Set parameters ***

In [62]:
# Integer code for every annotated behaviour state. Several behaviours share
# a code; 'none' is the code used as the base of the label stack when the
# states are encoded further down.
LABEL = dict(
    closeopen=0,
    search=1,
    fz=1,
    eat=1,
    back=0,
    hitass=-1,
    none=2,
)

# Inputs: miniscope traces (read with read_csv) and the behaviour event log
# (read with read_excel)
MS_PATH = "../../data/230406_glp1_k3_day3_k20-2/230719_glp1_k27_day2.csv"
BH_PATH = "../../data/221117_glp1_ms_k1_day3 - k4-1_2_test - Event Logs/221117_glp1_ms_k4-1_nemo_cond_day0 - k27-day2 - Event Logs.xlsx"
# Output: where the preprocessed table is written
OUT_PATH = "../../data/preprocessed/230406_4.csv"

## Load and preprocess data

In [56]:
# Miniscope table: every field is read as text because row 0 carries a
# non-numeric status string (e.g. ' rejected'); values are cast to float
# during preprocessing.
data_ms = pd.read_csv(MS_PATH, dtype=str)

# Behaviour event log: only four columns (selected by position) are loaded;
# the index chosen here is later matched against the miniscope time bins.
data_bh = pd.read_excel(BH_PATH, usecols=[7, 8, 11, 12], index_col=0)

In [57]:
### preprocessing miniscope data

# Row 0 holds a status string per column; flag the columns marked ' rejected'.
# The mask is applied to the column labels directly, which avoids transposing
# the whole (string-typed) frame just to recover their names.
rejected_mask = (data_ms.loc[0] == ' rejected').values

data_ms = (
    data_ms
    .drop(columns=data_ms.columns[rejected_mask])  # drop 'rejected' cells
    .iloc[1:, :]                                   # drop the status row
    .astype(float)                                 # remaining values are numeric
)

# Use the first remaining column (the time bins) as the index.
col = data_ms.columns[0]
data_ms = data_ms.set_index(col, drop=True)

In [58]:
# Display the preprocessed miniscope frame (time bins as index, one column per kept cell).
data_ms

Unnamed: 0,C000,C001,C007,C012,C015,C016,C018,C026,C027,C044,...,C106,C107,C108,C109,C111,C112,C113,C121,C122,C123
,,,,,,,,,,,,,,,,,,,,,
0.000000,5.715842,11.808660,1.889938,-8.335680,-1.104908,15.690330,6.397574,6.116627,17.377970,2.570580,...,3.148440,3.673967,2.117704,30.01566,9.556564,6.629687,43.218050,64.89483,0.143579,0.705199
0.099922,4.427123,8.886618,0.312414,-10.627490,-0.582809,16.624350,8.619368,3.186352,16.857170,0.063166,...,3.062823,3.094044,3.615826,28.94102,8.577761,4.997211,41.379890,68.29801,0.143579,2.135915
0.199844,3.669991,5.428589,-3.437951,-11.068290,-3.143800,14.846760,12.427110,1.100938,14.535240,2.068407,...,-0.606291,5.318264,1.713318,28.68896,8.757579,5.755637,39.619100,65.61420,0.143579,2.977342
0.299766,2.712448,6.455625,-4.491973,-10.822400,-3.871136,14.506450,13.250220,1.112311,15.458080,0.278717,...,2.320547,3.801603,2.030378,31.34004,11.033110,4.727316,37.932430,64.87019,0.143581,4.217113
0.399688,4.136421,5.616803,-5.243358,-9.862790,-2.886226,15.892000,12.179090,1.096319,14.069780,-0.281914,...,1.124703,3.674324,1.947773,29.86197,12.474910,4.886545,36.316750,62.34306,0.681960,4.631794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4293.648340,1.668610,-2.579062,-0.550747,3.149296,4.043642,-4.458192,21.248250,-4.125863,-1.356978,5.155404,...,-1.521955,3.457397,-2.870967,25.68709,-1.788077,5.334601,-0.329193,14.76855,0.148824,0.705229
4293.748262,2.696718,-5.923594,1.066803,5.167014,4.877862,-4.498180,20.776780,-3.260844,-0.720913,5.020520,...,-2.726281,4.019667,-0.929688,26.21749,-2.015017,4.145714,-0.334518,14.57584,0.164586,0.705228
4293.848184,1.070258,-4.203601,2.403453,2.523346,5.711647,-4.719995,19.281060,-1.950639,0.841145,4.263561,...,-1.807811,4.466743,-1.291476,25.53921,-0.709627,2.652637,-0.339620,14.39203,0.163518,0.705226


In [59]:
### encoding states

# Snap every behaviour-log timestamp onto the closest miniscope time bin so
# that both tables share exactly the same index values.
ms_times = data_ms.index.to_numpy()
nearest_pos = [np.argmin(np.abs(ms_times - t)) for t in data_bh.index.values]
data_bh = data_bh.set_index(data_ms.iloc[nearest_pos].index)
bh_index = data_bh.index

# Drop the rows before the first and after the last observer record.
# .copy() yields an independent frame, so adding the 'state' column below
# never writes into a view of the un-trimmed data.
data_ms = data_ms.loc[bh_index[0]:bh_index[-1], :].copy()

# 'closeopen' is re-coded to -2 here, overriding the value from the config
# cell. The original note says this state is used to normalize (denoise) the
# miniscope data; that step is not performed in this cell.
# NOTE(review): this mutates the shared LABEL dict -- re-running the config
# cell resets it to 0.
LABEL['closeopen'] = -2

# Stack of currently open states; LABEL['none'] is the base label that applies
# whenever no state is open.
label_stack = [LABEL['none']]

# States are collected in a list and assigned as a column afterwards. Writing
# through the row objects returned by DataFrame.iterrows() is not guaranteed
# to reach the frame (pandas may hand out copies).
states = []

for index in data_ms.index:
    # Stays None unless a stop/point event fixes the label for this bin.
    row_state = None

    # Is this time bin a start/stop/point bin?
    if index in bh_index:
        bh_rows = data_bh.loc[index]
        # .loc gives a Series for a single event and a DataFrame when several
        # events were snapped onto the same time bin; handle both uniformly.
        if isinstance(bh_rows, pd.DataFrame):
            events = [event for _, event in bh_rows.iterrows()]
        else:
            events = [bh_rows]

        for event in events:
            event_type = event['Event_Type']
            if event_type == 'State start':
                # A state opens: push its label; it applies from this bin on.
                label_stack.append(LABEL[event['Behavior']])
            elif event_type == 'State stop':
                # A state closes: this bin still carries the closing label.
                row_state = label_stack.pop()
            elif event_type == 'State point':
                # Point event (e.g. hitass): label this bin only, bypassing
                # the stack.
                row_state = LABEL[event['Behavior']]

    # No stop/point event decided the label: use the innermost open state.
    if row_state is None:
        row_state = label_stack[-1]
    states.append(row_state)

# Stored as float, matching the dtype the column had when it was NaN-initialised.
data_ms['state'] = np.asarray(states, dtype=float)

### Sanity check

In [60]:
# Display the trimmed frame; the trailing 'state' column should now be populated.
data_ms

Unnamed: 0,C000,C001,C007,C012,C015,C016,C018,C026,C027,C044,...,C107,C108,C109,C111,C112,C113,C121,C122,C123,state
,,,,,,,,,,,,,,,,,,,,,
0.299766,2.712448,6.455625,-4.491973,-10.822400,-3.871136,14.506450,13.250220,1.112311,15.458080,0.278717,...,3.801603,2.030378,31.34004,11.033110,4.727316,37.932430,64.87019,0.143581,4.217113,-2.0
0.399688,4.136421,5.616803,-5.243358,-9.862790,-2.886226,15.892000,12.179090,1.096319,14.069780,-0.281914,...,3.674324,1.947773,29.86197,12.474910,4.886545,36.316750,62.34306,0.681960,4.631794,-2.0
0.499610,4.047854,4.856211,-2.395521,-11.376040,-3.740893,16.702880,11.272240,-0.235075,13.409230,0.458656,...,2.011914,0.332898,28.82091,10.710060,4.470281,37.654530,61.16214,0.659788,5.794085,-2.0
0.599532,4.762739,4.531533,-3.704847,-9.659214,-2.382339,14.614220,9.931366,-0.121813,13.281140,1.671992,...,2.982294,0.239735,29.91220,11.090070,3.403138,38.197310,60.74880,1.034582,6.106163,-2.0
0.699454,4.274189,7.940381,-3.353973,-9.182802,-2.847416,15.552420,8.627383,0.287740,12.629960,-1.583555,...,3.808149,2.580726,29.82021,12.068680,4.757610,41.121400,58.41312,3.991966,6.328224,-2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4144.364872,2.963099,2.691105,0.606657,-2.615818,7.644739,-0.850173,11.294030,3.738533,2.237466,-0.874645,...,1.800276,-4.241229,24.16015,0.629548,2.371849,4.576415,17.84483,0.145457,0.709357,0.0
4144.464794,3.592639,1.311760,1.175878,-3.212903,7.335397,-0.564773,12.292020,4.162464,2.184093,-0.951991,...,1.511920,-2.090998,23.54704,1.361119,1.594632,4.362917,17.51930,0.153142,0.709186,0.0
4144.564716,2.928355,1.332991,1.216118,-2.143024,6.492171,-0.907536,11.443350,4.719322,2.805494,-0.605797,...,1.810926,-1.730931,23.27335,0.776128,3.351878,4.158478,17.20839,0.152673,0.709021,0.0


In [61]:
# Rows labelled as 'hitass' point events. The code is looked up in LABEL
# instead of repeating the literal -1.0, so it stays in sync with the config.
data_ms.loc[data_ms['state'] == LABEL['hitass']]

Unnamed: 0,C000,C001,C007,C012,C015,C016,C018,C026,C027,C044,...,C107,C108,C109,C111,C112,C113,C121,C122,C123,state
,,,,,,,,,,,,,,,,,,,,,
169.267868,24.27395,17.09728,16.84793,20.48719,36.98659,8.425213,-5.105404,4.321941,8.145644,1.767764,...,15.27912,14.60467,32.65754,16.16239,7.25703,109.7704,32.95934,146.3655,116.2253,-1.0
319.550556,21.40993,16.36201,23.31762,25.04434,5.900453,4.256164,-3.178198,11.48046,10.66917,7.516214,...,6.872107,1.011128,37.17455,18.63725,3.6291,82.40275,33.70695,84.09481,65.16651,-1.0
467.235272,15.97418,36.62222,12.01812,13.14509,4.248568,8.127208,-8.002085,5.319273,1.086866,7.944222,...,6.04011,-0.309134,32.14541,12.55025,2.104243,77.26153,27.39408,64.76811,48.53705,-1.0
679.369678,16.56319,31.91365,12.71395,10.54283,7.35799,7.020969,-6.034402,7.826013,-5.021774,2.690978,...,0.331028,0.323145,31.78572,8.400416,4.095094,64.55247,26.09643,53.69564,44.74108,-1.0
842.742148,7.622662,25.75361,11.90842,0.861582,7.064401,10.31203,-0.354058,2.41604,1.858704,-2.789501,...,-0.484116,0.754422,25.90602,10.32547,-1.887648,45.20974,42.2025,41.57227,29.37689,-1.0
1020.503386,14.0969,19.92344,12.8696,9.247717,6.184079,1.735517,-3.855909,4.055887,6.718735,-1.641262,...,5.603517,0.477057,25.75005,5.021712,3.038965,43.61999,15.17315,39.41102,25.94064,-1.0
1201.06244,8.508431,19.64888,6.616348,3.535691,3.459334,6.941177,0.251197,1.987734,4.019571,4.672408,...,4.654449,1.909568,27.66624,6.611865,3.506232,39.48427,34.3751,38.7148,33.92705,-1.0
1352.544192,3.806245,21.04966,1.863225,6.568713,7.211678,5.214847,4.657138,4.730196,0.981937,2.768516,...,5.164181,1.756727,28.88931,4.247213,0.777095,33.67995,16.49684,25.00925,28.06748,-1.0
1517.515414,6.418666,21.35517,5.23328,-1.196512,6.755905,6.248965,0.924788,5.67707,1.133269,1.873746,...,1.672386,-1.18667,28.63458,6.060118,0.024961,27.70869,31.92944,20.66191,16.10917,-1.0


## Save data to CSV

In [63]:
# Write the labelled table to OUT_PATH. header=False omits the column-name row,
# so the file holds data rows only (the time-bin index is still written, as
# `index` is left at its default).
data_ms.to_csv(OUT_PATH, header=False)