In [15]:
import numpy as np
import pandas as pd

# *** Set parameters ***

In [34]:
# Numeric code assigned to each annotated behaviour name.
# Several behaviours intentionally share a code (e.g. 'search', 'fz', 'eat' -> 1).
LABEL = {
    "closeopen": 0,
    "search": 1,
    "fz": 1,
    "eat": 1,
    "back": 0,
    "hitass": -1,
    "None": 2,
}

# Input files: miniscope traces (CSV) and behaviour event log (Excel).
# NOTE(review): the two paths carry different date/animal prefixes
# (230406_glp1_k3 / 221119_glp1-k1 vs 221117_glp1_ms_k1) -- confirm they
# belong to the same recording session.
MS_PATH = "../../data/230406_glp1_k3_day3_k20-2/221119_glp1-k1_day3.csv"
BH_PATH = "../../data/221117_glp1_ms_k1_day3 - k4-1_2_test - Event Logs/221117_glp1_ms_k1_day3 - k4-1_2_test - Event Logs.xlsx"

# Destination of the preprocessed, state-labelled table.
OUT_PATH = "../../data/preprocessed/230406_1.csv"

## Load and preprocess data

In [35]:
# Miniscope traces. Everything is read as text here; the preprocessing step
# inspects the first row for ' rejected' markers before casting to float.
data_ms = pd.read_csv(MS_PATH, dtype=str)

# Behaviour event log. Only sheet columns 7, 8, 11 and 12 are loaded, and the
# first of those selected columns is used as the index.
data_bh = pd.read_excel(BH_PATH, index_col=0, usecols=[7, 8, 11, 12])

In [36]:
### preprocessing miniscope data

# Columns whose first-row entry is exactly ' rejected' (leading space included)
is_rejected = (data_ms.loc[0] == ' rejected').to_numpy()
rejected_cells = data_ms.columns[is_rejected]

# Remove the rejected cells, discard the first row (it holds the status
# strings, not samples) and convert the remaining text values to float.
data_ms = (
    data_ms
    .drop(columns=rejected_cells)
    .iloc[1:, :]
    .astype(float)
)

# Use the first column (the time bins) as the index.
# Rounding the time bins to 0.1 and dropping the rows that collide after
# rounding is currently disabled:
# data_ms[col] = np.round(data_ms[col], 1)
# data_ms.drop_duplicates(subset=col, keep='first', inplace=True, ignore_index=False)
col = data_ms.columns[0]
data_ms = data_ms.set_index(col, drop=True)

In [37]:
# Display the miniscope frame after preprocessing: rejected columns removed,
# values cast to float, first column (time bins) used as the index.
data_ms

Unnamed: 0,C00,C01,C03,C04,C05,C06,C09,C10,C12,C13,...,C18,C21,C22,C24,C25,C26,C28,C29,C31,C32
,,,,,,,,,,,,,,,,,,,,,
0.000000,0.171015,22.418380,0.198023,0.012827,4.825103,11.109640,3.113960,2.944424,19.726520,-1.591319,...,21.206650,1.761721,-3.569320,2.584773,6.618809,3.905607,4.482467,11.126140,10.96196,5.692271
0.100006,0.825305,21.028750,-0.490734,0.290344,5.726901,8.332291,3.711962,2.290142,19.422390,-0.434306,...,21.422750,-0.497861,-4.651616,4.334144,8.022351,2.570588,4.249019,11.539490,10.80178,5.819719
0.200012,1.783895,22.415550,-1.252593,0.355578,3.193192,9.712613,3.637621,3.218112,19.298060,-2.874372,...,21.848880,-1.083246,-3.275992,3.766133,4.503247,2.956809,6.008953,11.430140,10.64939,5.681325
0.300018,3.476991,22.193840,-0.922551,-1.103301,4.841261,9.091285,2.008355,2.436031,17.696360,-0.432628,...,22.937640,-1.892769,-4.876404,2.683629,7.939161,2.193900,4.721345,12.498940,10.50440,5.550370
0.400024,1.003725,22.603040,-0.413539,1.051069,5.055044,9.740983,2.359630,2.872116,20.300500,-0.998292,...,24.311950,-1.717861,-3.723146,2.211510,6.451462,4.891202,5.188851,10.631950,10.68224,5.713310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1488.989334,-5.718642,5.930986,1.737285,-0.669346,2.534343,-3.432742,1.762375,3.707768,-5.545384,1.765916,...,3.086156,3.378594,-4.113146,-6.700840,6.126401,-4.036357,6.712784,-6.289555,13.20696,18.119460
1489.089340,-5.681981,7.571994,1.469579,-0.648739,2.414392,-4.872600,2.396659,1.880589,-5.437061,2.043131,...,4.586025,3.228221,-0.173072,-8.079729,7.065634,-4.085526,7.517140,-6.253339,13.08624,17.450050
1489.189346,-5.526730,6.866599,1.910069,-1.992529,0.770806,-4.270736,3.235621,2.861801,-5.614554,1.589612,...,2.692150,3.706268,-1.981946,-7.099876,7.329717,-3.665212,5.314848,-7.346855,12.82833,16.810890


In [38]:
### encoding states
#
# Adds a 'state' column to data_ms by replaying the behaviour event log:
#   'State start' -> push the behaviour's label on a stack
#   'State stop'  -> pop the stack; the popped label is this row's state
#   'State point' -> this row gets the behaviour's label directly (no stack)
#   otherwise     -> the row gets the label currently on top of the stack

# data_bh.index = np.round(data_bh.index, 1)

# Label map used by this cell only: 'closeopen' and 'None' both collapse to -2
# (presumably so both can serve as the baseline when normalizing / denoising
# the miniscope data later). A local copy is used instead of overwriting LABEL
# so that re-running this cell gives the same result as Restart & Run All.
state_labels = {**LABEL, 'closeopen': -2, 'None': -2}

# Stack of currently open states. Its bottom entry is the configured
# LABEL['None'] value (not the -2 override), which is what a fresh run of the
# original cell produced.
# NOTE(review): confirm whether gaps between annotated states are meant to be
# LABEL['None'] or -2.
label_stack = [LABEL['None']]

# Snap every behaviour timestamp to the nearest miniscope time bin
index_list = [
    int(np.argmin(np.abs(data_ms.index - event_time)))
    for event_time in data_bh.index.values
]
data_bh = data_bh.set_index(data_ms.index[index_list])
bh_index = data_bh.index

# Drop the rows recorded before the first / after the last observer event.
# .copy() makes the trimmed frame independent so the new column can be added
# without chained-assignment issues.
data_ms = data_ms.loc[bh_index[0]:bh_index[-1], :].copy()

# States are collected in a list and assigned once at the end; writing into
# the rows yielded by iterrows() is not guaranteed to reach the frame.
states = []
for time_bin in data_ms.index:
    state = np.nan
    # Whether to fall back to the label on top of the stack for this row
    use_stack_top = True

    if time_bin in bh_index:
        # A list selector always yields a DataFrame, so a time bin with one
        # event and a time bin shared by several events take the same path.
        events = data_bh.loc[[time_bin]]
        for event_type, behavior in zip(events['Event_Type'], events['Behavior']):
            if event_type == 'State start':
                label_stack.append(state_labels[behavior])
            elif event_type == 'State stop':
                state = label_stack.pop()
                use_stack_top = False
            elif event_type == 'State point':
                # point events (e.g. 'hitass') bypass the stack
                state = state_labels[behavior]
                use_stack_top = False

    if use_stack_top:
        state = label_stack[-1]
    states.append(state)

# Stored as float to keep the column dtype (and the CSV text) of the original
data_ms['state'] = np.asarray(states, dtype=float)

### Sanity check

In [39]:
# Display data_ms after state encoding: rows are limited to the span between
# the first and last behaviour event and a 'state' column has been added.
data_ms

Unnamed: 0,C00,C01,C03,C04,C05,C06,C09,C10,C12,C13,...,C21,C22,C24,C25,C26,C28,C29,C31,C32,state
,,,,,,,,,,,,,,,,,,,,,
23.701422,0.419798,17.658980,-1.197448,-3.112700,4.055942,9.149645,3.088796,3.386344,12.084540,0.358368,...,3.144708,-2.153414,-2.119368,4.185096,3.216691,4.057753,4.135573,15.88730,21.14071,-2.0
23.801428,-0.757602,19.689010,1.121498,-1.994230,2.677482,10.510790,3.534982,5.066087,12.297940,0.181497,...,3.580197,-3.575171,-3.755034,3.959248,1.661980,4.792258,2.995709,16.31637,20.24832,-2.0
23.901434,-1.358366,21.561530,3.070156,-2.308824,2.899809,11.641970,3.425586,3.512117,12.349990,-0.425252,...,2.868190,-3.599114,-2.443169,3.136915,0.749585,3.588421,2.438945,16.80287,19.40084,-2.0
24.001440,-2.359787,21.414790,2.842077,-1.888866,5.915239,12.040240,2.907575,4.446149,9.925570,1.171128,...,1.960131,-5.091083,-1.585526,4.374778,2.226050,3.674497,2.226899,19.93534,18.59597,-2.0
24.101446,-2.763496,22.847270,5.549794,-1.515143,5.773180,11.361550,3.031367,4.722466,12.456870,0.222305,...,2.821397,-3.192308,-2.658869,4.314807,-0.902089,2.567722,2.648017,20.54276,17.83158,-2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1388.383298,2.886639,6.680945,-1.635555,6.864712,0.292465,1.123801,-0.063859,2.675812,-1.520176,0.632476,...,11.743810,-1.040690,8.254949,9.909923,2.568978,8.255332,3.114483,10.75727,18.54849,0.0
1388.483304,4.358690,7.123236,0.806808,6.458163,2.963036,0.665990,-0.170439,2.706038,-2.150443,2.052006,...,12.578140,-1.260681,8.553136,7.644387,3.064840,9.413674,0.532774,10.60758,19.37854,0.0
1388.583310,3.784449,6.289456,0.444894,6.114028,1.558275,1.492257,0.862651,2.458404,-2.430160,1.696746,...,12.364300,-0.966332,7.798511,9.854277,0.728101,10.282750,1.596111,10.46515,21.06681,0.0


In [40]:
# Rows whose state is -1.0, the code LABEL assigns to 'hitass'
data_ms[data_ms['state'].eq(-1.0)]

Unnamed: 0,C00,C01,C03,C04,C05,C06,C09,C10,C12,C13,...,C21,C22,C24,C25,C26,C28,C29,C31,C32,state
,,,,,,,,,,,,,,,,,,,,,
152.809168,13.07774,38.87957,17.71831,6.503713,7.660601,9.516241,8.620409,17.41649,8.635416,10.57272,...,18.35174,0.140488,5.895337,5.929365,2.958535,2.91653,6.105658,49.96276,30.01712,-1.0
260.51563,-0.9952,40.54616,10.76984,4.298482,3.657757,12.5643,11.83052,38.95104,7.612161,4.980672,...,4.540019,-0.020072,16.80089,4.612062,-1.553362,1.305944,5.34371,53.40231,3.245324,-1.0
623.53741,-1.142846,13.50177,2.145303,9.314779,4.237191,5.498779,-1.614003,1.089649,4.193809,4.029302,...,35.69175,3.107684,14.84051,11.52955,13.57467,10.45897,10.85774,9.037474,53.0306,-1.0
827.949674,2.103891,8.934415,11.30254,11.12354,-1.25559,2.199848,-5.724321,0.561218,-6.768396,2.271063,...,24.10721,2.090399,1.408481,6.938406,5.509604,12.95879,0.147224,8.016674,42.00991,-1.0
955.957354,-0.926978,9.674389,0.27598,13.5157,-3.20296,2.141801,-4.44679,2.363237,-1.380562,-0.119463,...,17.44372,0.675316,19.96202,9.756667,12.98945,17.99666,8.993757,9.207211,39.01022,-1.0
1118.367098,-0.867554,2.564835,-0.544131,10.60665,0.696121,-0.20867,-0.695207,-0.455982,-3.452017,0.675162,...,22.66151,4.887672,6.994239,13.9013,8.786504,15.8918,3.047367,8.597585,30.64974,-1.0
1255.775342,1.134803,7.83812,-2.870888,10.04581,2.144816,2.621134,-0.00553,1.46507,-0.587993,-0.508779,...,18.61443,0.205614,12.48043,13.11509,7.876525,9.148367,3.310241,9.001521,25.84545,-1.0
1388.783322,2.942859,6.412489,1.277994,8.65117,1.543169,1.497443,0.448333,2.881346,-3.224654,-0.367442,...,9.733314,-0.698998,8.16119,10.66087,2.175071,11.12138,0.566643,10.20066,20.37123,-1.0


## Save data to csv

In [41]:
# Write the labelled table without a header row; the time-bin index is kept
# as the first CSV column.
data_ms.to_csv(path_or_buf=OUT_PATH, header=False, index=True)