In [1]:
import numpy as np
import pandas as pd

# *** Set parameters ***

In [2]:
# Mapping from behaviour name (as it appears in the event log) to the integer
# code stored in the 'state' column.
LABEL = dict((
    ('closeopen', 2),
    ('search', 1),
    ('fz', 4),
    ('eat', 5),
    ('back', 3),
    ('hitass', 0),
))

# Input files: miniscope traces (CSV) and behaviour event log (Excel).
MS_PATH = "../../data/230406_glp1_k3_day3_k20-2/230406_glp1_k2_day3_k20-1.csv"
BH_PATH = "../../data/221117_glp1_ms_k1_day3 - k4-1_2_test - Event Logs/221117_glp1_ms_k2_day3_nemo_cond_day0 - k20-1_2 - Event Logs.xlsx"
# Destination of the preprocessed table.
OUT_PATH = "../../data/preprocessed/230406_2.csv"

## Load and preprocess data

In [3]:
# Miniscope table: every field is read as text; conversion to float happens
# during preprocessing, after the non-numeric first row has been removed.
data_ms = pd.read_csv(MS_PATH, dtype=str)

# Behaviour event log: only sheet columns 7, 8, 11 and 12 are read, and the
# first of the selected columns becomes the index.
data_bh = pd.read_excel(BH_PATH, usecols=[7, 8, 11, 12], index_col=0)

In [4]:
### preprocessing miniscope data

# Row 0 holds a per-column status string; keep only the columns that are not
# marked ' rejected' (note the leading space in the raw file).
rejected_mask = (data_ms.loc[0] == ' rejected').to_numpy()
data_ms = data_ms.loc[:, ~rejected_mask]

# Discard the status row and convert the remaining values to float.
data_ms = data_ms.iloc[1:].astype(float)

# The first column is the time axis: round it to 0.1, drop rows that collapse
# onto an already-seen time bin because of the rounding (first one wins), and
# use the rounded time as the index.
col = data_ms.columns[0]
data_ms[col] = data_ms[col].round(1)
data_ms = (
    data_ms
    .drop_duplicates(subset=col, keep='first')
    .set_index(col)
)

In [5]:
### encoding states

# Round the behaviour event times to the same 0.1 resolution as the miniscope
# time bins so the two tables can be matched on their index values.
data_bh.index = np.round(data_bh.index, 1)

# Stack of currently open state labels. The bottom entry (0) is the label used
# while no 'State start' event is open.
label_stack = [0]
bh_index = data_bh.index.values
# Set of event times: constant-time membership test instead of scanning the
# whole array for every miniscope row.
bh_times = set(bh_index)

# Labels are collected in a list and assigned as a column in one go. Rows
# yielded by DataFrame.iterrows() may be copies, so writing to them is not
# guaranteed to modify data_ms.
states = []
for time_bin in data_ms.index:
    # Only time bins that carry a behaviour event change the stack.
    if time_bin in bh_times:
        events = data_bh.loc[time_bin]
        event_type = events['Event_Type']

        if isinstance(event_type, pd.Series):
            # Several events share this time bin after rounding (a stop and a
            # start landing on the same bin): close the current state first,
            # then open the new one.
            # NOTE(review): assumes the rows are ordered stop -> start and
            # that the last row is the start event -- confirm against the log.
            label_stack.pop()
            label_stack.append(LABEL[events['Behavior'].iloc[-1]])
        elif event_type == 'State start':
            # A state begins: push its label.
            label_stack.append(LABEL[events['Behavior']])
        elif event_type == 'State stop':
            # A state ends: fall back to the previously active label.
            label_stack.pop()

    # The label on top of the stack is the state of this time bin.
    states.append(label_stack[-1])

# Stored as float, matching the dtype of the original NaN-initialised column.
data_ms['state'] = np.asarray(states, dtype=float)

In [6]:
# Show the preprocessed table, including the new 'state' column, as the cell output.
data_ms

Unnamed: 0,C00,C01,C05,C09,C10,C15,C27,C29,C33,C35,C39,C46,C48,C50,C55,C61,C68,C69,state
,,,,,,,,,,,,,,,,,,,
0.0,34.721820,3.968467,29.127550,1.096750,-1.822013,-5.217453,17.071450,45.06568,5.111565,7.643876,-1.865263,14.436450,-0.660960,3.450983,10.201640,21.176840,18.601570,36.131890,0.0
0.1,33.668620,4.194504,27.218110,1.850034,-3.634366,-6.478542,14.928690,46.78782,4.850162,7.745542,0.905539,17.356490,0.163054,4.573467,10.414080,19.871050,17.691920,37.048480,0.0
0.2,32.748620,4.489760,28.881230,0.371466,-2.645320,-7.800433,15.342380,44.07159,3.812214,7.921502,1.845975,14.453610,1.146842,4.478676,10.212120,18.741290,14.173070,39.092530,0.0
0.3,33.476530,6.450552,27.963340,1.867389,-3.359474,-8.446368,15.980860,42.92502,0.908657,8.293480,-1.770817,13.958130,-1.509374,3.376199,10.725780,19.185280,16.151740,43.012700,0.0
0.4,32.803300,5.576483,25.934690,2.203176,-3.156875,-8.749483,15.388280,40.79117,4.065714,8.728731,-2.771173,13.332390,-1.105820,3.414403,12.523740,22.273240,20.097910,46.585670,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4807.2,-1.805038,-1.792474,0.317834,-0.268760,-0.527100,0.232596,8.082303,14.65073,1.612810,0.301799,-1.099852,1.143292,3.029811,-0.060717,7.209540,4.684553,1.888751,10.169030,0.0
4807.3,-1.527060,-2.123056,0.613787,-0.960691,1.299239,0.134930,7.517845,13.53818,0.310445,1.208845,0.670756,-1.345028,1.217132,-0.801101,6.083171,4.653456,2.908509,9.854219,0.0
4807.4,-2.375218,-0.984350,-1.307139,0.472056,1.834057,0.014562,10.116710,14.82881,3.801037,0.321928,2.676258,-0.857935,-0.075501,0.823924,5.980522,6.998259,2.266229,10.075390,0.0


## Save data to CSV

In [7]:
# Write the table to OUT_PATH: the time index is kept as the first field and
# the column-name row is omitted.
data_ms.to_csv(path_or_buf=OUT_PATH, header=False, index=True)