In [1]:
import pandas as pd 
import numpy as np

In [2]:
raw_sessiondata = pd.read_csv("20210113-133853.csv", skiprows=6, delimiter = ';')

In [3]:
#Add a column for 'timestamps'
import time
import datetime
import ciso8601

timestamp = []
for i in range(len(raw_sessiondata.index)):
    t = raw_sessiondata['PC-TIME'].iloc[i]
    ts= ciso8601.parse_datetime(t)
    if i == 0:
        t0= time.mktime(ts.timetuple())+ts.microsecond/1000000
        timestamp.append(np.around(t0-t0,3))
    else:
        tnow = time.mktime(ts.timetuple())+ts.microsecond/1000000
        timestamp.append(np.around(tnow-t0,3))

raw_sessiondata['timestamps'] = timestamp

In [4]:
df_group_msg = raw_sessiondata.groupby('MSG')
df_group_info = raw_sessiondata.groupby('+INFO')
df_group_type = raw_sessiondata.groupby('TYPE')

# X Matrix

### Step 1: Get Reward Timestamp

In [5]:
# Get lick onsets 
df_openvalve = df_group_msg.get_group('open_valve')
timestamps_reward = df_openvalve.timestamps.to_numpy()

In [6]:
# Get Reward ON timestamp
evenindex = np.arange(0, len(timestamps_reward) + 2, 2)
rewardON = np.take(timestamps_reward, evenindex[:-1])

In [7]:
# Get Reward OFF timestamp
odd = evenindex-1
oddindex = odd[1:]
rewardOFF = np.take(timestamps_reward, oddindex)

### Step 2: Get run speed BEFORE each reward (X1)

Here, I am getting run speed 10s before reward delivery, totally arbituary

In [8]:
df_run = pd.read_hdf('session.running_speed1ms.h5')

In [9]:
prereward = []
for i in range(len(rewardON)):
    a = df_run[(rewardON[0] - 10 <df_run['timestamps']) & (df_run['timestamps']<rewardON[0])]['speed1ms']
    prereward.append(a)

speed_prereward = np.array(prereward)

In [10]:
np.shape(speed_prereward)

(75, 9999)

In [11]:
speed_prereward[74]

array([0.01955641, 0.01958587, 0.01961532, ..., 0.28117254, 0.28121181,
       0.28125108])

### Step 3: Get number of licks per reward; then append 0 (X2 - prior_numoflicks)

To calculate the number of lick per reward, I use the licks between
i) rewardON
ii) audioON for next reward

I'm usinge the audio timestamp of NEXT reward as cut off, bc I notice that they'd 
lick before the next reward (should be bc they knew the cue)

In [12]:
# Get Lick timestamps
df_port1out = df_group_info.get_group('Port1Out')
timestamps_lickon = df_port1out.timestamps.to_numpy()

In [13]:
# Find Audio timestamps ('BNC1High'), process so no duplicates
df_bnc1high = df_group_info.get_group('BNC1High')
a = df_bnc1high['timestamps'].diff().fillna(df_bnc1high['timestamps']) > 10
timestamps_bnc1high = df_bnc1high.loc[a].timestamps.to_numpy()

In [14]:
# Calculate number of lick during each reward
noflicks = [0]
for i in range(len(rewardON)-1):
    noflicks.append(((rewardON[i] < timestamps_lickon) 
                       & (timestamps_lickon < timestamps_bnc1high[i+1])).sum())

In [15]:
prior_numoflicks = np.array(noflicks)

In [16]:
np.shape(prior_numoflicks)

(75,)

#### Checking / verification

In [17]:
print('Begin time: ' + str(rewardON[1]))
print('Cut off time: ' + str(timestamps_bnc1high[2]))
print('Timestamps of licks: ' + str(timestamps_lickon[21:45]))
print('The counted number of licks in between: ' + 
      str(((rewardON[1] < timestamps_lickon)
           & (timestamps_lickon < timestamps_bnc1high[2])).sum()))

Begin time: 62.417
Cut off time: 82.933
Timestamps of licks: [62.371 62.529 62.71  62.855 63.009 63.177 63.333 63.494 63.634 63.797
 63.974 64.134 64.306 64.477 64.648 64.84  65.004 65.184 65.347 65.523
 65.696 65.863 66.416 83.178]
The counted number of licks in between: 22


### Step 4: Get volume of reward - 0/1/2 (X3 - rounded_reward_duration)

In [18]:
reward_duration = rewardOFF - rewardON
rounded_reward_duration = [round(reward_duration[i], 2) for i in range(len(reward_duration))]
print(rounded_reward_duration)

[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05]


In [19]:
np.shape(rounded_reward_duration)

(75,)

### Creating the Matrix

In [65]:
X_matrix = np.column_stack((speed_prereward, prior_numoflicks,rounded_reward_duration))  #need to figure out shape

In [66]:

X_matrix = X_matrix[:len(X_matrix)-1] #deleted first entry

In [67]:
np.shape(X_matrix)

(74, 10001)

# Y vector

In [68]:
y_numoflicks = np.delete(prior_numoflicks,0)

In [69]:
np.shape(y_numoflicks)

(74,)


# GLM Model

In [70]:
from sklearn.linear_model import LinearRegression

### Split Training and Testing Data

In [71]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_matrix, y_numoflicks, test_size=0.33, random_state=42)

In [72]:
#fit model to training data
model = LinearRegression().fit(X_train, y_train)

In [73]:
predictlick = model.predict(X_test)
predictlick


array([32.78791046, 29.26139069, 24.45250702, 37.91738892, 29.90258026,
       31.18494415, 29.26139069, 30.86435699, 30.86435699, 33.10849762,
       28.29961395, 27.33783722, 34.39086914, 28.29961395, 32.46731567,
       19.32302856, 27.97901917, 29.90258026, 18.36125183, 29.90258026,
       30.22316742, 28.94080353, 26.0554657 , 28.29961395, 27.65843201])

In [74]:
y_test


array([17, 26, 19, 14, 24, 30, 27, 37, 27, 17, 21, 23, 27, 33, 20, 20, 29,
       29, 22, 28, 17, 25, 32, 30, 26])