In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools
import itertools

# Specify Session and Files to Preprocess

In [2]:
# Identifier of the session to preprocess. Both input file names embed it, so they
# are derived from it below rather than typed out separately.
session_id = 'WT69_12192021'

# Directory holding the raw GLM_SIGNALS_* / GLM_TABLE_* text files.
# NOTE(review): absolute path on one machine — adjust before running elsewhere.
dir_raw_data = '/Users/josh/Documents/Harvard/GLM/sabatinilab-glm/data/old-data-version/raw-new/Figure_1_2'

# Inputs: the photometry signal table and the per-trial behavior table.
filepath_signal = f'{dir_raw_data}/GLM_SIGNALS_{session_id}.txt'
filepath_behavior = f'{dir_raw_data}/GLM_TABLE_{session_id}.txt'

# Output: CSV that receives the annotated signal table.
filepath_output = '/Users/josh/Desktop/signal_output_sample.csv'

In [3]:
# Behavior-table columns whose values are used later as row positions in the signal
# table, i.e. the sample at which each event occurred (center/side port in/out,
# judging by the column names).
alignment_columns = [
    'photometryCenterInIndex',
    'photometryCenterOutIndex',
    'photometrySideInIndex',
    'photometrySideOutIndex',
]

# Per-trial variables to stamp onto the signal table at each event sample.
# The rew_* and wd_* names are the one-hot columns created with pd.get_dummies
# (prefixes 'rew' and 'wd') when the behavior table is prepared.
behavior_columns = [
    'nTrial', 'hasAllPhotometryData',
    'rew_0', 'rew_1',
    'wd_AA', 'wd_Aa', 'wd_aA', 'wd_aa',
    'wd_AB', 'wd_Ab', 'wd_aB', 'wd_ab',
]

# Every (alignment column, behavior column) pair; the alignment column varies slowest.
# itertools.product accepts the lists directly, so no copies are needed.
lst_str_cartesian_series = list(itertools.product(alignment_columns, behavior_columns))

In [4]:
# Signal table, loaded unchanged from disk.
df_signal_raw = pd.read_csv(filepath_signal)

# Behavior table. reset_index() moves the row index into a column called 'index',
# which is then renamed so it can serve as the trial identifier 'nTrial'.
# NOTE(review): with read_csv's default index this numbering presumably starts at 0 — confirm.
df_behavior_raw = (
    pd.read_csv(filepath_behavior)
    .reset_index()
    .rename(columns={'index': 'nTrial'})
)

In [5]:
# Prepend a single all-zero row so that the event indices stored in the behavior table
# can be used directly as row positions (kept for MatLab compatibility — presumably
# because those indices are 1-based; confirm against the exporter).
df_signal = pd.concat(
    [pd.DataFrame(0, index=[0], columns=df_signal_raw.columns), df_signal_raw],
    axis=0,
).reset_index(drop=True)

# Append one-hot indicator columns for the reward outcome ('rew_*') and the trial
# "word" ('wd_*') to the behavior table.
# dtype=int is explicit on purpose: pandas >= 2.0 returns boolean dummies by default,
# and writing booleans into the integer, 0-filled signal columns later on would upcast
# those columns to object (0 mixed with True/False). Integer 0/1 keeps them numeric.
df_behavior = pd.concat(
    [
        df_behavior_raw,
        pd.get_dummies(df_behavior_raw['wasRewarded'], prefix='rew', dtype=int),
        pd.get_dummies(df_behavior_raw['word'], prefix='wd', dtype=int),
    ],
    axis=1,
)

In [7]:
# Specification of every column to add to the signal table. Each entry names the
# output column, the behavior-table column that supplies the row positions, the value
# used where no event occurred, and the value(s) written at the event rows.
behavior_to_signal_columns = [
    # Plain event indicators: 1 at each event row, 0 elsewhere.
    *(
        dict(
            output_column_name=event_column,
            alignment_column=event_column,
            fillValues_absent=0,
            fillValues_present=1,
        )
        for event_column in alignment_columns
    ),
    # Event x behavior columns: the per-trial behavior value at each event row,
    # 0 elsewhere. The output name is '<alignment column>=<behavior column>'.
    *(
        dict(
            output_column_name=f'{event_column}={trial_variable}',
            alignment_column=event_column,
            fillValues_absent=0,
            fillValues_present=df_behavior[trial_variable],
        )
        for event_column, trial_variable in lst_str_cartesian_series
    ),
]

In [8]:
# Materialise every specified column on the signal table.
for column_spec in behavior_to_signal_columns:
    name_out = column_spec['output_column_name']

    # Start with the "absent" value on every row ...
    df_signal[name_out] = column_spec['fillValues_absent']

    # ... then overwrite, by row position, the rows listed in the alignment column.
    event_rows = df_behavior[column_spec['alignment_column']].values
    df_signal.iloc[event_rows, df_signal.columns.get_loc(name_out)] = column_spec['fillValues_present']

# Remove the zero-padding row (label 0) now that the positional writes are done.
# NOTE: this makes the cell non-repeatable on its own — a second run would write to
# positions shifted by one row and then fail here because label 0 no longer exists.
# Re-run the cell that builds df_signal first.
df_signal = df_signal.drop(index=0).copy()

In [9]:
# Trial number recorded at each event row, one column per alignment column. The column
# names are derived from alignment_columns so this stays in sync with the columns that
# were actually written. Zeros mean "no event here" and are turned into NaN.
# NOTE(review): a genuine trial number of 0 is indistinguishable from "no event" and is
# blanked as well; nTrial comes from reset_index(), so the first trial is presumably 0 —
# TODO confirm whether dropping it is intended.
df_nTrial = df_signal[
    [f'{column_alignment}=nTrial' for column_alignment in alignment_columns]
].replace(0, np.nan)

# Collapse to a single series: for each row, the first non-missing trial number across
# the event columns, taken in alignment_columns order.
nTrial_at_events = functools.reduce(
    lambda merged, column: merged.combine_first(df_nTrial[column]),
    df_nTrial.columns,
    pd.Series(np.nan, index=df_signal.index),
)

# nTrial: trial number of the most recent event at or before each row (forward fill).
# nEndTrial: trial number of the next event at or after each row (backward fill).
df_signal['nTrial'] = nTrial_at_events.ffill()
df_signal['nEndTrial'] = nTrial_at_events.bfill()

# Tag every row with the session it came from.
df_signal['session_id'] = session_id

In [10]:
# Write the annotated signal table to disk. The row labels are written as the first
# CSV column (index=True); they start at 1 because the padding row 0 was dropped.
df_signal.to_csv(path_or_buf=filepath_output, index=True)