In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import fit_glm_helpers as fgh

In [3]:
dir_data = Path('/Users/josh/Documents/Harvard/GLM/sabatinilab-glm/data/old-data-version/raw-new/Figure_1_2')
dir_output = Path('/Users/josh/Desktop/example_output_folder')

lst_dict_inputdata = [
    {'session_id': 'WT63_11082021',
    'filepath_signal': dir_data / Path('GLM_SIGNALS_WT69_12192021.txt'),
    'filepath_trial': dir_data / Path('GLM_TABLE_WT69_12192021.txt'),
    'bool_trialTable_matlab_indexed': True,
    'columnName_trialTable_trialId': None,
    'columnRenames_signal': {'Ch1': 'gDA', 'Ch5': 'gACH'},
    'columnRenames_trial': None},
]

dir_output.mkdir(parents=True, exist_ok=True)

In [4]:
columnName_alignment_trial_start = 'photometryCenterInIndex'
columnName_alignment_trial_end = 'photometrySideOutIndex'

# Note: Alignment values of 0 for Matlab-indexed trial tables will be treated as "no-data" values
# and and -1 for Python-indexed trial tables. Matlab-indexed trial tables should only have values
# >= 0 in and >= -1 in Python.
lst_strColumns_alignment = [
    'photometryCenterInIndex',
    'photometryCenterOutIndex',
    'photometrySideInIndex',
    'photometrySideOutIndex',
]

lst_strColumns_information = [
    'nTrial_raw', 'hasAllPhotometryData',
    'wasRewarded', 'word',
]

In [5]:
# trial

In [30]:
for dict_inputdata in lst_dict_inputdata:
    trial = pd.read_csv(dict_inputdata['filepath_trial'])
    trial = trial[trial['hasAllPhotometryData'] != 0]
    signal = pd.read_csv(dict_inputdata['filepath_signal'])

    signal_dense_numerical = fgh.Matrix(*[
        fgh.construct_vector_or_matrix(
            name=name,
            values=signal[name].values,
            nrows=signal.shape[0],
            fill_values=np.nan,
            dtype='numerical',
        ) for name in ['Ch1', 'Ch5']
    ])
    
    lst_signal_sparse_numerical = [
        (name, fgh.densified_sparse_to_sparse(signal[name], fill_value=0)) for name in [
            'centerOcc',
            'centerIn',
            'centerOut',
            'rightOcc',
            'rightIn',
            'rightOut',
            'rightLick',
            'leftOcc',
            'leftIn',
            'leftOut',
            'leftLick'
            ]
    ]
    signal_sparse_numerical = fgh.Matrix(*[
        fgh.construct_vector_or_matrix(
            name=name,
            indices=dct_sparsified['indices'],
            values=dct_sparsified['values'],
            nrows=dct_sparsified['nrows'],
            fill_values=np.nan,
            dtype='numerical',
        ) for name, dct_sparsified in lst_signal_sparse_numerical
    ])

    signal_sparse_categorical = fgh.Matrix(*[
        fgh.construct_vector_or_matrix(
            name=name,
            values=signal[name].values,
            nrows=signal.shape[0],
            fill_values=0,
            dtype='categorical',
        ) for name in ['reward', 'noreward']
    ])

    lst_table_sparse_cateegorical = [
        (
            indices_name,
            values_name
         ) for indices_name in [
            'photometryCenterInIndex',
            'photometryCenterOutIndex',
            'photometrySideInIndex',
            'photometrySideOutIndex',
            'photometryFirstLickIndex',
        ] for values_name in [
            'wasRewarded',
            'word',
        ]
    ]

    table_sparse_categorical = fgh.Matrix(*[
        fgh.construct_vector_or_matrix(
            name=(indices_name, values_name),
            indices=trial[indices_name].values,
            values=trial[values_name].astype(str).values,
            nrows=signal.shape[0],
            fill_values=0,
            dtype='categorical',
        ) for indices_name, values_name in lst_table_sparse_cateegorical
    ])

    lst_table_sparse_numerical = [
        (
            indices_name,
            values_name
         ) for indices_name in [
            'photometryCenterInIndex',
            'photometryCenterOutIndex',
            'photometrySideInIndex',
            'photometrySideOutIndex',
            'photometryFirstLickIndex',
        ] for values_name in [
            'hasAllPhotometryData',
            'choseLeft',
            'choseRight',
            'leftRewardProb',
            'rightRewardProb',
        ]
    ]

    table_sparse_numerical = fgh.Matrix(*[
        fgh.construct_vector_or_matrix(
            name=(indices_name, values_name),
            indices=trial[indices_name].values,
            values=trial[values_name].values,
            nrows=signal.shape[0],
            fill_values=np.nan,
            dtype='numerical',
        ) for indices_name, values_name in lst_table_sparse_numerical
    ])

    display(signal_dense_numerical.topd())
    display(signal_sparse_numerical.topd())
    display(signal_sparse_categorical.topd())
    display(signal_sparse_numerical.topd())
    display(table_sparse_categorical.topd())
    display(table_sparse_numerical.topd())

    # signal_sparse_matrices = fgh.Matrix(*[fgh.Vector(name, values=signal[name], nrows=signal.shape[0]) for name in ['Ch1', 'Ch5']])

    # display(trial)
    # display(signal)

    # display(dense_vectors.topd())

Unnamed: 0,"(Ch1,)","(Ch5,)"
0,-2.515587,0.087224
1,-1.660977,-0.159638
2,-2.155949,-0.408390
3,-0.868106,-0.799495
4,-0.477200,-0.422909
...,...,...
29845,-2.426426,0.590765
29846,-2.861054,-1.745552
29847,-2.314100,-0.742046
29848,-1.455169,-0.879324


Unnamed: 0,"(centerOcc,)","(centerIn,)","(centerOut,)","(rightOcc,)","(rightIn,)","(rightOut,)","(rightLick,)","(leftOcc,)","(leftIn,)","(leftOut,)","(leftLick,)"
0,,,,,,,,1.0,,,1.0
1,,,,,,,,1.0,,,
2,,,,,,,,1.0,,,1.0
3,,,,,,,,1.0,,,
4,,,,,,,,1.0,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...
29845,,,,,,,,,,,
29846,,,,,,,,,,,
29847,,,,,,,,,,,
29848,,,,,,,,,,,


Unnamed: 0,"(reward, 0)","(reward, 1)","(noreward, 0)","(noreward, 1)"
0,1,0,0,1
1,1,0,0,1
2,1,0,0,1
3,1,0,0,1
4,1,0,0,1
...,...,...,...,...
29845,1,0,1,0
29846,1,0,1,0
29847,1,0,1,0
29848,1,0,1,0


Unnamed: 0,"(centerOcc,)","(centerIn,)","(centerOut,)","(rightOcc,)","(rightIn,)","(rightOut,)","(rightLick,)","(leftOcc,)","(leftIn,)","(leftOut,)","(leftLick,)"
0,,,,,,,,1.0,,,1.0
1,,,,,,,,1.0,,,
2,,,,,,,,1.0,,,1.0
3,,,,,,,,1.0,,,
4,,,,,,,,1.0,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...
29845,,,,,,,,,,,
29846,,,,,,,,,,,
29847,,,,,,,,,,,
29848,,,,,,,,,,,


Unnamed: 0,"((photometryCenterInIndex, wasRewarded), 0)","((photometryCenterInIndex, wasRewarded), 1)","((photometryCenterInIndex, word), AA)","((photometryCenterInIndex, word), AB)","((photometryCenterInIndex, word), Aa)","((photometryCenterInIndex, word), Ab)","((photometryCenterInIndex, word), aA)","((photometryCenterInIndex, word), aB)","((photometryCenterInIndex, word), aa)","((photometryCenterInIndex, word), ab)",...,"((photometryFirstLickIndex, wasRewarded), 0)","((photometryFirstLickIndex, wasRewarded), 1)","((photometryFirstLickIndex, word), AA)","((photometryFirstLickIndex, word), AB)","((photometryFirstLickIndex, word), Aa)","((photometryFirstLickIndex, word), Ab)","((photometryFirstLickIndex, word), aA)","((photometryFirstLickIndex, word), aB)","((photometryFirstLickIndex, word), aa)","((photometryFirstLickIndex, word), ab)"
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,1,0,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29845,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29846,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29847,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29848,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,"((photometryCenterInIndex, hasAllPhotometryData),)","((photometryCenterInIndex, choseLeft),)","((photometryCenterInIndex, choseRight),)","((photometryCenterInIndex, leftRewardProb),)","((photometryCenterInIndex, rightRewardProb),)","((photometryCenterOutIndex, hasAllPhotometryData),)","((photometryCenterOutIndex, choseLeft),)","((photometryCenterOutIndex, choseRight),)","((photometryCenterOutIndex, leftRewardProb),)","((photometryCenterOutIndex, rightRewardProb),)",...,"((photometrySideOutIndex, hasAllPhotometryData),)","((photometrySideOutIndex, choseLeft),)","((photometrySideOutIndex, choseRight),)","((photometrySideOutIndex, leftRewardProb),)","((photometrySideOutIndex, rightRewardProb),)","((photometryFirstLickIndex, hasAllPhotometryData),)","((photometryFirstLickIndex, choseLeft),)","((photometryFirstLickIndex, choseRight),)","((photometryFirstLickIndex, leftRewardProb),)","((photometryFirstLickIndex, rightRewardProb),)"
0,,,,,,,,,,,...,,,,,,1.0,0.0,1.0,0.85,0.15
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29845,,,,,,,,,,,...,,,,,,,,,,
29846,,,,,,,,,,,...,,,,,,,,,,
29847,,,,,,,,,,,...,,,,,,,,,,
29848,,,,,,,,,,,...,,,,,,,,,,


In [35]:
table_sparse_numerical.topd()[(~table_sparse_numerical.topd().isna()).sum(axis=1) != 0]

Unnamed: 0,"((photometryCenterInIndex, hasAllPhotometryData),)","((photometryCenterInIndex, choseLeft),)","((photometryCenterInIndex, choseRight),)","((photometryCenterInIndex, leftRewardProb),)","((photometryCenterInIndex, rightRewardProb),)","((photometryCenterOutIndex, hasAllPhotometryData),)","((photometryCenterOutIndex, choseLeft),)","((photometryCenterOutIndex, choseRight),)","((photometryCenterOutIndex, leftRewardProb),)","((photometryCenterOutIndex, rightRewardProb),)",...,"((photometrySideOutIndex, hasAllPhotometryData),)","((photometrySideOutIndex, choseLeft),)","((photometrySideOutIndex, choseRight),)","((photometrySideOutIndex, leftRewardProb),)","((photometrySideOutIndex, rightRewardProb),)","((photometryFirstLickIndex, hasAllPhotometryData),)","((photometryFirstLickIndex, choseLeft),)","((photometryFirstLickIndex, choseRight),)","((photometryFirstLickIndex, leftRewardProb),)","((photometryFirstLickIndex, rightRewardProb),)"
0,,,,,,,,,,,...,,,,,,1.0,0.0,1.0,0.85,0.15
1223,1.0,1.0,0.0,0.85,0.15,,,,,,...,,,,,,,,,,
1226,,,,,,1.0,1.0,0.0,0.85,0.15,...,,,,,,,,,,
1240,,,,,,,,,,,...,,,,,,,,,,
1243,,,,,,,,,,,...,,,,,,1.0,1.0,0.0,0.85,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29105,1.0,1.0,0.0,0.85,0.15,,,,,,...,,,,,,,,,,
29108,,,,,,1.0,1.0,0.0,0.85,0.15,...,,,,,,,,,,
29129,,,,,,,,,,,...,,,,,,,,,,
29132,,,,,,,,,,,...,,,,,,1.0,1.0,0.0,0.85,0.15


In [25]:
bool_drop_zeroAlignments = True

trialSignalAligned_agg = fgh.TrialSignalAlignerAggregator()

for dict_inputdata in lst_dict_inputdata:
    # Load data
    trial = fgh.TrialPreprocessor(pd.read_csv(dict_inputdata['filepath_trial']))
    signal = fgh.SignalPreprocessor(pd.read_csv(dict_inputdata['filepath_signal']))

    # Preprocess trial table
    trial.preprocess();
    signal.preprocess();

    # Trial / signal alignment
    trialSignalAligned = fgh.TrialSignalAligner(trial, signal)
    trialSignalAligned.align();
    trialSignalAligned.trialstamp();
    trialSignalAligned.timestamp();

    # Aggregate
    trialSignalAligned_agg.add(trialSignalAligned);

trialSignalAligned_agg.combine();

AttributeError: module 'fit_glm_helpers' has no attribute 'TrialSignalAlignerAggregator'

In [None]:
# Generate prediction dataframe X, prediction dataframe y
predictors = ['predictor_1', 'predictor_2']
response = 'y'
trialSignalAligned_agg.generate_Xy(predictors, response);

# Unroll specified X columns into onehot representations
trialSignalAligned_agg.unroll_X_columns(['predictor_1', 'predictor_2']);

# Timeshift X columns
trialSignalAligned_agg.timeshift_X_columns(['predictor_1', 'predictor_2'], shift_amt=1);

# Split train/validation/test sets
trialSignalAligned_agg.split_train_validation_test();

# Fit GLM
glm = fgh.GLM(trialSignalAligned_agg);
glm.fit_GLM();
glm.generate_GLM_summary();
glm.plot_GLM_summary();

# Generate predictions for train/validation/test sets. Evaluate predictions on train/validation/test sets.
glm.generate_predictions();
glm.evaluate_predictions();
glm.generate_prediction_plots();

# Save preprocessing parameters
trial.save_preprocessing_info(dir_output / Path('trial_preprocessing_info.json'));
signal.save_preprocessing_info(dir_output / Path('signal_preprocessing_info.json'));

# Save alignment parameters
trialSignalAligned.save_alignment_info(dir_output / Path('alignment_info.json'));

# Save aggregation parameters
trialSignalAligned_agg.save_aggregation_info(dir_output / Path('aggregation_info.json'));

# Save GLM parameters
glm.save_GLM_info(dir_output / Path('glm_info.json'));