In [None]:
import os
import h5py
import pandas as pd
from pathlib import Path
from harp_resources import process, utils

In [None]:
# Static per-mouse metadata, keyed by mouse ID: sex ('M'/'F') and recorded brain area.
mouse_info = {
    'B2M1': dict(sex='M', area='V2M'),
    'B2M4': dict(sex='M', area='V2M'),
    'B2M5': dict(sex='M', area='V2M'),
    'B2M6': dict(sex='M', area='V2M'),
    'B3M1': dict(sex='M', area='V2M'),
    'B3M2': dict(sex='M', area='V2M'),
    'B3M3': dict(sex='F', area='V1'),
    'B3M4': dict(sex='M', area='V2M'),
    'B3M5': dict(sex='M', area='V2M'),
    'B3M6': dict(sex='F', area='V2M'),
    'B3M7': dict(sex='F', area='V2M'),
    'B3M8': dict(sex='F', area='V2M'),
}

# Date tag found in a session folder name (presumably DDMMYY) -> session label.
# Each row below is one day1/day2 pair.
session_info = {
    '220824': 'day1', '230824': 'day2',
    '190824': 'day1', '200824': 'day2',
    '120824': 'day1', '130824': 'day2',
    '070824': 'day1', '080824': 'day2',
}

In [None]:
def load_streams_from_h5s(data_paths):
    '''
    Load the data streams stored in a list of HDF5 files into a nested dictionary keyed by mouse ID.

    Each file is expected to contain a top-level dataset 'HARP_timestamps' plus one group per
    source, each group holding one dataset per stream. Every stream is rebuilt as a pd.Series
    indexed by the file's 'HARP_timestamps' values.

    Parameters
    ----------
    data_paths : list of str
        '/'-separated paths to the .h5 files. The mouse ID is taken from the four characters
        preceding the '.h5' extension, e.g.
        'resampled_streams_2024-08-22T13-13-15_B3M6.h5' -> 'B3M6'.

    Returns
    -------
    dict or None
        {mouse_id: {source_name: {stream_name: pd.Series}}}.
        None as soon as one of the paths does not exist (streams loaded so far are discarded).
        If two files resolve to the same mouse ID, the later file replaces the earlier one.
    '''
    reconstructed_dict = {}

    for input_file in data_paths:
        path_parts = input_file.split('/')
        # Mouse ID, computed once and reused in every message below
        name = path_parts[-1][-7:-3]

        if not os.path.exists(input_file):
            print(f'ERROR: {input_file} does not exist.')
            return None

        # The session folder sits two levels above the file; fall back to '' for shallow paths
        session_folder = path_parts[-3] if len(path_parts) >= 3 else ''

        # Open the HDF5 file to read data
        with h5py.File(input_file, 'r') as h5file:
            # The messages are built from pre-computed names: nesting the same quote character
            # inside an f-string replacement field is a SyntaxError before Python 3.12.
            print(f'reconstructing streams for mouse {name}, from session folder: {session_folder}')

            # Shared index for every stream in this file. It is kept as the raw stored values
            # (no conversion to a DatetimeIndex).
            common_index = h5file['HARP_timestamps'][:]

            reconstructed_streams = {}

            # Every top-level key except the index dataset is a source group
            for source_name in h5file.keys():
                if source_name == 'HARP_timestamps':
                    continue

                source_group = h5file[source_name]

                # One pd.Series per stream, all sharing the common index
                reconstructed_streams[source_name] = {
                    stream_name: pd.Series(data=source_group[stream_name][:], index=common_index)
                    for stream_name in source_group.keys()
                }

        reconstructed_dict[name] = reconstructed_streams
        print(f'  --> {name} streams reconstructed and added to dictionary \n')

    return reconstructed_dict

## Defining paths for grab or G8

In [None]:
# Sensor type of the recordings being processed.
# NOTE(review): no active code in the visible cells reads this variable; only the disabled
# (string-quoted) path-selection cell branches on it.
sensor = 'grab' #'grab' or 'g8'

In [None]:
# Root folder of one experiment; every folder below it is scanned for .h5 files.
rootdir = '/Volumes/RanczLab/20240730_Mismatch_Experiment/GRAB_MMclosed-and-open_190824'
h5_paths = []    # full path of every .h5 file found
eventpaths = []  # folder containing each .h5 file, in the same order as h5_paths
for dirpath, subdirs, files in os.walk(rootdir):
    # Sorting in place makes os.walk visit sub-folders in a reproducible order
    subdirs.sort()
    for x in sorted(files):
        # Match the extension only: a substring test ('.h5' in x) also accepts names
        # such as 'foo.h5.bak'
        if x.endswith('.h5'):
            eventpaths.append(dirpath)
            # Joined with '/' because later cells split these paths on '/'
            h5_paths.append(dirpath+'/'+x)

In [None]:
'''if sensor == 'grab':
    paths = ['/Users/hildeteigen/Downloads/resampled_streams_2024-08-22T13-13-15_B3M6.h5']
    eventpaths = ['/Volumes/RanczLab/20240730_Mismatch_Experiment/GRAB_MMclosed-and-Regular_220824/2024-08-22T13-13-15_B3M6'] 
    
if sensor == 'g8':
    paths = ['/Users/hildeteigen/Downloads/resampled_streams_2024-08-22T13-13-15_B3M6.h5']
    eventpaths = ['/Volumes/RanczLab/20240730_Mismatch_Experiment/GRAB_MMclosed-and-Regular_220824/2024-08-22T13-13-15_B3M6'] '''

### Loading data streams

In [None]:
stream_dict_dict = load_streams_from_h5s(h5_paths)

In [None]:
stream_dict_dict['B3M8']['H1']

In [None]:
# Build one DataFrame per mouse from the loaded streams, keyed by mouse ID.
data_dict = {}
for mouse, streamdict in stream_dict_dict.items():
    #Getting fluorescence traces
    fluorescence = streamdict['Photometry']['470_dfF'] #Using '470_dfF' only

    #Getting mouse movement data; the converted values are scaled by 100
    #(original note: converting to cm / second — unit depends on process.running_unit_conversion)
    movementX = process.running_unit_conversion(streamdict['H1']['OpticalTrackingRead0X(46)'])*100
    movementY = process.running_unit_conversion(streamdict['H1']['OpticalTrackingRead0Y(46)'])*100

    #Getting eye movements and pupil diameter
    #eye_center_x = streamdict['SleapVideoData2']['Ellipse.Center.X']
    #eye_center_y = streamdict['SleapVideoData2']['Ellipse.Center.Y']
    #eye_diameter = streamdict['SleapVideoData2']['Ellipse.Diameter']

    #Getting visual stimuli event times
    event = streamdict['ONIX']['Photodiode']

    #Time relative to the first sample of the movement stream
    time = movementX.index - movementX.index[0]

    # Named `stream_columns` rather than `dict`: rebinding `dict` would shadow the builtin
    # for every cell executed afterwards in this kernel.
    stream_columns = {'470_dfF': fluorescence, 'movementX': movementX, 'movementY': movementY, 'event': event,
        'Seconds': time}
    #stream_columns = {'470_dfF': fluorescence, 'movementX': movementX, 'movementY': movementY, 'event': event,
     #   'TimeStamp': time, 'eye_x': eye_center_x, 'eye_y': eye_center_y, 'pupil_diameter': eye_diameter}

    df = pd.DataFrame(stream_columns)

    #Reversing, so that a halt appears when 'event'==True
    # NOTE(review): `~` is a logical NOT only for boolean data — confirm the Photodiode stream is bool
    df['event'] = ~df['event']

    # The timestamp index is kept as-is (the former `df.reset_index(inplace=False)` discarded
    # its result and had no effect); later cells merge events on this index.
    data_dict[mouse] = df

In [None]:
data_dict['B3M7']

### Loading Experiment events and session info

In [None]:
# Load the experiment events of every recording folder, keyed by mouse ID.
event_dict = {}
for eventpath in eventpaths:
    path_parts = eventpath.split('/')
    session_folder = path_parts[-2]  # parent folder, e.g. 'GRAB_MMclosed-and-open_190824'
    name = path_parts[-1][-4:]       # mouse ID = last four characters of the recording folder

    ExpEvents = utils.read_ExperimentEvents(Path(eventpath))
    ExpEvents.set_index('Seconds', inplace = True)
    # Round the event times to 4 decimals before they are matched against the data index
    ExpEvents.index = ExpEvents.index.round(4)

    # Experiment label = second '_'-separated token of the session folder name
    ExpEvents['experiment'] = session_folder.split('_')[1]

    # Session label = value of the session_info key contained in the folder name
    # (the last matching key wins, as before).
    session = None
    for key, item in session_info.items():
        if key in session_folder:
            session = item
    if session is None:
        # Without this default the 'session' column would not exist at all and
        # add_experiment_events would later fail with a KeyError.
        print(f'WARNING: no session_info entry matches folder {session_folder}; session set to None for {name}')
    ExpEvents['session'] = session

    event_dict[name] = ExpEvents


### Adding events (and non-events) and session info to data

In [None]:
event_dict['B3M6'].loc[event_dict['B3M6'].Value == 'LinearPlaybackMismatch block started']

In [None]:
data_dict['B3M6']#.loc[data_dict['B3M8'].index >=93949.2902]

In [None]:
'''def add_experiment_events(data_dict, events_dict):
    # Iterate over each mouse in the dictionaries
    for mouse_key in data_dict:
        main_df = data_dict[mouse_key]  # Large DataFrame (main data)
        event_df = events_dict[mouse_key]  # Small DataFrame (event data)

        # Ensure the index of the event_df is named 'Seconds' and has proper precision
        event_df.index.name = 'Seconds'
        
        # Resolve duplicate index values by keeping the first occurrence or handling them accordingly
        event_df = event_df[~event_df.index.duplicated(keep='first')]
        
        # Ensure the same for the main_df, if there are duplicates
        main_df = main_df[~main_df.index.duplicated(keep='first')]
        
        # Use pd.merge_asof to match the nearest milliseconds from main_df index to event_df index
        merged_df = pd.merge_asof(
            main_df,
            event_df[['Value']],  # Only bring in the 'Value' column
            left_index=True,  # main_df has time in its index
            right_index=True,  # event_df has time in its index (both in ms)
            direction='backward',  # Choose the closest event on or before the timestamp
            tolerance=0.005 # Adjust tolerance for closest matching (to milliseconds)
        )
        
        # Add 'ExperimentEvents' column to main_df from merged_df
        main_df['ExperimentEvents'] = merged_df['Value']
        
        # Add metadata from event_df
        main_df['Experiment'] = event_df['experiment'].unique()[0]
        main_df['Session'] = event_df['session'].unique()[0]
        
        # Add mouse ID, sex, and brain area
        main_df['mouseID'] = mouse_key
        main_df['sex'] = mouse_info[mouse_key]['sex']
        main_df['area'] = mouse_info[mouse_key]['area']
        
        # Update the dictionary with the modified DataFrame
        data_dict[mouse_key] = main_df
        
        print(f'Events and experiment info added to {mouse_key}')

    return data_dict'''


In [None]:
def add_experiment_events(data_dict, events_dict, mouse_info):
    '''
    Attach experiment events and per-mouse metadata to every DataFrame in data_dict.

    For each mouse the data frame is sorted by index and the 'Value' column of the matching
    event frame is merged onto it with pd.merge_asof (index on index, backward direction,
    tolerance 0, i.e. only identical index values match). The merged column is named
    'ExperimentEvents'. The columns 'Experiment', 'Session', 'mouseID', 'sex' and 'area'
    are then added as constants.

    Parameters
    ----------
    data_dict : dict
        {mouse_id: DataFrame}; its values are replaced in place.
    events_dict : dict
        {mouse_id: DataFrame} with columns 'Value', 'experiment' and 'session'.
    mouse_info : dict
        {mouse_id: {'sex': ..., 'area': ...}}.

    Returns
    -------
    dict
        The same data_dict object, holding the merged frames.
    '''
    for mouse_key in list(data_dict):
        events = events_dict[mouse_key]

        # Index-on-index as-of merge; events without an identical timestamp in the data stay unmatched
        with_events = pd.merge_asof(
            data_dict[mouse_key].sort_index(),
            events[['Value']],
            left_index=True,
            right_index=True,
            direction='backward',
            tolerance=0,
        ).rename(columns={'Value': 'ExperimentEvents'})

        # Constant metadata columns: first unique experiment/session label, then mouse details
        with_events = with_events.assign(
            Experiment=events['experiment'].unique()[0],
            Session=events['session'].unique()[0],
            mouseID=mouse_key,
            sex=mouse_info[mouse_key]['sex'],
            area=mouse_info[mouse_key]['area'],
        )

        data_dict[mouse_key] = with_events
        print(f'Added ExperimentEvents to {mouse_key}')

    return data_dict



In [None]:
data_dict = add_experiment_events(data_dict, event_dict,mouse_info)

In [None]:
data_dict['B3M6'].loc[data_dict['B3M6'].ExperimentEvents == 'Block timer elapsed']
#data_dict['B3M6'].ExperimentEvents.unique()
data_dict['B3M6'].loc[data_dict['B3M6'].index == 98993.4863]

In [None]:
def add_no_halt_column(data_dict, events_dict, tolerance=0.00005):
    '''
    Add a boolean 'No_halt' column to every DataFrame in data_dict.

    A row is True when a 'No halt' event lies at or before the row's index value and no
    further away than `tolerance` (backward pd.merge_asof, index on index).

    Parameters
    ----------
    data_dict : dict
        {mouse_id: DataFrame}. Each frame is modified in place. Its index must be sorted,
        as required by pd.merge_asof.
    events_dict : dict
        {mouse_id: DataFrame} with a 'Value' column. The index of each event frame is
        renamed to 'Seconds' in place.
    tolerance : float, optional
        Largest allowed distance between a row and the preceding 'No halt' event, in index
        units. The default 0.00005 reflects that the two frames generally agree to 4 decimals
        and that each event should mark a single row.

    Returns
    -------
    dict
        The same data_dict object.
    '''
    for mouse_key in data_dict:
        main_df = data_dict[mouse_key]  # Large DataFrame
        event_df = events_dict[mouse_key]  # Small DataFrame

        # Ensure the index of the event_df is named 'Seconds'
        event_df.index.name = 'Seconds'

        # Only the 'No halt' events take part in the merge
        no_halt_events = event_df.loc[event_df['Value'] == 'No halt', ['Value']]

        merged_df = pd.merge_asof(
            main_df,
            no_halt_events,
            left_index=True,
            right_index=True,
            direction='backward',  # closest event on or before the row
            tolerance=tolerance
        )

        # Unmatched rows hold NaN, which already compares unequal to 'No halt'. No fillna is
        # needed, which avoids the object-dtype downcasting FutureWarning raised by .fillna(False).
        main_df['No_halt'] = merged_df['Value'] == 'No halt'

        # Update the dictionary with the modified DataFrame
        data_dict[mouse_key] = main_df

        print('No_halt events added to', mouse_key)

    return data_dict


In [None]:
data_dict = add_no_halt_column(data_dict, event_dict)

In [None]:
#Check that number of no halts matches
def no_halt_len(mouse):
    '''
    Report whether the number of 'No halt' events in the notebook-level event_dict[mouse]
    equals the number of True values in the 'No_halt' column of data_dict[mouse].

    Prints a confirmation when the counts agree and both counts otherwise. Returns None.
    '''
    events = event_dict[mouse]
    data = data_dict[mouse]

    event_len = int((events.Value == 'No halt').sum())
    data_len = int((data.No_halt == True).sum())

    if event_len == data_len:
        print(f'Correct number of no-halt events for {mouse}')
    else:
        print(f'for {mouse} the number of actual no-halt events is {event_len} and the number of True values in the data now is {data_len}')


In [None]:
for mouse in data_dict:
    no_halt_len(mouse)

In [None]:
def add_block_columns(df, event_df):
    '''
    Add one boolean '<BlockName>_block' column per experiment block to df.

    The events in event_df['Value'] are processed in order:
    * '<BlockName> block started' opens a block: the column '<BlockName>_block' is True from
      the event's index value onwards, and any block that was still open is closed there.
    * 'Block timer elapsed' closes the open block: its column is False from that index
      value onwards.

    Parameters
    ----------
    df : DataFrame
        Data frame whose index is comparable with the index of event_df. Label slices are
        taken on it, so it must be sorted. It is modified in place.
    event_df : DataFrame
        Event frame with a 'Value' column of event names.

    Returns
    -------
    DataFrame
        The same df object, with the block columns added.
    '''
    active_column = None  # column of the block that is currently open

    for idx, event in event_df['Value'].items():
        if not isinstance(event, str):
            # Missing values (NaN) cannot be searched for a substring
            continue

        if 'block started' in event:
            # 'LinearMismatch block started' -> 'LinearMismatch_block'. The final name is used
            # directly: the earlier post-hoc df.rename({...}) had no `columns=` and therefore
            # targeted index labels, so the columns were never renamed.
            column_name = f'{event.split()[0]}_block'

            # Create the column only once so a block that starts again keeps its earlier span
            if column_name not in df.columns:
                df[column_name] = False

            # A block that is still open ends where the new one begins (rows from idx
            # onwards, not the rows before it)
            if active_column is not None:
                df.loc[idx:, active_column] = False

            df.loc[idx:, column_name] = True
            active_column = column_name

        elif 'Block timer elapsed' in event:
            if active_column is not None:
                df.loc[idx:, active_column] = False
                active_column = None

    return df


In [None]:
# Add the per-block boolean columns to every mouse's frame.
for name, df in data_dict.items():
    print('updating data for ', name)
    blocks_added_df = add_block_columns(df, event_dict[name])
    # The former `blocks_added_df.replace({})` is dropped: its result was discarded, so it
    # only copied the whole frame on every iteration.
    data_dict[name] = blocks_added_df

In [None]:
data_dict['B3M8'].loc[data_dict['B3M8']['LinearMismatch_block']==True]

In [None]:
data_dict['B3M6'].loc[data_dict['B3M6'].LinearMismatch_block==True]

In [None]:
data_dict['B3M6'].loc[data_dict['B3M6'].No_halt==True]
event_dict['B3M6'].loc[event_dict['B3M6'].Value=='LinearPlaybackMismatch block started']


In [None]:
def downsample_data(df, time_col='Seconds', interval=0.001):
    '''
    Downsample df into fixed-width time bins.

    Parameters
    ----------
    df : DataFrame
        Data with a numeric time column given in seconds.
    time_col : str, optional
        Name of the time column (default 'Seconds').
    interval : float, optional
        Bin width in seconds (default 0.001). Must be positive.

    Returns
    -------
    DataFrame
        One row per bin with a fresh integer index. `time_col` holds the bin start in seconds.
        Only the columns listed in the aggregation table below that exist in df are kept:
        signals are averaged, boolean flags are True if any sample in the bin is True,
        metadata columns take the first value of the bin (forward-filled across empty bins),
        and 'ExperimentEvents' takes the first non-null value of the bin, with consecutive
        repeats blanked.

    Raises
    ------
    ValueError
        If interval is not positive.
    '''
    if interval <= 0:
        raise ValueError(f'interval must be positive, got {interval}')

    def _first_valid(values):
        # First non-null entry of the bin, or None when the bin has none
        valid = values.dropna()
        return valid.iloc[0] if not valid.empty else None

    # Convert the time column to a TimedeltaIndex so the frame can be resampled
    df = df.set_index(pd.to_timedelta(df[time_col], unit='s'))

    # Aggregation for every column that may be present
    aggregation_functions = {
        '470_dfF': 'mean',
        'movementX': 'mean',
        'movementY': 'mean',
        'event': 'any',
        'ExperimentEvents': _first_valid,
        'Experiment': 'first',
        'Session': 'first',
        'mouseID': 'first',
        'sex': 'first',
        'area': 'first',
        'No_halt': 'any',
        'LinearMismatch_block': 'any',
        'LinearPlaybackMismatch_block': 'any',
        'LinearRegular_block': 'any'
    }

    # Keep only the aggregations whose column exists in df
    aggregation_functions = {key: func for key, func in aggregation_functions.items() if key in df.columns}

    # The bin width is passed as a Timedelta. Formatting it as f'{interval}s' breaks for small
    # values: 0.00005 renders as '5e-05s', which is not a valid frequency string.
    bin_width = pd.to_timedelta(interval, unit='s')
    downsampled_df = df.resample(bin_width).agg(aggregation_functions)

    # Turn the Timedelta index back into a plain column of seconds
    downsampled_df = downsampled_df.reset_index()
    downsampled_df[time_col] = downsampled_df[time_col].dt.total_seconds()

    # Empty bins have no 'first' value; carry the metadata forward
    categorical_cols = ['Experiment', 'Session', 'mouseID', 'sex', 'area']
    for col in categorical_cols:
        if col in downsampled_df.columns:
            downsampled_df[col] = downsampled_df[col].ffill()

    # Blank out an event label that merely repeats the previous row's label
    if 'ExperimentEvents' in downsampled_df.columns:
        downsampled_df['ExperimentEvents'] = downsampled_df['ExperimentEvents'].where(
            downsampled_df['ExperimentEvents'] != downsampled_df['ExperimentEvents'].shift()
        )

    return downsampled_df



In [None]:
def test_event_numbers(downsampled_data, original_data, mouse):
    nohalt_down = len(downsampled_data.loc[downsampled_data['No_halt']==True])
    nohalt_original = len(original_data.loc[original_data['No_halt']==True])
    if nohalt_down != nohalt_original:
        print(f'mouse{mouse}')
        print(f'There are actually {nohalt_original} no-halts, but the downsampled data only contains {nohalt_down}')
    
    

In [None]:
'''downsampled_dict = {}
for mouse, mouse_df in data_dict.items():
    downsampled_df = downsample_data(mouse_df, time_col='Seconds', interval=0.001)
    downsampled_dict[mouse]=downsampled_df
    test_event_numbers(downsampled_df, mouse_df, mouse)
    '''
# Downsample a single mouse (bin width 0.001 s) and report any mismatch in the number of
# No_halt rows between the original and the downsampled frame.
mouse = 'B3M6'
downsampled_df = downsample_data(data_dict[mouse], time_col='Seconds', interval=0.001)
test_event_numbers(downsampled_df, data_dict[mouse], mouse)

In [None]:
from matplotlib import pyplot as plt
plt.scatter(downsampled_df['Seconds'].loc[downsampled_df['event']==True], downsampled_df['470_dfF'].loc[downsampled_df['event']==True])

In [None]:

downsampled_df.ExperimentEvents.unique()#.loc[downsampled_df.No_halt==True]


In [None]:
downsampled_df#.loc[downsampled_df.No_halt==True]

downsampled_df.ExperimentEvents.unique()#.loc[downsampled_df.No_halt==True]
downsampled_df.loc[downsampled_df.ExperimentEvents=='Apply halt: 1s']


In [None]:
downsampled_df.loc[downsampled_df.Seconds >87.4]

In [None]:
downsampled_df.loc[downsampled_df.event ==True]

In [None]:
#Data = process.pooling_data(data_dict)


In [None]:
# NOTE(review): `Data` is not assigned in any visible cell — the only assignment,
# `Data = process.pooling_data(data_dict)`, is commented out in the preceding cell — so this
# cell raises NameError on a fresh kernel until that line is restored.
# Presumably `Data` carries a two-level index whose levels become 'level_0' and 'level_1'
# after reset_index() — TODO confirm against process.pooling_data.
Data = Data.reset_index()
Data = Data.drop(columns=['level_0'])  # Assuming the column name is 'level_0' after reset_index()
Data = Data.set_index('level_1')  # 'level_1' will be the numeric index part
Data.index.name = 'Time'

In [None]:
import matplotlib.pyplot as plt

plt.plot()

In [None]:
#20240730_Mismatch_Experiment/GRAB_MMclosed-and-open_190824'
#Data.to_csv('GRAB_MMclosed_open_session1.csv', index=False)

In [None]:
B3M7 = Data.loc[Data.mouseID=='B3M7']

In [None]:
B3M4 = Data.loc[Data.mouseID=='B3M4']

In [None]:
B3M4.loc[B3M4.ExperimentEvents == 'Block timer elapsed']

In [None]:

B3M4.loc[B3M4.LinearPlaybackMismatch_block == True]