<br>


# Ripple clean up

In this notebook we will:

- Aggregate all ripple datasets into one;
- Find closest position datapoint;
- Do a Quality check: the closest datapoint must be inside a trial minimum and maximum timestamps, otherwise, it means the detected ripple occurred during rat transportation, before or after the behavioral session.
- Calculate position at the moment of SWR onset based on closest data points using the rat’s velocity within those two points. 

- Save single dataset into a .csv file.


### Imports

In [4]:
import os
import re
import glob
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
from ephys_utils import get_file_list
import matplotlib.pyplot as plt
%matplotlib inline

<br>

### Function definition

In [5]:
def find_closest_xy_trial_nr_outcome_run_type(value, position):
    
    """
    Find the closest x to value in the position file
    value, float - Reference value;
    position, DataFrame - Contains the x position data
    """
    
    closest_indices = position['timestamp'].sub(value).abs().sort_values().index.to_list()
    
    if not closest_indices:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
    else:
        closest_index=closest_indices[0]
        closest_data = position.loc[closest_index, ['timestamp','x', 'y', 'trial_nr', 'outcome', 'run_type']]
        closest_data['i']=closest_index
        return closest_data
       
def remove_ripples_outside_trials(ripple, trial_timestamps):
    
    # Collect the minimum and maximum timestamps of the trial nr and session the ripple
    # was attributed too after finiding the closest position point
    
    trial = trial_timestamps[
        (trial_timestamps['session_code']==ripple['session_code']) &
        (trial_timestamps['trial_nr']==ripple['trial_nr'])]
    
    # Get min and max timestamps for trial of interest
    min_t = trial['min'].iloc[0]
    max_t = trial['max'].iloc[0]
    
    if max_t > ripple['start_time'] > min_t:
        
        ripple['trial_bound']=True
    else:
        ripple['trial_bound']=False
        
    return ripple
    
def calculate_xy_ripple_time_using_velocity(point1, point2, deltat_ripple):
    '''
    Using the two closests position points, calculate velocity at ripple time.
    Will be used to infer ripple time xy position.
    point1 occurs before point2.
    '''
    # Calculate the velocity between two closest points
    deltat = abs(point2.timestamp-point1.timestamp)
    deltax = abs(point2.x - point1.x)
    deltay = abs(point2.y - point1.y)
    vx = deltax / deltat
    vy =  deltay/ deltat
    
    # Get movement deltax and deltay from first point to location of ripple start time
    deltax_ripple = vx * deltat_ripple
    deltay_ripple = vy * deltat_ripple
    
    return point1['x']+deltax_ripple, point1['y']+deltay_ripple
     
def calculate_xy_at_ripple_time(ripple, position):
    '''
    Get the two closest/adjacent position points to the ripple start time 
    (we already have the closest but not both). 
    Based on these two points, we calculate the approximate x in which the rat was 
    at the start of ripple occurrence.
    '''
    session_position = position[position['session_code']==ripple['session_code']].sort_values(
        'timestamp')
    
    # Get previous position datapoint as 2nd point
    if ripple['start_time']==ripple['timestamp']:
        
        x_ripple= ripple['x']
        y_ripple =ripple['y']
        
    elif ripple['start_time']!=ripple['timestamp']:   
        
        if ripple['start_time'] < ripple['timestamp']:   
            # t1 < Start time < t2 (closest)    
            t1 = session_position.loc[ripple['i']-1, :]
            t2 = ripple[['timestamp', 'x', 'y']]
             
        elif ripple['start_time'] > ripple['timestamp']:  
            # Closest timestamp (t1) < Start time < Second point (t2)
            t1 = ripple[['timestamp', 'x', 'y']]
            t2 = session_position.loc[ripple['i']+1, :]
        
        deltat_ripple = abs(ripple['start_time']-t1['timestamp'])  
        x_ripple, y_ripple = calculate_xy_ripple_time_using_velocity(t1, t2, deltat_ripple) 
        
    ripple['x_ripple']=x_ripple
    ripple['y_ripple']=y_ripple
    
    return ripple

<br>

### Path definition

In [6]:
# Where data from all rats is stored
main_path = 'E:/EPHYS/data/'

In [7]:
# Which folder names to process
folders = [
# Magalhaes
'MAGALHAES_DNMP12_20trials_20190127105419',
'MAGALHAES_DNMP13_10trials_20190128135858',
'MAGALHAES_DNMP13_10trials_20190128142936',
'MAGALHAES_DNMP14_3trials_20190128201036',
'MAGALHAES_DNMP14_6trials_20190128190601',
'MAGALHAES_DNMP14_11trials_20190128193951',
'MAGALHES_DNMP11_20trials_20190126160731',
# Napoleão
# Homero
#'HOMERO_DNMP4_12trials_20191105174624',
'HOMERO_DNMP16_20trials_20191113131818',
#'HOMERO_DNMP23_20trials_20191117150748'   
]

#### Process the datasets


In [9]:
all_ripples=[]
all_position=[]
all_cp_rois=[]
all_start_rois=[]
all_rw1_rois=[]
all_rw2_rois=[]

for f in folders:
    
    folder_path = os.path.join(main_path, f)    
    # Get rat and session codes to add to dataframe
    session_code = re.search(r'trials_(\d*)', f).group(1)
    rat_code = re.search(r'([A-Z]*)_DNMP', f).group(1)[0:3]
     
    # Get necessary file paths
    ripple_path = sorted(glob.glob(os.path.join(folder_path, '*ripples.csv')))[0]
    position_path = sorted(glob.glob(os.path.join(folder_path, 'Timestamped_position', '*_timestamped_position_df_clean.csv')))[0]
    cp_roi_path = sorted(glob.glob(os.path.join(folder_path,'ROIS', '*cp_roi_converted.csv')))[0]
    start_roi_path = sorted(glob.glob(os.path.join(folder_path,'ROIS', '*start_roi_converted.csv')))[0]
    rw1_roi_path = sorted(glob.glob(os.path.join(folder_path,'ROIS', '*rw1_roi_converted.csv')))[0]
    rw2_roi_path = sorted(glob.glob(os.path.join(folder_path,'ROIS', '*rw2_roi_converted.csv')))[0]
    
    # Read the data files
    ripple = pd.read_csv(ripple_path)
    position = pd.read_csv(position_path)
    cp_roi = pd.read_csv(cp_roi_path).drop(['Unnamed: 0'], axis=1)
    start_roi = pd.read_csv(start_roi_path).drop(['Unnamed: 0'], axis=1)
    rw1_roi = pd.read_csv(rw1_roi_path).drop(['Unnamed: 0'], axis=1)
    rw2_roi = pd.read_csv(rw2_roi_path).drop(['Unnamed: 0'], axis=1)
    
    # find closest position point to ripple
    closest = ripple['start_time'].apply(find_closest_xy_trial_nr_outcome_run_type, args=(position,))
    
    ripple_info = pd.concat([ripple, closest], axis=1)
    
    # Add rat conde and session code to each dataframe
    for df in [ripple_info, position, cp_roi, start_roi]:
        df['rat_code']=rat_code
        df['session_code']=session_code

    # Add data to list of all dataframes per type
    all_ripples.append(ripple_info)
    all_position.append(position)
    all_cp_rois.append(cp_roi)
    all_start_rois.append(start_roi)
    all_rw1_rois.append(rw1_roi)
    all_rw2_rois.append(rw2_roi)

IndexError: list index out of range

#### Concatenate datasets into single dataframe

In [None]:
# Transform list of dataframes into a single dataframe
ripples = pd.concat(all_ripples)
position = pd.concat(all_position)
cp_rois = pd.concat(all_cp_rois)
start_rois=pd.concat(all_start_rois)
rw1_rois=pd.concat(all_rw1_rois)
rw2_rois=pd.concat(all_rw2_rois)

In [None]:
ripples.head()

<br>



## Curate ripples
<br>

#### Remove ripples if start time occurrs before or after first and final timestamps of position dataset or during transport periods.
That means those ripples occurred before, after the behavioral session or between runs

In [None]:
trial_timestamps = position.groupby(['session_code', 'trial_nr']).timestamp.agg(['min', 'max']).reset_index()
trial_timestamps.head()

In [None]:
ripples = ripples.apply(remove_ripples_outside_trials, args=(trial_timestamps,), axis=1)
# How many ripples are trial bound? How many are not
ripples.groupby(['trial_bound'])['start_time'].count()

In [None]:
# Drop outbound ripples
ripples = ripples[ripples.trial_bound==True]

<br>


#### Get xy position at ripple start time

Get the two closest/adjacent position points to the ripple start time (we already have the closest but not both). Based on these two points, we calculate the approximate x in which the rat was at the start of ripple occurrence.

In [None]:
ripples = ripples.apply(calculate_xy_at_ripple_time, args=(position,), axis=1)

In [None]:
ripples = ripples[['start_time', 'end_time', 
         'trial_nr', 'outcome', 
         'run_type', 'rat_code', 
         'session_code', 'x_ripple', 'y_ripple']]

In [None]:
# select a few ripples and check if data makes sense
ripples.head()

In [None]:
ripples.to_csv(os.path.join(main_path, 'clean_ripples.csv'), index=False)
position.to_csv(os.path.join(main_path, 'position.csv'), index=False)
cp_rois.to_csv(os.path.join(main_path, 'cp_rois.csv'), index=False)
start_rois.to_csv(os.path.join(main_path, 'start_rois.csv'), index=False)
rw1_rois.to_csv(os.path.join(main_path, 'rw1_rois.csv'), index=False)
rw2_rois.to_csv(os.path.join(main_path, 'rw2_rois.csv'), index=False)