In [17]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tdt
import trompy as tp

import dill

In [18]:
# Project-relative folder holding the CSV key files, the optional ttls.csv cache
# and the pickled output written at the end of the notebook.
DATAFOLDER = Path("..//data")
# Root folder of the TDT tanks (one sub-folder per session stub).
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
TANKFOLDER = Path("D://TestData//bazzino//from_paula")
# Folder with the DLC (presumably DeepLabCut) tracking CSVs, inside the tank folder.
DLCFOLDER = TANKFOLDER / "Sodium_Appetite_DLC"

In [19]:
def get_ttls(stub):
    """Return the solenoid TTL onset times for one session.

    The pre-saved ``ttls.csv`` in ``DATAFOLDER`` is used when it contains a
    column named after ``stub``; otherwise the epocs are read from the TDT
    tank in ``TANKFOLDER / stub``.

    Parameters
    ----------
    stub : str
        Session identifier; both the column name in ``ttls.csv`` and the tank
        sub-folder name.

    Returns
    -------
    numpy.ndarray
        Onset times of the ``sol_`` epoc (downstream code multiplies them by
        the video frame rate, so presumably seconds - TODO confirm).
    """
    ttl_file = DATAFOLDER / "ttls.csv"

    if ttl_file.exists():
        ttls_df = pd.read_csv(ttl_file)
        if stub in ttls_df.columns:
            return ttls_df.loc[:, stub].values
        # Previously this case fell through to an unassigned variable and
        # raised UnboundLocalError; fall back to the tank instead.
        print(f"No column for {stub} in ttls.csv. Reading from TDT tank.")
    else:
        print("No pre-saved ttls.csv file found. Reading from TDT tank.")

    data = tdt.read_block(TANKFOLDER / stub, evtype=["epocs"])
    return data.epocs.sol_.onset

def read_DLC_csv(filename):
    """Load a DLC tracking CSV into a DataFrame with flat column names.

    The first line of the file is ignored. The second and third lines are
    combined pairwise into column names of the form ``<line2>_<line3>``, where
    the line-2 value is lower-cased and stripped of spaces. Everything from the
    fourth line onward is read as data.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The data rows, labelled with the flattened column names.
    """
    # Only the two header lines that carry naming information are read here.
    header_rows = pd.read_csv(filename, skiprows=1, nrows=2, header=None)
    upper_labels, lower_labels = (header_rows.iloc[k].astype(str) for k in (0, 1))

    flat_names = []
    for upper, lower in zip(upper_labels, lower_labels):
        flat_names.append(f"{upper.lower().replace(' ', '')}_{lower}")

    # Skip all three header lines and apply the flattened names instead.
    return pd.read_csv(filename, skiprows=3, header=None, names=flat_names)

def interpolate_low_likehood(df, threshold=0.5):
    """Replace low-confidence coordinates with linearly interpolated values.

    Every column is first coerced to numeric (unparseable entries become NaN).
    For each body part that has ``<bp>_x``, ``<bp>_y`` and ``<bp>_likelihood``
    columns, x/y samples whose likelihood is below ``threshold`` are set to NaN
    and then filled by linear interpolation in both directions, so leading and
    trailing gaps are filled too.

    Parameters
    ----------
    df : pandas.DataFrame
        Tracking data with ``<bodypart>_<x|y|likelihood>`` columns.
        NOTE: ``df`` is modified in place and also returned.
    threshold : float, default 0.5
        Likelihood below which a coordinate is treated as missing.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after coercion and interpolation.
    """
    # Convert all columns to numeric, coercing errors. This is important.
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # The body part is everything before the LAST underscore, so names that
    # themselves contain underscores (e.g. "left_ear_x") are handled as well.
    bodyparts = {
        col_name.rsplit('_', 1)[0]
        for col_name in df.columns
        if isinstance(col_name, str) and '_' in col_name
    }

    for bp in sorted(bodyparts):
        x_col = f"{bp}_x"
        y_col = f"{bp}_y"
        likelihood_col = f"{bp}_likelihood"

        # Columns that do not form a complete x/y/likelihood triple are skipped.
        if not {x_col, y_col, likelihood_col}.issubset(df.columns):
            continue

        low_confidence = df[likelihood_col] < threshold

        for coord_col in (x_col, y_col):
            df.loc[low_confidence, coord_col] = np.nan
            df[coord_col] = df[coord_col].interpolate(method='linear', limit_direction='both')

    return df

def calc_angular_velocity(df, max_ear_distance=90):
    """Add head-angle and frame-to-frame angular change columns.

    The head direction is the angle of the vector from the left ear to the
    right ear. Frames in which the two ears are ``max_ear_distance`` or more
    apart are treated as tracking errors and yield NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``leftear_x``, ``leftear_y``, ``rightear_x``, ``rightear_y``.
    max_ear_distance : float, default 90
        Ear separation (in the same units as the coordinates) at or above which
        a frame is discarded.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these added columns: ``ear_distance``,
        ``rel_rightear_x``, ``rel_rightear_y``, ``angle_rad``, ``d_angle``
        (raw difference in radians, NaN replaced by 0), ``d_angle_wrapped``
        (difference wrapped to [-pi, pi)) and ``d_angle_deg`` (wrapped
        difference in degrees).
    """
    return (
        df
        .assign(
            _rel_rightear_x_orig = lambda x_df: x_df["rightear_x"] - x_df["leftear_x"],
            _rel_rightear_y_orig = lambda x_df: x_df["rightear_y"] - x_df["leftear_y"]
        )
        .assign(
            ear_distance = lambda x_df: np.sqrt(x_df["_rel_rightear_x_orig"]**2 + x_df["_rel_rightear_y_orig"]**2)
        )
        .assign(
            # Implausibly large ear separation -> drop the frame (NaN).
            rel_rightear_x = lambda x_df: np.where(x_df["ear_distance"] >= max_ear_distance, np.nan, x_df["_rel_rightear_x_orig"]),
            rel_rightear_y = lambda x_df: np.where(x_df["ear_distance"] >= max_ear_distance, np.nan, x_df["_rel_rightear_y_orig"])
        )
        .assign(
            angle_rad = lambda x_df: np.arctan2(x_df["rel_rightear_y"], x_df["rel_rightear_x"])
        )
        .assign(
            _d_angle_raw = lambda x_df: x_df["angle_rad"].diff()
        )
        .assign(
            d_angle = lambda x_df: x_df["_d_angle_raw"].fillna(0),
            # Wrap so that crossing the +/-pi boundary is not read as a near-full turn.
            d_angle_wrapped = lambda x_df: (x_df["_d_angle_raw"] + np.pi) % (2 * np.pi) - np.pi
        )
        .assign(
            d_angle_deg = lambda x_df: np.rad2deg(x_df["d_angle_wrapped"])
        )
        .drop(columns=['_rel_rightear_x_orig', '_rel_rightear_y_orig', '_d_angle_raw'], errors='ignore')
    )
    
def get_angular_velocity(stub):
    """Return the per-frame head angular change (degrees) for one session.

    Looks in ``DLCFOLDER`` for a CSV named ``PB_NAapp-<date>_<stub>*.csv``,
    where ``<date>`` is the second dash-separated field of ``stub``. The file
    is loaded, low-likelihood coordinates are interpolated, and the wrapped
    frame-to-frame angle difference is computed.

    Parameters
    ----------
    stub : str
        Session identifier, e.g. ``"PB71-221123-113609"``.

    Returns
    -------
    pandas.Series or None
        The ``d_angle_deg`` column, or ``None`` when no matching file exists.
    """
    date = stub.split("-")[1]
    pattern_str = f"PB_NAapp-{date}_{stub}*.csv"

    # glob yields entries in arbitrary order; sort so that "take the first
    # match" picks the same file on every run and machine.
    matching_files = sorted(DLCFOLDER.glob(pattern_str))

    if not matching_files:
        print(f"Error: No DLC file found for stub {stub} with pattern {pattern_str}")
        return None

    filename = matching_files[0]
    if len(matching_files) > 1:
        print(f"Warning: Multiple DLC files found for stub {stub} with pattern {pattern_str}: {matching_files}")
        print(f"Using file: {filename}")
    else:
        print(f"Found file: {filename}")

    df = read_DLC_csv(filename)
    df = interpolate_low_likehood(df, threshold=0.5)
    df = calc_angular_velocity(df)

    return df.d_angle_deg

def get_ang_vel_snips(stub, fs=10, pre=50, post=150):
    """Cut the angular-velocity trace into fixed-length windows around each TTL.

    Parameters
    ----------
    stub : str
        Session identifier passed to ``get_ttls`` and ``get_angular_velocity``.
    fs : float, default 10
        Factor converting a TTL time into a frame index (assumes the video is
        sampled at ``fs`` frames per TTL time unit - TODO confirm).
    pre, post : int, default 50 and 150
        Number of frames kept before and after each TTL.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(ttls) - 1, pre + post)``. Frames that fall
        outside the recording are NaN.

    Raises
    ------
    FileNotFoundError
        If no DLC file is available for ``stub``.
    """
    sol = get_ttls(stub)
    v = get_angular_velocity(stub)
    if v is None:
        raise FileNotFoundError(f"No DLC angular velocity data available for stub {stub}")

    trace = np.asarray(v, dtype=float)
    n_frames = len(trace)

    # NOTE(review): the final TTL is skipped, as in the original code -
    # confirm that this is intentional.
    n_snips = max(len(sol) - 1, 0)
    snips_vel = np.full((n_snips, pre + post), np.nan)

    for i in range(n_snips):
        event_frame = int(sol[i] * fs)
        start, end = event_frame - pre, event_frame + post
        # Clip to the recording. A negative start would otherwise wrap around
        # and an end past the last frame would yield a short, ragged snip.
        lo, hi = max(start, 0), min(end, n_frames)
        if lo < hi:
            snips_vel[i, lo - start:hi - start] = trace[lo:hi]

    # snips_vel = tp.zscore(snips_vel, baseline_points=50)

    return snips_vel

## For testing purposes
# stub = "PB71-221123-113609"
# get_ttls(stub)
# snips_vel = get_ang_vel_snips(stub)    

In [20]:
def calc_stillness(df):
    """Add frame-to-frame displacement columns and a z-scored movement index.

    For the right ear, left ear and nose, the per-frame change in x and y and
    the resulting Euclidean step length are added as ``<part>_dx``,
    ``<part>_dy`` and ``<part>_distance``. ``stillness_abs`` is the sum of the
    three step lengths (so larger values mean more movement) and
    ``stillness_z`` is that sum z-scored over the whole frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``<part>_x`` and ``<part>_y`` for ``rightear``,
        ``leftear`` and ``nose``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the added columns.
    """
    derived = {}
    for part in ("rightear", "leftear", "nose"):
        step_x = df[f"{part}_x"].diff()
        step_y = df[f"{part}_y"].diff()
        derived[f"{part}_dx"] = step_x
        derived[f"{part}_dy"] = step_y
        derived[f"{part}_distance"] = np.sqrt(step_x**2 + step_y**2)

    total_movement = (
        derived["rightear_distance"]
        + derived["leftear_distance"]
        + derived["nose_distance"]
    )
    derived["stillness_abs"] = total_movement
    derived["stillness_z"] = (total_movement - total_movement.mean()) / total_movement.std()

    return df.assign(**derived)


def get_stillness(stub):
    """Return the per-frame z-scored movement index for one session.

    Looks in ``DLCFOLDER`` for a CSV named ``PB_NAapp-<date>_<stub>*.csv``,
    where ``<date>`` is the second dash-separated field of ``stub``. The file
    is loaded, low-likelihood coordinates are interpolated, and the movement
    index is computed with ``calc_stillness``.

    Parameters
    ----------
    stub : str
        Session identifier, e.g. ``"PB71-221123-113609"``.

    Returns
    -------
    pandas.Series or None
        The ``stillness_z`` column, or ``None`` when no matching file exists.
    """
    date = stub.split("-")[1]
    pattern_str = f"PB_NAapp-{date}_{stub}*.csv"

    # glob yields entries in arbitrary order; sort so that "take the first
    # match" picks the same file on every run and machine.
    matching_files = sorted(DLCFOLDER.glob(pattern_str))

    if not matching_files:
        print(f"Error: No DLC file found for stub {stub} with pattern {pattern_str}")
        return None

    filename = matching_files[0]
    if len(matching_files) > 1:
        print(f"Warning: Multiple DLC files found for stub {stub} with pattern {pattern_str}: {matching_files}")
        print(f"Using file: {filename}")
    else:
        print(f"Found file: {filename}")

    df = read_DLC_csv(filename)
    df = interpolate_low_likehood(df, threshold=0.5)
    df = calc_stillness(df)

    return df.stillness_z

def get_stillness_snips(stub, fs=10, pre=50, post=150):
    """Cut the movement-index trace into fixed-length windows around each TTL.

    Parameters
    ----------
    stub : str
        Session identifier passed to ``get_ttls`` and ``get_stillness``.
    fs : float, default 10
        Factor converting a TTL time into a frame index (assumes the video is
        sampled at ``fs`` frames per TTL time unit - TODO confirm).
    pre, post : int, default 50 and 150
        Number of frames kept before and after each TTL.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(ttls) - 1, pre + post)``. Frames that fall
        outside the recording are NaN.

    Raises
    ------
    FileNotFoundError
        If no DLC file is available for ``stub``.
    """
    sol = get_ttls(stub)
    v = get_stillness(stub)
    if v is None:
        raise FileNotFoundError(f"No DLC stillness data available for stub {stub}")

    trace = np.asarray(v, dtype=float)
    n_frames = len(trace)

    # NOTE(review): the final TTL is skipped, as in the original code -
    # confirm that this is intentional.
    n_snips = max(len(sol) - 1, 0)
    snips_still = np.full((n_snips, pre + post), np.nan)

    for i in range(n_snips):
        event_frame = int(sol[i] * fs)
        start, end = event_frame - pre, event_frame + post
        # Clip to the recording. A negative start would otherwise wrap around
        # and an end past the last frame would yield a short, ragged snip.
        lo, hi = max(start, 0), min(end, n_frames)
        if lo < hi:
            snips_still[i, lo - start:hi - start] = trace[lo:hi]

    return snips_still

## For testing purposes
# stub = "PB71-221123-113609"
# stub = "PB48-220926-121542" #0.45, deplete
# stub = "PB73-221123-131413" #0.45, deplete
# stub = "PB75-221123-140659" #0.45, deplete
# stub = "PB61-221024-110730" #0.10, deplete

# snips = get_stillness_snips(stub)

In [21]:
# also make accompanying df that contains details of the rat and the condition and the time in session

def assemble_all_data(csv_path, tank_folder, dlc_folder, snips_function=get_ang_vel_snips):
    """Collect snips and matching trial metadata for every session in a key file.

    Parameters
    ----------
    csv_path : str or path-like
        CSV with one row per session and the columns ``Folder`` (session
        stub), ``Subject`` and ``Physiological state``.
    tank_folder, dlc_folder
        Currently unused; kept so existing calls keep working. The snips
        functions read the module-level ``TANKFOLDER`` / ``DLCFOLDER`` instead.
    snips_function : callable, default ``get_ang_vel_snips``
        Called as ``snips_function(stub)``; must return a sequence of snips.

    Returns
    -------
    (list, list)
        ``snips_array`` holds one snips array per successfully processed
        session. ``x_array`` holds the matching DataFrames with the columns
        ``trial``, ``id`` and ``condition`` (one row per snip). Sessions that
        raise an error are reported and skipped in both lists.
    """
    metadata = pd.read_csv(csv_path)

    snips_array = []
    x_array = []

    for _, session in metadata.iterrows():
        stub = session["Folder"]

        print(stub)
        try:
            snips_tmp = snips_function(stub)
            nsnips = len(snips_tmp)
            print(nsnips)

            x_tmp = pd.DataFrame(data={"trial": np.arange(nsnips),
                                       "id": session["Subject"],
                                       "condition": session["Physiological state"]
                                       })
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C still interrupts the loop;
            # the cause is reported instead of being silently discarded.
            print("Error with tank for", session["Subject"], session['Physiological state'])
            print(f"    {type(err).__name__}: {err}")
            continue

        # Append both only after both were built, so the lists stay aligned.
        snips_array.append(snips_tmp)
        x_array.append(x_tmp)

    return snips_array, x_array

# --- Angular velocity snips: one array per session, for each infusion type ---
snips_10NaCl, x_10NaCl = assemble_all_data(DATAFOLDER / "10NaCl_FileKey.csv", TANKFOLDER, DLCFOLDER, snips_function=get_ang_vel_snips)
snips_45NaCl, x_45NaCl = assemble_all_data(DATAFOLDER / "45NaCl_FileKey.csv", TANKFOLDER, DLCFOLDER, snips_function=get_ang_vel_snips)

# Stack the per-session arrays into one (n_trials, n_frames) array per infusion type.
snips_10NaCl = np.concatenate(snips_10NaCl, axis=0)
snips_45NaCl = np.concatenate(snips_45NaCl, axis=0)

# 10NaCl trials first, then 45NaCl - the metadata is concatenated in the same order later.
snips_vel = np.vstack((snips_10NaCl, snips_45NaCl))
# Not displayed: only the last expression of a cell is echoed.
snips_vel.shape

# --- Stillness snips: same procedure; this overwrites snips_*NaCl and x_*NaCl ---
# NOTE(review): the x_* metadata kept from here on comes from this second pass and is
# later used for snips_vel too; this assumes both passes succeed on the same sessions.
snips_10NaCl, x_10NaCl = assemble_all_data(DATAFOLDER / "10NaCl_FileKey.csv", TANKFOLDER, DLCFOLDER, snips_function=get_stillness_snips)
snips_45NaCl, x_45NaCl = assemble_all_data(DATAFOLDER / "45NaCl_FileKey.csv", TANKFOLDER, DLCFOLDER, snips_function=get_stillness_snips)

snips_10NaCl = np.concatenate(snips_10NaCl, axis=0)
snips_45NaCl = np.concatenate(snips_45NaCl, axis=0)

snips_still = np.vstack((snips_10NaCl, snips_45NaCl))
snips_still.shape


PB23-220608-131619
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220608_PB23-220608-131619_Cam1DLC_resnet50_Raw_VideosMar27shuffle1_50000.csv
49
PB24-220608-122638
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220608_PB24-220608-122638_Cam1DLC_resnet50_Raw_VideosMar27shuffle1_50000.csv
49
PB33-220620-115414
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220620_PB33-220620-115414_Cam1DLC_resnet50_Raw_VideosMar27shuffle1_50000.csv
49
PB39-220629-105034
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220629_PB39-220629-105034_Cam1DLC_resnet50_Raw_VideosMar27shuffle1_50000.csv
49
PB391-220829-111452
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220829_PB391-220829-111452_Cam1DLC_resnet50_Raw_VideosMar27shuffle1_50000.csv
49
PB44-220829-121005
Found file: D:\TestData\bazzino\from_paula\Sodium_Appetite_DLC\PB_NAapp-220829_PB44-220829-121005_Cam1DLC_resnet50_Raw_Video

(2401, 200)

In [22]:
# renames columns and combines dataframes from both infusion types

def tweak_x_array(x_array):
    """Concatenate per-session metadata frames and shorten the condition labels.

    Parameters
    ----------
    x_array : list of pandas.DataFrame
        Frames to stack row-wise; each original index is kept as is.

    Returns
    -------
    pandas.DataFrame
        The stacked frame, with the long labels in the ``condition`` column
        replaced by their short forms. Other values are left unchanged.
    """
    short_labels = {
        "Sodium Depleted": "deplete",
        "Sodium Replete": "replete",
        "Sodium Replete Experienced": "replete_exp",
        "Thirsty": "thirsty",
    }

    stacked = pd.concat(x_array, axis=0)
    return stacked.replace({"condition": short_labels})

# Collapse each list of per-session frames into one frame with short condition labels.
# NOTE: x_10NaCl / x_45NaCl change from lists to DataFrames here, so this cell
# cannot be re-run without re-running the assembly cell first.
x_10NaCl, x_45NaCl = (tweak_x_array(frames) for frames in (x_10NaCl, x_45NaCl))

# Tag every row with the infusion type it came from.
x_vel_10NaCl = x_10NaCl.assign(infusiontype="10NaCl")
x_vel_45NaCl = x_45NaCl.assign(infusiontype="45NaCl")

# Same order as the snips arrays (10NaCl first), with a fresh 0..n-1 index.
x_vel = pd.concat([x_vel_10NaCl, x_vel_45NaCl], ignore_index=True)

In [23]:
# Sanity check: the row count should equal the number of snips; the columns are
# trial, id, condition and infusiontype.
x_vel.shape

(2401, 4)

In [24]:
# code to only select the conditions we want - deplete and replete

# One mask, built before x_vel is filtered, is applied to all three objects so
# they stay row-aligned.
# NOTE: re-running this cell without rebuilding the snips arrays fails, because
# the mask would then be shorter than the unfiltered arrays.
keep_rows = ~x_vel.condition.isin(["thirsty", "replete_exp"])

snips_vel = snips_vel[keep_rows.to_numpy()]
snips_still = snips_still[keep_rows.to_numpy()]
x_vel = x_vel[keep_rows]

In [25]:
# combines with data from the subject key CSVs to add each subject's sex

subjects_10NaCl = pd.read_csv(DATAFOLDER / "10NaCl_SubjectKey.csv")
subjects_45NaCl = pd.read_csv(DATAFOLDER / "45NaCl_SubjectKey.csv")

# Only the first two columns of each key are used; after renaming they must be
# "id" and "sex".
subject_df = (pd.concat([subjects_10NaCl.iloc[:, :2], subjects_45NaCl.iloc[:, :2]], axis=0)
              .rename(columns={"Subject": "id",
                               "Sex": "sex"})
              # A subject listed in both key files would otherwise duplicate
              # rows in the left merge and break alignment with the snips arrays.
              .drop_duplicates(subset="id")
              .reset_index(drop=True)
)

# Dropping any existing "sex" column makes the cell safe to re-run (no sex_x / sex_y).
# validate guarantees the merge cannot change the number of rows in x_vel.
x_vel = pd.merge(x_vel.drop(columns=["sex"], errors="ignore"),
                 subject_df[['id', 'sex']],
                 on='id', how='left', validate="many_to_one")

In [26]:
## pre-processing of the DLC angvel snips

# interpolation
# Rows are trials, columns are frames: fill gaps along each trial.
df_snips_vel = pd.DataFrame(snips_vel)


df_interpolated = df_snips_vel.interpolate(method='linear', axis=1)
# Fill any leading/trailing NaNs that interpolation left behind.
df_filled = df_interpolated.ffill(axis=1).bfill(axis=1)

snips_vel_processed = df_filled.to_numpy()

# find absolute
snips_vel_processed = np.abs(snips_vel_processed)

# how about instead of zscoring, we just adjust to baseline subtraction
# (baseline = mean of the first 50 frames, i.e. the pre-event part of each snip)
baseline = np.nanmean(snips_vel_processed[:, :50], axis=1)
snips_vel_processed = snips_vel_processed - baseline[:, None]

print(np.sum(np.isnan(snips_vel_processed)))
snips_vel = np.array(snips_vel_processed)

# removing NaNs (only trials that are NaN throughout can still contain any)
rows_with_nans_mask = np.isnan(snips_vel).any(axis=1)
snips_vel = snips_vel[~rows_with_nans_mask]
# Drop the same trials from snips_still: it shares x_vel as its metadata and
# would otherwise no longer line up with it row by row.
snips_still = snips_still[~rows_with_nans_mask]
x_vel = x_vel[~rows_with_nans_mask].reset_index(drop=True)
print(snips_vel.shape)
x_vel.shape

0
(1960, 200)


(1960, 5)

In [27]:
def smooth_array(arr, window_size=5):
    """
    Apply a moving-average filter to every row of a 2D array.

    Each row is convolved with a flat kernel of length ``window_size`` using
    ``mode='same'``, so values near the row edges are averaged against
    implicit zeros.

    :param arr: 2D NumPy array (one signal per row)
    :param window_size: Number of samples in the averaging window
    :return: 2D array of smoothed rows
    """
    box_kernel = np.ones(window_size) / window_size

    def _smooth_row(row):
        # 'same' keeps the output centred on the input samples.
        return np.convolve(row, box_kernel, mode='same')

    return np.apply_along_axis(_smooth_row, axis=1, arr=arr)

# Smooth every angular-velocity snip with a 5-sample moving average.
# NOTE: this overwrites snips_vel, so re-running the cell smooths the data again.
snips_vel = smooth_array(snips_vel, window_size=5)

In [28]:
## pre-processing of the DLC stillness snips

# interpolation
# df_snips_vel = pd.DataFrame(snips_vel)

# df_interpolated = df_snips_vel.interpolate(method='linear', axis=1)
# df_filled = df_interpolated.ffill(axis=1).bfill(axis=1)

# snips_vel_processed = df_filled.to_numpy()

# find absolute
# snips_vel_processed = np.abs(snips_vel_processed)

# how about instead of zscoring, we just adjust to baseline subtraction
# baseline = np.nanmean(snips_still[:, :50], axis=1)
# snips_still = snips_still - baseline[:, None]

# print(np.sum(np.isnan(snips_still)))
# snips_vel = np.array(snips_still)

# removing NaNs
# rows_with_nans_mask = np.isnan(snips_vel).any(axis=1)
# snips_vel = snips_vel[~rows_with_nans_mask]
# x_vel = x_vel[~rows_with_nans_mask].reset_index(drop=True)
# print(snips_vel.shape)
# x_vel.shape

In [29]:
# Bundle the two snips arrays with their shared per-trial metadata.
data_to_save = {"snips_vel": snips_vel,
                "snips_still": snips_still,
                "x_vel": x_vel,
                }

# x_vel describes the rows of both snips arrays, so all three must have the
# same number of rows. Refuse to write a file in which they are out of step.
n_rows = {name: len(value) for name, value in data_to_save.items()}
if len(set(n_rows.values())) != 1:
    raise ValueError(f"Row counts differ between objects to save: {n_rows}")

with open(DATAFOLDER / "behav_data.pickle", "wb") as f:
    dill.dump(data_to_save, f)

In [None]:
# with open(DATAFOLDER / "x_angvel.pickle", "wb") as f:
#     dill.dump(x_vel, f)

In [None]:
# snips_vel_all = np.vstack([snips_10NaCl_selected, snips_45NaCl_selected])

# with open(DATAFOLDER / "snips_angvel.pickle", "wb") as f:
#     dill.dump(snips_vel_all, f)