In [2]:
import sys
sys.path.append("scripts")  # Add the path to the scripts folder
#import script_file_1

import ipywidgets
import h5py
import pickle
import os
import pandas as pd
import warnings
import numpy as np
import matplotlib.pyplot as plt


#warnings.filterwarnings('ignore')

In [7]:
# create list of file_paths to loop over and extract data from
sess_dir = 'C:/code/SMTB_2023/imaging/data/5546'

save_dir = sess_dir + '/5546_total.pkl'

# LOAD DATA FROM MULTIPLE SESSIONS
# Each list gets one entry per file, in the same order as files_to_analyze.
C_arrays = []
cvs = []
boris1s = []

# List of files to analyze

files_to_analyze = ['/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5546/day0_corr08_pnr5_total.pkl',
                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5546/day1_corr08_pnr6_total.pkl',
                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5546/day2_corr08_pnr5_total.pkl',
                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5546/day14_corr08_pnr8_total.pkl']

#files_to_analyze = ['/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5573/day0_analysis_results_corr08_pnr8_total.pkl',
#                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5573/day1_analysis_results_corr08_pnr8_total.pkl',
#                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5573/day2_analysis_results_corr08_pnr8_total.pkl',
#                    '/Users/savva/Documents/GitHub/SMTB_2023/imaging/data/5573/day14_analysis_results_corr07_pnr7_total.pkl']


# Loop through files and store the data.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# files produced by our own pipeline.
for file_path in files_to_analyze:
    with open(file_path, 'rb') as file:
        # Each pickle holds a 3-item sequence that is unpacked positionally.
        C_array, cv, boris1 = pickle.load(file)
    # The file handle is already closed here; only the loaded objects are kept.
    C_arrays.append(C_array)
    cvs.append(cv)
    boris1s.append(boris1)

partnerString = ['left', 'left'] #partner was left on days 2 and 14

# on PPT days, there are partners and novels, so lets rename behaviors appropriately so that they're
# easily indexable

# 5546 - partner on left day 2,


def _label_behaviors_by_side(boris_df,
                             behaviors=('noncontact investigation',
                                        'contact investigation',
                                        'huddle'),
                             sides=('Right', 'Left')):
    """Append the lower-cased 'Subject' side to matching 'Behavior' labels.

    For every (behavior, side) pair, rows whose 'Behavior' equals `behavior`
    and whose 'Subject' equals `side` get 'Behavior' rewritten to
    '<behavior> <side lower-cased>', e.g. 'huddle' + 'Right' -> 'huddle right'.

    The frame is modified in place (exactly like the statements this helper
    replaces) and is also returned for convenience.
    """
    for behavior in behaviors:
        for side in sides:
            matches = (boris_df['Behavior'] == behavior) & (boris_df['Subject'] == side)
            boris_df.loc[matches, 'Behavior'] = behavior + ' ' + side.lower()
    return boris_df


# Sessions at index 2 (day 2) and index 3 (day 14) get side-specific labels.
day2temp = _label_behaviors_by_side(boris1s[2])
boris1s[2] = day2temp

day14temp = _label_behaviors_by_side(boris1s[3])
boris1s[3] = day14temp

In [8]:
import numpy as np
from sklearn.metrics import roc_auc_score
import pandas as pd
import pdb

def wws(behavior_data, num_frames):
    """Build a per-frame 0/1 mask marking the frames covered by behavior bouts.

    Parameters
    ----------
    behavior_data : pandas.DataFrame
        Must contain 'scopeFrameStart' and 'scopeFrameEnd' columns; each row
        is one bout and both endpoints are treated as inclusive.
    num_frames : int
        Length of the returned mask.

    Returns
    -------
    numpy.ndarray
        Float array of length `num_frames`, 1 inside any bout and 0 elsewhere.
        Bouts extending past `num_frames` are truncated by the slice.
    """
    onsets = np.zeros(num_frames)
    starts = behavior_data['scopeFrameStart'].values
    ends = behavior_data['scopeFrameEnd'].values
    for start, end in zip(starts, ends):
        # Cast explicitly: frame columns stored as floats would otherwise make
        # the slice raise TypeError. Clamp at 0 so a negative index cannot wrap
        # around and mark frames at the end of the recording.
        first = max(int(start), 0)
        stop = max(int(end) + 1, 0)
        onsets[first:stop] = 1
    return onsets
               
def qotaqbas(matrix, behavior_df, behavior_names, num_shuffles):
    """Per-neuron AUROC for a set of behaviors, with a shuffle-based p-value.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array indexed as ``matrix[neuron, frame]``.
    behavior_df : pandas.DataFrame
        Needs 'Behavior', 'scopeFrameStart' and 'scopeFrameEnd' columns.
    behavior_names : list of str
        Rows whose 'Behavior' is in this list are pooled into one binary
        per-frame label.
    num_shuffles : int
        Number of shuffled label vectors generated for each neuron.

    Returns
    -------
    (auroc_matrix, p_value_matrix) : tuple of 1-D numpy.ndarray
        One value per neuron. The p-value is one-sided: the fraction of
        shuffles whose AUROC is >= the true AUROC, with the +1 correction in
        numerator and denominator. If no row matches `behavior_names`, a
        message is printed and both arrays are filled with NaN.
    """
    num_neurons = matrix.shape[0]
    num_frames = matrix.shape[1]
    auroc_matrix = np.zeros(num_neurons)
    p_value_matrix = np.zeros(num_neurons)

    # The behavior selection does not depend on the neuron, so do it once.
    behavior_data = behavior_df[behavior_df['Behavior'].isin(behavior_names)]

    if behavior_data.empty:
        # With no bouts every label is 0 and AUROC is undefined; report NaN
        # instead of failing inside the scoring call.
        print(f"No data found for behaviors {list(behavior_names)}; "
              f"returning NaN for all {num_neurons} neurons.")
        auroc_matrix[:] = np.nan
        p_value_matrix[:] = np.nan
        return auroc_matrix, p_value_matrix

    behavior_onset = wws(behavior_data, num_frames)
    # Bout lengths are preserved when bouts are moved to random start frames.
    bout_lengths = (behavior_data['scopeFrameEnd'] - behavior_data['scopeFrameStart']).values

    for i in range(num_neurons):
        neuron_data = matrix[i, :]

        true_auroc = roc_auc_score(behavior_onset, neuron_data)
        auroc_matrix[i] = true_auroc

        shuffled_aurocs = []
        for _ in range(num_shuffles):
            shuffled_behavior_data = behavior_data.copy()
            shuffled_behavior_data['scopeFrameStart'] = np.random.randint(0, num_frames - 1, len(behavior_data))
            shuffled_behavior_data['scopeFrameEnd'] = shuffled_behavior_data['scopeFrameStart'] + bout_lengths
            behavior_onsets_shuffled = wws(shuffled_behavior_data, num_frames)
            shuffled_aurocs.append(roc_auc_score(behavior_onsets_shuffled, neuron_data))

        # Convert to an array before comparing: a plain list cannot be
        # compared element-wise against a Python float.
        num_at_least_true = np.sum(np.asarray(shuffled_aurocs) >= true_auroc)
        p_value_matrix[i] = (num_at_least_true + 1) / (num_shuffles + 1)

    return auroc_matrix, p_value_matrix

In [9]:
# Seed NumPy's global RNG so the shuffle-based p-values computed below come out
# the same on every Restart & Run All (qotaqbas draws from np.random).
np.random.seed(0)

# Sessions 0 and 1 are scored on the plain investigation labels; later
# sessions are scored on the "right"-suffixed labels.
plain_behaviors = ['noncontact investigation', 'contact investigation']
right_behaviors = ['noncontact investigation right', 'contact investigation right']

auroc_matrices = []
p_value_matrices = []
for i, (C_array, boris1) in enumerate(zip(C_arrays, boris1s)):
    print(i)
    behaviors = plain_behaviors if i in (0, 1) else right_behaviors
    auroc_matrix, p_value_matrix = qotaqbas(C_array, boris1, behaviors, num_shuffles = 100)
    auroc_matrices.append(auroc_matrix)
    p_value_matrices.append(p_value_matrix)

0
1
2
3


In [10]:
import pdb  
def ayagoz(matrices, p_values_thresh = 0.01):
    """Compare every item of `matrices` against a p-value threshold.

    Each item (a scalar or an array) is tested with a strict ``<`` against
    `p_values_thresh`; the comparison results are returned as a list in the
    same order as the input.
    """
    return [entry < p_values_thresh for entry in matrices]

In [11]:
def dil(p_value_matrices, p_values_thresh=0.05):
    """Fraction of significant entries in each session's p-value array.

    Parameters
    ----------
    p_value_matrices : sequence of 1-D array-likes
        One collection of p-values per session.
    p_values_thresh : float, optional
        An entry counts as significant when it is strictly below this value.
        Defaults to 0.05.

    Returns
    -------
    list of float
        One fraction (0-1) per session, in input order. A session with no
        entries yields NaN rather than a division-by-zero error.
    """
    sui = []
    # Iterate over whatever sessions were passed in instead of assuming four.
    for p_values in p_value_matrices:
        significant = np.asarray(p_values) < p_values_thresh
        if significant.size == 0:
            sui.append(float('nan'))
            continue
        sui.append(int(np.count_nonzero(significant)) / significant.size)
    return sui

In [12]:
# One value per session: the share of neurons whose shuffle p-value passes
# dil's significance threshold. Despite the "percent" in the name, the values
# are fractions between 0 and 1.
percent_significant_neuron = dil(p_value_matrices)
print(percent_significant_neuron)

[0.21052631578947367, 0.21052631578947367, 0.35, 0.12903225806451613]


In [14]:
# Output location for the per-session significant-neuron fractions.
sess_dir = '/Users/savva/Documents/GitHub/SMTB_2023/imaging/output/'
save_dir = f"{sess_dir}{'5546_auroc_rightonly2.pkl'}"

# The result list is wrapped in an outer one-element list before pickling,
# so readers of this file must unwrap it again.
payload = [percent_significant_neuron]
with open(save_dir, mode='wb') as out_handle:
    pickle.dump(payload, out_handle)