In [4]:
import matplotlib
%matplotlib tk
%autosave 180
%load_ext autoreload
%autoreload 2

import nest_asyncio
%config Completer.use_jedi = False

#
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# 
import numpy as np
import os
import scipy

# add root directory to be able to import packages
# todo: make all packages installable so they can be called/imported by environment
import sys
module_path = os.path.abspath(os.path.join('..'))
sys.path.append(module_path)

from utils.calcium import calcium


Autosaving every 180 seconds
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
############################################
################# Load data ################
############################################

#
def get_sessions_from_directory_listing(root_dir,
                                        animal_id):
    """List the entries found in one animal's data directory.

    Args:
        root_dir: directory containing one sub-directory per animal; a
            trailing path separator is optional.
        animal_id: name of the animal's sub-directory inside ``root_dir``.

    Returns:
        list of str: entry names inside ``<root_dir>/<animal_id>`` exactly as
        returned by ``os.listdir`` (arbitrary order, no filtering).

    Raises:
        OSError: if the animal directory does not exist or cannot be read.
    """
    # join the path components instead of concatenating strings, so the lookup
    # also works when root_dir is given without a trailing separator
    sessions = os.listdir(os.path.join(root_dir, animal_id))
    print ("sessions: ", sessions)

    return sessions
    
#
# Animals to process. Commented-out ids are kept for reference and are skipped.
animal_ids = [
    # 'DON-006084',
    # 'DON-006085',
    # 'DON-006087',
    # 'DON-009191',
    # 'DON-009192',
    # 'DON-010473',
    # 'DON-010477',
    'DON-002865', 'DON-003165', 'DON-003343',
    'DON-008497', 'DON-008498', 'DON-008499',
]

# ---- locations on disk ----
# root_dir: directory holding one sub-directory per animal (must end with '/').
# dir_:     sub-path appended to each session name to reach the tif/suite2p data.
root_dir = '/media/cat/4TB1/donato/'
dir_ = '/002P-F/tif/'

#
# For every animal and every session: load the suite2p output, compute pairwise
# correlations and remove duplicate cells. Sessions whose result file already
# exists are skipped, so the cell can be re-run after a partial failure.
for animal_id in animal_ids:

    #
    print ("Processing: ", animal_id)

    #
    sessions = get_sessions_from_directory_listing(root_dir,
                                                   animal_id)

    #
    for session in sessions:
        # suite2p output directory of this session (built once, used twice below)
        data_dir = os.path.join(root_dir, animal_id, session+dir_, 'suite2p','plane0')

        # marker for "already processed"; presumably written by
        # remove_duplicate_neurons() below - TODO confirm
        fname_check = os.path.join(data_dir,
                                   'goodcell_correlations_array_post_deduplication_filtered.npy')

        #
        if os.path.exists(fname_check):
            continue

        #
        try:
            #
            print ("processing: ", session)

            c = calcium.Calcium()
            c.root_dir = root_dir
            c.data_dir = data_dir
            c.animal_id = animal_id
            c.session = session
            c.detrend_model_order = 1

            #
            c.load_suite2p()           #note: this already deletes the non-trusted suite2p cells
            c.save_python = True
            c.save_matlab = False

            #
            c.load_binarization()
            binarization_method = 'upphase'
            if binarization_method=='onphase':
                traces = c.F_onphase_bin
            elif binarization_method=='upphase':
                traces = c.F_upphase_bin
            else:
                print ("METHOD NOT FOUND")

            ###################################################################
            ########## cleanup cells + compute pairwise correlations ##########
            ###################################################################
            c.load_footprints()
            c.deduplication_method = 'overlap'      # 'overlap'; 'centre_distance'
            c.corr_min_distance = 8                 # min distance for centre_distance method - NOT USED HERE
            c.corr_max_percent_overlap = 0.25       # max overlap for overlap method
            c.corr_threshold = 0.3                  # max correlation allowed for high overlap

            #
            c.corr_delete_method = 'lowest_snr' #'highest_connected', lowest_snr'
            c.recompute_deduplication = False

            #
            c.correlation_datatype = 'filtered'      # filtered vs. upphase

            #
            c.compute_correlations()

            #
            c.remove_duplicate_neurons()     # this removes duplicate neurons and saves non-duplicate version of correlation array

        except Exception as e:
            # Keep going with the next session (best effort), but report why this
            # one failed. Catching Exception rather than using a bare except lets
            # KeyboardInterrupt / SystemExit propagate, so the run can be stopped.
            print ("... errror loading : ", session, "-", type(e).__name__ + ":", e)


Processing:  DON-002865
sessions:  ['DON-002865_20210210', 'DON-002865_202102114', 'DON-002865_202102115', 'DON-002865_202102113', 'DON-002865_202102119', 'DON-002865_20210211', 'DON-002865_202102116', 'DON-002865_202102117', 'DON-002865_202102118', 'DON-002865_202102112']
Processing:  DON-003165
sessions:  ['DON-003165_20210220', 'DON-003165_20210216', 'DON-003165_20210219', 'DON-003165_20210218', 'DON-003165_20210217', 'DON-003165_20210222', 'DON-003165_20210211', 'DON-003165_20210213', 'DON-003165_20210223', 'DON-003165_20210215']
Processing:  DON-003343
sessions:  ['DON-003343_20210213', 'DON-003343_20210218', 'DON-003343_20210217', 'DON-003343_20210216', 'DON-003343_20210219', 'DON-003343_20210215', 'DON-003343_20210222', 'DON-003343_20210221', 'DON-003343_20210220', 'DON-003343_20210214']
processing:  DON-003343_20210221
... errror loading :  DON-003343_20210221
Processing:  DON-008497
sessions:  ['20220206', '20220130', '20220209', '20220211', '20220202', '20220207', '20220204',

In [7]:
def get_sessions_from_filename(fname):
    """Extract the session labels encoded in a ``.mesc`` file name.

    Dashes are treated like underscores. The labels are the ``_``-separated
    tokens between the ``002P_F`` marker and the ``.mesc`` extension, with the
    first ``ACQ`` token removed, e.g.
    ``DON-010473_20220514_002P-F_S1-S2-ACQ.mesc`` -> ``['S1', 'S2']``.

    Args:
        fname: file name or path of the recording.

    Returns:
        list of str: the session labels in file-name order; an empty list when
        the name has no ``002P_F`` marker or no ``.mesc`` extension.
    """
    marker = "002P_F"

    # normalise separators once; every later lookup works on underscores only
    fname = fname.replace("-", "_")

    marker_pos = fname.find(marker)
    ext_pos = fname.find('.mesc')

    # find() returns -1 when a marker is missing; slicing with that value would
    # silently yield an unrelated piece of the name, so bail out instead
    if marker_pos == -1 or ext_pos == -1:
        return []

    # skip the marker itself plus the separator that follows it
    idx1 = marker_pos + len(marker) + 1

    # drop empty tokens produced by doubled or trailing separators
    sessions = [token for token in fname[idx1:ext_pos].split("_") if token]

    # "ACQ" is part of the naming scheme, not a session label
    if "ACQ" in sessions:
        sessions.remove("ACQ")

    return sessions


def get_sessions_from_filename2(fname):
    """Find which session labels ``S1`` .. ``S19`` occur in a file name.

    A label only counts when it is not directly followed by another digit, so
    ``S1`` is not reported for a name that merely contains ``S10`` .. ``S19``.

    Args:
        fname: file name or path to scan (the whole string is searched).

    Returns:
        list of str: the labels found, in ascending numeric order (not in
        file-name order), each at most once.
    """
    import re  # local import: only this helper needs it

    sessions = []
    for k in range(1, 20):
        label = "S" + str(k)
        # the negative lookahead rejects matches that are a prefix of a longer number
        if re.search(label + r"(?![0-9])", fname):
            sessions.append(label)

    return sessions



def get_sessions_for_mesc_and_raw_filenames_from_directory(folder):
    """Collect all ``.mesc`` and ``.raw`` files below a folder with their sessions.

    The directory tree is walked once. ``.mesc`` files get the session labels
    returned by ``get_sessions_from_filename2`` for their full path; ``.raw``
    files get an empty session list.

    Args:
        folder: root directory to search recursively.

    Returns:
        tuple (fs, ss): ``fs`` is the list of file paths, all ``.mesc`` files
        first followed by all ``.raw`` files (each group in ``os.walk`` order);
        ``ss`` is the list of session lists aligned with ``fs``.
    """
    mesc_files = []
    raw_files = []

    # single traversal; the two groups are kept apart to preserve the
    # "all .mesc first, then all .raw" ordering of the result
    for root, _dirnames, filenames in os.walk(folder):
        for filename in filenames:
            if filename.endswith('.mesc'):
                mesc_files.append(os.path.join(root, filename))
            elif filename.endswith('.raw'):
                raw_files.append(os.path.join(root, filename))

    fs = mesc_files + raw_files

    # a fresh empty list per .raw file, so entries can be modified independently
    ss = [get_sessions_from_filename2(match) for match in mesc_files]
    ss += [[] for _ in raw_files]

    return fs, ss

# Print every recording found in the test folder next to its session labels.
folder = '/media/cat/4TB1/donato/tests'
fnames, sessions = get_sessions_for_mesc_and_raw_filenames_from_directory(folder)
for fname, fname_sessions in zip(fnames, sessions):
    print (fname, fname_sessions)

/media/cat/4TB1/donato/tests/DON-010473_20220514_002P-F_S1-S2-ACQ.mesc ['S1', 'S2']
