In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib
%matplotlib tk
%autosave 180

#
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import matplotlib.cm as cm
from matplotlib import gridspec
from scipy import signal

import numpy as np
import pandas as pd
import os
import shutil
import cv2
from tqdm import trange
import glob
import re
from tqdm import tqdm

from scipy.io import loadmat
import scipy
import h5py
import csv
import sleap

import sys
sys.path.append('/home/cat/code/gerbil/')

from simba_tools.track_simba import track as Track

Autosaving every 180 seconds


In [2]:
###################################################
###################################################
###################################################
# 
def save_two_animal_slp(animal_ids, fname_slp, fname_out):
    
    #
    fname_out = fname_out[:-4] + "_"+str(animal_ids[0])+"_"+str(animal_ids[1])+".slp"
    
    #
    if os.path.exists(fname_out):
        return
    
    # select first to animals
    labels = sleap.load_file(fname_slp)
    
    # Change this to specify which pair of tracks to save:
    if len(labels.tracks)==0:
        print ('sleap did not find tracks: ', fname_slp)
        return 
    #
    keep_tracks = [labels.tracks[animal_ids[0]], labels.tracks[animal_ids[1]]]

    # Remove instances that aren't in keep_tracks
    for lf in labels:
        lf.instances = [inst for inst in lf.instances if inst.track in keep_tracks]

    # Keep only those tracks in the labels
    labels.tracks = keep_tracks

    # Remove frames that are now empty due to the track filtering
    labels.remove_empty_frames()

    # Save the result with the specified tracks in the filename
    labels.save(fname_out)

#
def process_simba_files_parallel(row, 
                                dir_path, 
                                animal_ids):

    
    #
    video_scaling_factor = 1.3
    
    #    
    fname_slp = os.path.join(dir_path, 'features', row['Slp filename'])    
    fname_out = os.path.join(dir_path, 'simba', 'slp_files', row['Slp filename'])
    
    for animal_id in animal_ids:
        
        #try:
        if True:
            
            no_track_files = []
        
            result = save_two_animal_slp(animal_id, fname_slp, fname_out)
            
            if result:
                no_track_files.append(result)

            #########################################################
            ################ GENERATE CLEANED TRACKS ################
            #########################################################

            # print ("STARTING CLEANING...")

            # 
            fname_slp_in = fname_out[:-4] + "_"+str(animal_id[0])+"_"+str(animal_id[1])+".slp"
            if os.path.exists(fname_slp_in)==False:
                print ("Missing: ", fname_slp_in)
                continue
            fname_clean = os.path.join(dir_path, 'simba', 'cleaned', row['Slp filename'])
            fname_slp_clean = fname_slp_in.replace('.slp','_cleaned.npy')
            
            if os.path.exists(fname_slp_clean):
                continue

            #
            track = Track.Track(fname_slp_in)
            track.track_type = "features"
            track.exclude_huddles = True
            track.use_dynamic_centroid = True


            #
            track.animal_ids = [0,1]
            track.tracks_names = ['female','male','pup1','pup2','pup3','pup4']
            track.recompute_spine_centres=True
            track.verbose = True                         # gives additional printouts
            track.n_animals = len(track.animal_ids)      # number of animals
            track.filter_width = 10                      # this is the median filter width in frames; e.g. 10 ~=0.4 seconds
                                                         # higher values provide more stability, but less temporally precise locations
            # 
            track.recompute_h5 = True
            track.load_tracks()

            ####################################################
            ### OPTIONAL - MEDIAN FILTER ALL TRACKS ############
            ####################################################
            if True:
                track.filter_tracks()

            ####################################################
            ### OPTIONAL - ALGORITHM TO REASSIGN CHUNKING ######
            ####################################################
            if True:

                # makes scores based on .slp output? (to check)
                track.get_scores()

                # uses track_spines to break up all the data into continuous chunks
                track.max_jump_single_frame = 30  # max distance in pixels (?) that an animal can move in a single frame
                track.make_tracks_chunks()        

                # deletig very short chunks of track that are orphaned..
                min_chunk_len = 5
                track.del_short_chunks(min_chunk_len)

                ############## FIX TRACKS PARAMS #############
                track.time_threshold = 25       # window to search for nearest chunks, about 1sec seems fair...
                track.safe_chunk_length = 15    # chunks this long will not change id
                track.min_chunk_len = 4         # min length of chukn to be used for anchoring/correcting
                track.max_distance_merge = 75   # max pix diff allowed for merging when using model; not just for neighbouring frames
                # track.memory_length = 25      # how many frames back is it ok to remember a prev animal
                track.verbose = False
                track.update_tracks = True

                # parameters for fixing track chunking
                track.max_time_automerge = 3      # time to automerget chunks from same animal ???
                track.max_dist_automerge = 25     # distance to auto merge chunks from same animal separated by single time skip


                track.fix_tracks()

            else:
                # recompute spine centres from scratch
                track.get_track_spine_centers()

                #
                #print ("tracks loaded: ", track.tracks_spine.shape)

            ####################################################
            ### OPTIONAL - MEDIAN FILTER SPINE CENTRES #########
            ####################################################
            if False:
                track.filter_tracks_spines() 

            
            # scale the data if in cohorts3 or 4
            if 'cohort3' in fname_slp_clean or 'cohort4' in fname_slp_clean:
                track.tracks_spine = track.tracks_spine/video_scaling_factor
            
            #
            np.save(fname_slp_clean, track.tracks_spine)
        #
        else:
        #except:
            print ("could not compute : ", animal_id, " for ", fname_slp,)
            #print(f'{fname_slp_clean} could not be computed')
        
    return no_track_files

In [None]:
#

import parmap

#
data_path = "/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort2/database.xlsx"
df = pd.read_excel(data_path)

#
dir_path = "/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort2"

#
animal_ids = [[0,1], [1,0], [0,2], [2,0], [0,3], [3,0], [0,4], [4,0], [0,5], [5,0],
              [1,2], [2,1], [1,3], [3,1], [1,4], [4,1], [1,5], [5,1],
              [2,3], [3,2], [2,4], [4,2], [2,5], [5,2],
              [3,4], [4,3], [3,5], [5,3],
              [4,5], [5,4]]

# load the rows from the dataframe spreadsheet
rows = []
for _, row in df.iterrows():
    rows.append(row)

  # use parallel option if working
if True:
    results = parmap.map(process_simba_files_parallel,
               rows, 
               dir_path, 
               animal_ids,
               pm_processes=16,
               pm_pbar=True)
    
# use single core option to start/debug
else:
    for row in tqdm(rows):
        process_simba_files_parallel(row,
                                     dir_path,
                                     animal_ids)
        
no_tracks = [item for sublist in results if sublist is not None for item in sublist]
np.save('/home/cat/Downloads/no_tracks.npy', np.array(no_tracks))

#
print ("DONE...")

In [13]:
test = np.load('/home/cat/Downloads/no_tracks.npy')
test

array([], dtype=float64)

In [None]:
import os
import glob
import re
import numpy as np

directory = "/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort3/simba/slp_files"

files = glob.glob(os.path.join(directory, '*_cleaned.npy'))

for file in tqdm(files, desc="Processing files", unit="file"):
    
    try:
        #
        video = np.load(file)

        # change the order of the animals
        video_reordered = video[:, [1, 0], :]

        # extract the animal numbers from the file name
        match = re.search(r'(.*_Day_)(\d+)_(\d+)(_cleaned.npy)', os.path.basename(file))
        if match:
            prefix, animal1, animal2, suffix = match.groups()

            # form the new filename
            new_file = f'{prefix}{animal2}_{animal1}{suffix}'
            new_file_path = os.path.join(directory, new_file)

            # check if the new file already exists
            if os.path.exists(new_file_path):
                # print(f"File {new_file} already exists. Skipping...")
                continue

            # save the reordered array to a new file
            np.save(new_file_path, video_reordered)
        else:
            print ("couldn't find match: ", file)
            
    except:
        print(file)

print('DONE')


In [5]:

fname = '/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort4/features/2020_09_23_09_06_31_558986_compressed_Day.slp'

d = np.load(fname[:-4]+'.npy')

print (d.shape)

(86407, 1, 1, 2)


In [6]:
plt.figure()
for k in range(1):
    plt.plot(d[:,k,0],
             d[:,k,1])

plt.show()

IndexError: index 1 is out of bounds for axis 2 with size 1

In [None]:
import parmap

#
data_path = "/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort3/database.xlsx"
df = pd.read_excel(data_path)

#
dir_path = "/mnt/b3a68699-495d-4ebb-9ab1-ac74f11c68c5/gerbil/cohort3"

#
animal_ids = [[0,1], [0,2], [0,3], [0,4], [0,5],
              [1,2], [1,3], [1,4], [1,5],
              [2,3], [2,4], [2,5],
              [3,4], [3,5],
              [4,5]]

#
rows = []
for _, row in df.iterrows():
    rows.append(row)

#
print (rows[0])

#
print ("len(rows): ", len(rows))

#
for row in rows:
    print (row['Slp filename'])