In [3]:
import matplotlib
%matplotlib tk
%autosave 180
%load_ext autoreload
%autoreload 2

#
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# 
import matplotlib.cm as cm
import numpy as np
import os
from tqdm import trange
import pandas as pd

from utils import plot_me, smooth_traces, generate_track_interactions

from track import Track


Autosaving every 180 seconds


In [59]:
##########################################################################
####################### HELPER FUNCTIONS #################################
##########################################################################

#
def parse_spreadsheet_return_unique_vids_and_starts_ends(header,
                                                         dataframe):
    """Group labelled [start, end] intervals by (video name, animal-pair label).

    Layout read from the spreadsheet: column 0 holds the video name; every
    following pair of columns holds the start and end value of one event.
    The header of the first column of each pair is used as the pair label.
    A start value of 0 marks an unused cell.

    Parameters
    ----------
    header : list
        Column names of ``dataframe`` (e.g. ``dataframe.columns.tolist()``).
    dataframe : pandas.DataFrame
        The spreadsheet contents in the layout described above.

    Returns
    -------
    list
        One entry ``[video_name, pair_label, starts_ends]`` per (video, pair)
        combination that has at least one interval, ordered by video name and
        then by pair label (``np.unique`` order). ``starts_ends`` has shape
        ``(n_intervals, 2)``; for a single interval it is squeezed to shape
        ``(2,)``, as before.
    """

    # label of each column pair = header of its first (start) column
    animal_labels = list(header[1::2])

    #
    data_npy = dataframe.to_numpy()
    n_rows, n_cols = data_npy.shape

    # flatten the sheet into parallel lists: one item per labelled interval
    starts_ends = []
    video_names = []
    animal_ids = []
    for k in range(n_rows):
        vid_name = data_npy[k][0]
        # stop at n_cols-1 so the end column p+1 always exists, even when the
        # sheet carries an unpaired trailing column
        for p in range(1, n_cols - 1, 2):
            start, end = data_npy[k, p], data_npy[k, p + 1]

            # 0 is the sheet's padding value; blank cells arrive as NaN and
            # cannot describe an interval either
            if pd.isna(start) or pd.isna(end) or start == 0:
                continue

            starts_ends.append([start, end])
            video_names.append(vid_name)
            animal_ids.append(animal_labels[p // 2])

    #
    video_names = np.array(video_names)
    starts_ends = np.array(starts_ends)
    animal_ids = np.array(animal_ids)

    # gather all intervals that share the same video and the same animal pair
    unique_vids = np.unique(video_names)
    unique_ids = np.unique(animal_ids)

    #
    final_arrays = []
    for unique_vid in unique_vids:
        in_video = video_names == unique_vid
        for unique_id in unique_ids:
            selected = in_video & (animal_ids == unique_id)
            if selected.any():
                # squeeze keeps the historical output: a lone interval is 1-D
                final_arrays.append([unique_vid,
                                     unique_id,
                                     starts_ends[selected].squeeze()])

    # 
    print ("                                UNIQUE VIDEO ID           ANIMAL IDS      STARTS-ENDS ARRAYS")
    for k in range(len(final_arrays)):
        print (" unique video - pair: #", k, ": ", final_arrays[k])

    return final_arrays

#
def _parse_animal_pair(label):
    """Turn a pair label such as "0,1" into an int32 array ``[0, 1]``."""
    first, second = str(label).split(',')[:2]
    # pandas renames repeated column names to "<name>.1", "<name>.2", ...;
    # drop such a suffix so "0,1.1" still reads as the pair (0, 1)
    second = second.split('.')[0]
    return np.int32([int(first), int(second)])


def make_pairwise_npy_files_for_simba(final_arrays,
                                     root_dir):
    """Write one labelled .npy array per (video, animal pair) entry.

    For every entry the tracking file ``<root_dir>/<video>_compressed_Day.slp``
    is loaded through ``Track`` and an array of shape ``(n_frames, 2, 2, 2)``
    is saved to
    ``<root_dir>/simon_npy_arrays_july29_2022/<video>_animals_<a>_<b>.npy``:

    * ``[..., 0]`` holds ``track.tracks_spine[:, [a, b]]``
      (presumably the x/y spine centre of the two animals - TODO confirm
      against ``Track``);
    * ``[..., 1]`` is 1 for frames inside a labelled ``[start, end)`` interval
      and 0 elsewhere.

    Entries whose output file already exists are skipped.

    Parameters
    ----------
    final_arrays : list
        Entries ``[video_name, pair_label, starts_ends]``; ``pair_label`` is a
        string like ``"0,1"`` and ``starts_ends`` is either an
        ``(n_intervals, 2)`` array or a single ``(2,)`` interval.
    root_dir : str
        Directory containing the .slp files; outputs go to a sub-directory.
    """
    #
    for dataset in final_arrays:

        #
        fname = dataset[0]

        # split on the comma instead of reading fixed character positions,
        # so ids with more than one digit are parsed correctly
        animal_ids = _parse_animal_pair(dataset[1])

        #
        starts_ends = dataset[2]

        fname_out = os.path.join(root_dir,'simon_npy_arrays_july29_2022',
                            fname+"_animals_"+str(animal_ids[0])+"_"+str(animal_ids[1])+'.npy')

        # already generated on a previous run
        if os.path.exists(fname_out):
            continue

        # create the output folder before the expensive track loading so that
        # np.save cannot fail on a missing directory afterwards
        os.makedirs(os.path.dirname(fname_out), exist_ok=True)

        #
        print ("PROCESSING: ")
        print (fname)
        print (animal_ids)
        print (starts_ends)

        #
        track = Track(os.path.join(root_dir,
                                   fname+'_compressed_Day.slp'))
        track.animal_ids = animal_ids
        track.tracks_names = ['female','male','pup1','pup2','pup3','pup4']
        track.recompute_spine_centres=True
        track.verbose = True                         # gives additional printouts
        track.n_animals = len(track.animal_ids)      # number of animals
        track.filter_width = 10                      # this is the median filter width in frames; e.g. 10 ~=0.4 seconds
                                                     # higher values provide more stability, but less temporally precise locations
        #
        track.load_tracks()

        # last axis: 0 -> coordinates, 1 -> binary behaviour label
        data_out = np.zeros((track.tracks_spine.shape[0],
                             2,2,2))

        #
        data_out[:,:,:,0] = track.tracks_spine[:,animal_ids]

        # a lone interval arrives as a 1-D [start, end] array; promote it to one
        # row explicitly instead of relying on an exception to detect it
        if np.ndim(starts_ends) == 1:
            print ("single exmaple only")
        for start, end in np.atleast_2d(starts_ends):
            # int() so that intervals read from float columns still slice
            data_out[int(start):int(end),:,:,1]=1

        #
        np.save(fname_out, data_out)

        #
        print ("")
        

In [60]:
##############################################################
################# LOAD DATA FRAME AND ID PAIRS ###############
##############################################################

# these are the ids of the gerbils that Lisa and Jennifer decided to label
# ids 0->5 correspond to female->pup4
id_pairs = [
    [0,1],
    [1,0],
    [4,5],
    [5,4],
    [0,2],
    [2,0],
    [1,3],
    [3,1]
]

fname = "/home/cat/Documents/simba_spreadsheet_approach_ids.ods"

# read the OpenDocument spreadsheet once, with the engine stated explicitly,
# and reuse the same frame for both parsing and display
dataframe = pd.read_excel(fname, engine="odf")
header = dataframe.columns.tolist()

# last expression of the cell: shows the loaded table
dataframe



Unnamed: 0,video_name,"0,1","0,1.1","1,0","1,0.1","4,5","4,5.1","5,4","5,4.1","0,2","0,2.1","2,0","2,0.1","1,3","1,3.1","3,1","3,1.1"
0,2020_08_01_11_02_57_239373,6459,6470,148,185,220,290,14233,14244,5350,5370,28170,28181,11667,11685,16030,16047
1,2020_08_01_11_02_57_239373,0,0,2286,2354,1054,1072,14250,14264,0,0,0,0,17820,17845,18245,18284
2,2020_08_01_11_02_57_239373,0,0,6188,6202,7642,7670,21016,21027,0,0,0,0,0,0,21942,21969
3,2020_08_01_11_02_57_239373,0,0,15200,15223,7709,7725,0,0,0,0,0,0,0,0,0,0
4,2020_08_01_11_02_57_239373,0,0,17903,17940,13264,13280,0,0,0,0,0,0,0,0,0,0
5,2020_08_01_11_02_57_239373,0,0,18788,18858,13324,13343,0,0,0,0,0,0,0,0,0,0
6,2020_08_01_11_02_57_239373,0,0,25765,25780,14890,14913,0,0,0,0,0,0,0,0,0,0
7,2020_08_01_11_02_57_239373,0,0,0,0,17845,17886,0,0,0,0,0,0,0,0,0,0
8,2020_08_01_11_02_57_239373,0,0,0,0,18936,18967,0,0,0,0,0,0,0,0,0,0
9,2020_08_01_11_02_57_239373,0,0,0,0,19648,19664,0,0,0,0,0,0,0,0,0,0


In [61]:
#########################################################################
################# CONVERT SPREADSHEET TO 2 COLUMN VERSION ###############
#########################################################################
# one entry per (video, animal pair): [video_name, pair_label, starts_ends]
final_arrays = parse_spreadsheet_return_unique_vids_and_starts_ends(
    header=header,
    dataframe=dataframe,
)

  
            

                                UNIQUE VIDEO ID           ANIMAL IDS      STARTS-ENDS ARRAYS
 unique video - pair: # 0 :  ['2020_07_21_11_48_16_485044', '0,1', array([[15357, 15377],
       [21460, 21490],
       [27547, 27582]])]
 unique video - pair: # 1 :  ['2020_07_21_11_48_16_485044', '0,2', array([[11108, 11141],
       [11740, 11758],
       [18730, 18747],
       [21460, 21477]])]
 unique video - pair: # 2 :  ['2020_07_21_11_48_16_485044', '1,0', array([[ 5775,  5801],
       [ 7128,  7160],
       [15263, 15277],
       [18336, 18354]])]
 unique video - pair: # 3 :  ['2020_07_22_11_48_50_324539', '0,1', array([[ 2283,  2303],
       [18974, 18991],
       [25428, 25466]])]
 unique video - pair: # 4 :  ['2020_07_22_11_48_50_324539', '0,2', array([[1544, 1554],
       [2242, 2255]])]
 unique video - pair: # 5 :  ['2020_07_22_11_48_50_324539', '1,0', array([[  378,   400],
       [12134, 12164],
       [13267, 13284],
       [20861, 20895]])]
 unique video - pair: # 6 :  ['2020_0

In [62]:
###########################################################################################
########## GENERATE .NPY FILES FROM .SLP FOR EACH UNIQUE VID-PAIR AND NUMPY ARRAYS ########
###########################################################################################

##############################################################
# directory passed to the generator as the root for its input and output paths
root_dir = '/media/cat/256GB/dan/presentation/'

# write one .npy file per entry of final_arrays
make_pairwise_npy_files_for_simba(final_arrays=final_arrays,
                                  root_dir=root_dir)
print ("DONE...")

DONE...
