In [3]:
import os
import numpy as n
from matplotlib import pyplot as plt
import pickle

In [4]:
from modules import utils

### Set up data directories

In [11]:
# The root directory where the two folders are contained. 
# You can leave this as is for now, it will work since the data directory that contains the 
# coordinates and annotations is contained within this repository. If you download the dropbox
# folder to your computer and want to use that data, you should set data_root_dir to the root 
# directory of the dropbox folder.
data_root_dir = 'data'

# These are hardcoded to corrspond to the folder names in the downloaded dataset.
# The first is the folder of files containing coordinates, second is annotations.
dlc_dir = 'postprocessedXYCoordinates'
ann_dir = 'manualannotations'

### Load in and format the data

In [29]:
# Using os.path.join to join names of folders when building a path is better practice
# than doing it manually with something like path = data_root + '/' + dlc_dir
dlc_path = os.path.join(data_root, dlc_dir)
dlc_files = os.listdir(dlc_path)
ann_path = os.path.join(data_root, ann_dir)
ann_files = os.listdir(ann_path)

print("Looking for DLC files in : %s" % dlc_path)
print(("\tFound: " + "\n\t\t%s" *len(dlc_files))% tuple(dlc_files))
print("Looking for Annotation files in : %s" % ann_path)
print(("\tFound: " + "\n\t\t%s" *len(ann_files))% tuple(ann_files))

Looking for DLC files in : data/postprocessedXYCoordinates
	Found: 
		Female1.npy
		Female2.npy
		Female4.npy
		Male1.npy
		Male2.npy
		Male3.npy
Looking for Annotation files in : data/manualannotations
	Found: 
		Annotated_Female1.npy
		Annotated_Female2.npy
		Annotated_Female4.npy
		Annotated_Male1.npy
		Annotated_Male2.npy
		Annotated_Male3.npy


In [66]:
# This bunch of code loads in and formats the data... don't worry too much about it, just let it do its thing! 
# The output is a dictionary called all_data, where there is a key for each of the experiments
all_data = {}
print("Loading files: ")
for f_name in dlc_files:
    if f_name[-3:] != 'npy':
        continue
        
    dlc_file=os.path.join(dlc_path, f_name)
    ann_file=os.path.join(ann_path, 'Annotated_' + f_name)
    print("\t" + f_name + "\n\tAnnotated_" + f_name)
    data_dlc = n.load(dlc_file)
    data_ann = n.load(ann_file)
    labels = data_dlc[0]
    dtype = [('t', n.int), ('ann', 'U30')]
    i = 0
    for label in data_dlc[0]:
        i += 1
        coord = 'x' if i % 2 == 0 else 'y'
        dtype += [(label + '_' + coord , n.float32 )]

    data_concat = n.concatenate((data_ann, data_dlc[1:]),axis=1)
    data = n.array(n.zeros(data_concat.shape[0]), dtype = dtype)
    for i in range(data_concat.shape[1]):
        data[dtype[i][0]] = data_concat[:, i]
    all_data[f_name[:-4]] = data

Loading files: 
	Female1.npy
	Annotated_Female1.npy


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = [('t', n.int), ('ann', 'U30')]


	Female2.npy
	Annotated_Female2.npy
	Female4.npy
	Annotated_Female4.npy
	Male1.npy
	Annotated_Male1.npy
	Male2.npy
	Annotated_Male2.npy
	Male3.npy
	Annotated_Male3.npy


In [80]:
f = open(os.path.join(data_root_dir, 'all_data.p'), 'wb')
pickle.dump(all_data, f)