In [None]:
# Initialize a new VAME project
import vame
import functions_bondjango as bd
import functions_vame as fv
import paths
import random
import os
import shutil
import h5py
import numpy as np
import importlib
import pandas as pd

In [None]:
# Manually define the config path if the project has already been created.
# Exactly one assignment should be active; the commented-out lines are the
# config.yaml paths of other project folders, kept for quick switching.
# NOTE(review): the project-initialization cell further down reassigns `config`
# with the return value of vame.init_new_project, replacing this value when run.
# config = r"D:\VAME_projects\VAME_prey_6-Apr28-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_7-Apr29-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_15dim-May2-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_mouse15dim-May14-2021\config.yaml"
config = r"D:\VAME_projects\VAME_prey_15dim-May17-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_mouse15dim-Oct12-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_mouse15dim-Oct25-2021\config.yaml"
# config = r"D:\VAME_projects\VAME_prey_speeds15dim-Jun27-2022\config.yaml"

In [None]:
# Call vame.motif_videos on the project identified by `config`, passing '.avi'
# as the video type (presumably the extension of the source videos — TODO confirm).
# NOTE(review): at this point `config` is the hard-coded path assigned above, not
# the project created later in the notebook — confirm that this order is intended.
# vame.motif_videos(config)
vame.motif_videos(config, videoType='.avi')

In [None]:
# importlib.reload(bd)

# # target_string = 'mouse:DG_210323_b'
# target_string = 'slug:dg_210323_b'
# # target_string = 'slug:05_31_2021_10_44_32_miniscope_dg_210323_b_succ_head'

# # target_model = 'video_experiment'
# target_model = 'analyzed_data'

# # results = bd.query_database(target_model, target_string)
# results = bd.delete_multiple(target_model, target_string)

# print(results)

In [None]:
%%time
# Collect the database entries and analysis files used to build the project.
#
# Outputs of this cell (used by the cells below):
#   files             - database entries that passed the filters
#   analysis_paths    - *_rawcoord.hdf5 path for each entry in `files` (same order)
#   video_paths       - avi path for each entry in `files` (same order)
#   column_list       - labels containing 'x' or 'y', taken from the first 'succ' file
#   working_directory - folder in which the VAME project is created

# number of files to keep after filtering (0 or negative keeps every file)
number_files = 750
# minimum distance between the two frame bounds for a trial to be usable
# (presumably measured in frames — TODO confirm)
min_frames = 50
# database queries; the results of all of them are pooled
search_string = ['rig:miniscope, result:succ', 'rig:miniscope, result:fail']
# search_string = ['rig:miniscope']
# search_string = ['slug:06_02_2021_09_33_34_miniscope_dg_210323_b_succ_head']

# define the target model
target_model = 'video_experiment'

# define the columns to check for nans
nan_columns = [0, 2, 4, 6, 8, 10, 12, 14]

# run every query and flatten the results into a single list of entries
query_results = [bd.query_database(target_model, query) for query in search_string]
files_list = [item for sublist in query_results for item in sublist]
print(len(files_list))

# allocate memory for the cleaned up files and analysis paths
files = []
analysis_paths = []
# True until the column names have been read from a successful trial
column_flag = True

# get rid of the files that are too short for pose segmentation
for el in files_list:
    # derive the path of the analyzed data file from the video path
    dlc_path = el['avi_path']
    dlc_path = dlc_path.replace('.avi', '_rawcoord.hdf5')
    dlc_path = dlc_path.replace('VideoExperiment', 'AnalyzedData')
    dlc_path = dlc_path.replace('VRExperiment', 'AnalyzedData')

    # open the file using h5py, as can't install pytables in this env
    with h5py.File(dlc_path, 'r') as f:
        # read the trace labels once; they are reused for the column names below
        trace_labels = np.array(f['full_traces/block0_items']).astype(str)
        # files flagged as bad carry 'badFile' as their first label
        if trace_labels[0] == 'badFile':
            continue
        # parse the bounds from the file
        bounds = np.array(f['frame_bounds/block0_values'])[0]
        # prefer the calcium-matched traces, fall back to the full traces
        try:
            data = np.array(f['matched_calcium/block0_values'])
        except KeyError:
            data = np.array(f['full_traces/block0_values'])

    # flag files where any of the checked columns contains nothing but nans
    try:
        all_nan_column = bool(np.isnan(data[:, nan_columns]).all(axis=0).any())
    except IndexError as err:
        # the data does not have the expected layout; report enough to find out why
        raise ValueError(
            f'Cannot check columns {nan_columns} for nans in {dlc_path}: '
            f'data has shape {data.shape}, '
            f'full_traces labels are {trace_labels.tolist()}'
        ) from err
    if all_nan_column:
        print(f'The file {dlc_path} has only nans in the behavior')

    # take the column names from the first successful trial encountered
    if column_flag and ('succ' in dlc_path):
        columns_all = trace_labels
        # select the columns with x or y on them
        column_list = [name for name in columns_all if (('x' in name) or ('y' in name))]
        column_flag = False

    # add the file to the final list only if it's long enough and has valid behavior
    if ((bounds[1] - bounds[0]) > min_frames) and not all_nan_column:
        files.append(el)
        analysis_paths.append(dlc_path)

if number_files > 0:
    # random.sample raises ValueError when asked for more items than are
    # available, so never request more than the number of usable files
    n_selected = min(number_files, len(files))
    if n_selected < number_files:
        print(f'Only {len(files)} usable files found, fewer than the '
              f'{number_files} requested; using all of them')
    # pick a random subset (the draw is not seeded, so it differs between runs)
    random_indexes = random.sample(range(len(files)), n_selected)
    files = [files[idx] for idx in random_indexes]
    analysis_paths = [analysis_paths[idx] for idx in random_indexes]

# get the video paths in a list
video_paths = [el['avi_path'] for el in files]

# define the working directory
working_directory = paths.vame_path
print(len(video_paths))
print(len(analysis_paths))

In [None]:
%%time

# Create a new VAME project from the selected videos and keep the value returned
# by init_new_project in `config`, which the following cells pass to every vame call
config = vame.init_new_project(
    project='VAME_prey_speeds15dim',
    videos=video_paths,
    working_directory=working_directory,
    videotype='.avi',
)

In [None]:
# # test alignment function
# # define the target example file
# example_file = r"J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\03_29_2021_09_17_07_miniscope_DG_210202_a_succ_preproc.hdf5"

# with h5py.File(example_file, 'r') as f:
#     # parse the bounds from the file
#     try:
#         data = np.array(f['matched_calcium/block0_values'])
#         labels = np.array(f['matched_calcium/block0_items']).astype(str)
#     except KeyError:
#         data = np.array(f['full_traces/block0_values'])
#         labels = np.array(f['full_traces/block0_items']).astype(str)

#     dataframe = pd.DataFrame(data, columns=labels)
# #     filename = el['slug']
#     filename = os.path.splitext(os.path.basename(example_file))[0]
#         # define the cropping
# #     crop_size = (1, 1)
# #     # egocentrically align the data and save
# #     fv.run_alignment(dlc_path, project_folder, '.avi', crop_size)
# # get the egocentrically aligned coordinates
# aligned_traj, frames = vame.egocentric_alignment(config, pose_ref_index=[0, 7], crop_size=(200, 200), 
#                                                  use_video=False, video_format='.avi', 
#                                                  check_video=False, save_flag=False, 
#                                                  filename=[filename], column_list=column_list,
#                                                  dataframe=[dataframe], 
#                                                  extra_columns=['mouse_speed'])

In [None]:
%%time

# Egocentrically align the trajectories of every selected file.
# importlib.reload(vame)

# folder containing the project's config file
project_folder = os.path.dirname(config)

# go through the selected entries; analysis_paths holds the matching hdf5 file
# for each entry at the same position
for position, entry in enumerate(files):
    print(f'Current file: {entry["slug"]}')

    # load the traces and their labels, preferring the calcium-matched version
    # and falling back to the full traces when it is not present in the file
    with h5py.File(analysis_paths[position], 'r') as handle:
        try:
            trace_values = np.array(handle['matched_calcium/block0_values'])
            trace_labels = np.array(handle['matched_calcium/block0_items']).astype(str)
        except KeyError:
            trace_values = np.array(handle['full_traces/block0_values'])
            trace_labels = np.array(handle['full_traces/block0_items']).astype(str)

    # both arrays are already in memory, so the table is built after closing the file
    dataframe = pd.DataFrame(trace_values, columns=trace_labels)
    filename = entry['slug']

    # align from the in-memory table (no video is used) and save the result
    aligned_traj, frames = vame.egocentric_alignment(
        config,
        pose_ref_index=[0, 7],
        crop_size=(200, 200),
        use_video=False,
        video_format='.avi',
        check_video=False,
        save_flag=True,
        filename=[filename],
        column_list=column_list,
        dataframe=[dataframe],
        extra_columns=['mouse_speed', 'cricket_0_speed'],
    )

In [None]:
%%time
# Create the training set
# Calls vame.create_trainset on the project identified by `config`; presumably this
# consumes the aligned trajectories saved by the alignment cell — TODO confirm.

vame.create_trainset(config)

In [None]:
# Re-execute the already imported vame package so that edits to its source are
# picked up without restarting the kernel. importlib.reload only reloads the
# module object it is given; submodules are not reloaded automatically.
importlib.reload(vame)

In [None]:
# Train the network
# Calls vame.train_model on the project identified by `config`.

# remember to set the training parameters in the project's config.yaml before running

vame.train_model(config)

In [None]:
# Evaluate the network
# Calls vame.evaluate_model on the project identified by `config`; presumably this
# expects the training cell to have been run for this project — TODO confirm.

vame.evaluate_model(config)

In [None]:
%%time
# Segment behavior
# Runs vame.pose_segmentation on the project identified by `config`.
# The commented call below is a different entry point (behavior_segmentation) with
# explicit model and cluster arguments — presumably an older VAME API; TODO confirm.

# vame.behavior_segmentation(config, model_name='VAME_prey_model', cluster_method='kmeans', n_cluster=[30])
vame.pose_segmentation(config)

In [None]:
# Visualize embedding
# vame.visualization(config, label=None)

In [None]:
# Create motif videos
# vame.motif_videos(config, videoType='.avi')

In [None]:
# Community analysis

# vame.community(config, show_umap=False, cut_tree=None)

In [None]:
# Quantify behavior

# vame.behavior_quantification(config, model_name='VAME_prey_model', cluster_method='kmeans', n_cluster=30)