Please read the TODOs, as they may be crucial depending on the dataset you are using to train. The configs are not specific to any one dataset, so choose them as you see appropriate.

In [92]:
from baseline_model import *
from pathlib import Path 
import os
import math

In [64]:
# Base directory that the data paths below are resolved against.
root = Path('../../..')

# Settings for a training run. They are not tied to a particular dataset.
config = {
    'k_nearest_baboons': 4,  # nearest baboons considered per frame (MUST BE EVEN)
    'k_historic_velocities': 4,  # past velocities considered per baboon
    'saved_path': 'saved_weights.pth',  # where model information (epoch, weights, etc.) is saved
    'learning_rate': 0.001,  # lr
    'batch_size': 32,  # training samples per batch passed to the network
    'val_split': 0.2,  # ratio of val and training data
    'shuffle_dataset': True,  # shuffle data before start (TODO: change this if we use kfold?)
    'epochs': 100,  # number of epochs to train the model
    'early_stop': True,  # whether to use early stopping
    'early_stop_epoch': 3,  # epochs of increasing validation loss counted as overfitting
    'input_training_data': root / 'output/DJI_0870_velocity.csv',
}

# DO NOT CHANGE THE FOLLOWING
# The input dimension is the sum of the two k values above.
config['input_dimension'] = config['k_nearest_baboons'] + config['k_historic_velocities']
config['validation_loss_path'] = ''  # leave empty
config['training_loss_path'] = ''  # leave empty

# Config checks: k must be even and the training CSV must exist.
assert config['k_nearest_baboons'] % 2 == 0
assert os.path.isfile(config['input_training_data'])


## Initialize table for getting nearest baboons and their respective velocities

In [93]:
# Assumes a baboon has only one position per frame.
# TODO: Assumes data is ordered by frame (increasing timesteps/frames)
training_data = pd.read_csv(config['input_training_data'])

# Maps a frame to a list of (position, velocity, baboon id) tuples, one per row of that frame
frames_to_velocities = {}

for idx, row in training_data.iterrows():
    current_id = row['baboon id']
    current_velocity = row['velocity']
    current_position = (row['centroid_x'], row['centroid_y'])
    current_frame = row['frame']

    # Create the frame's list on first sight, then record this baboon in it.
    frames_to_velocities.setdefault(current_frame, []).append(
        (current_position, current_velocity, current_id)
    )

"""
Returns the velocities of the K physically nearest baboons to baboon_id at the given frame

Pre-pends with negative ones if there are fewer than k other baboons in the frame
"""
def get_k_nearest_baboons_velocities(frame, baboon_id, k=None, frame_table=None):
    """Return the velocities of the k baboons physically nearest to ``baboon_id``.

    Args:
        frame: Frame key to look up in the frame table.
        baboon_id: Id of the baboon whose neighbours are wanted. The baboon
            itself is never part of the result.
        k: Number of neighbours to return. Defaults to
            ``config['k_nearest_baboons']``.
        frame_table: Mapping of frame -> list of
            ``(position, velocity, baboon_id)`` tuples, where ``position`` is
            an ``(x, y)`` pair. Defaults to the module-level
            ``frames_to_velocities``. The table is not modified.

    Returns:
        A list of exactly ``k`` velocities ordered nearest first. When the
        frame holds fewer than ``k`` other baboons, the list is front-padded
        with ``-1.0``.

    Raises:
        KeyError: If ``frame`` is not in the frame table.
        RuntimeError: If ``baboon_id`` has no entry in the given frame.
    """
    if k is None:
        k = config['k_nearest_baboons']
    if frame_table is None:
        frame_table = frames_to_velocities

    # Split the frame into the target baboon and everybody else. Only the
    # first matching entry is treated as the target.
    target_position = None
    neighbours = []
    for baboon in frame_table[frame]:
        if target_position is None and baboon[2] == baboon_id:
            target_position = baboon[0]
        else:
            neighbours.append(baboon)

    if target_position is None:
        raise RuntimeError('Baboon id does not exist in dataset for this frame')

    target_x, target_y = target_position

    def distance_to_target(baboon):
        x, y = baboon[0]
        return math.hypot(x - target_x, y - target_y)

    # sorted() is stable, so equidistant baboons keep their frame order.
    nearest = sorted(neighbours, key=distance_to_target)[:k]

    padding = [-1.0] * (k - len(nearest))
    return padding + [baboon[1] for baboon in nearest]
            
# Display the (position, velocity, baboon id) entries recorded for frame 1
frames_to_velocities[1]

[((2125.4, 1057.18), 51.074506175125435, 10.0),
 ((2053.7, 939.645), 0.44955044955004064, 11.0),
 ((2045.13, 905.955), 13.382885251805144, 12.0),
 ((2007.215, 900.905), 0.6178479958454139, 13.0),
 ((2161.0550000000007, 941.54), 3.7101154055304586, 14.0),
 ((2278.565, 1158.07), 6.985331880593899, 15.0),
 ((2314.695, 1177.34), 12.901044905119257, 16.0),
 ((2119.07, 809.89), 19.39097995929645, 17.0),
 ((1908.015, 763.5), 1.8776169959325903, 18.0),
 ((1923.885, 781.475), 6.434838327744581, 19.0),
 ((1950.245, 810.35), 1.6483516483603706, 20.0),
 ((1948.19, 826.1949999999998), 2.1455276318607357, 21.0),
 ((2105.445, 1237.58), 1.2081305317123932, 22.0),
 ((2124.105, 1239.65), 0.9477355624850088, 23.0),
 ((2490.31, 1190.72), 3.146853146853692, 24.0),
 ((2487.08, 1277.885), 1.0805847978347027, 25.0),
 ((1707.42, 1122.74), 0.14985014984660636, 26.0),
 ((1785.4699999999998, 1085.1), 15.050804630763162, 27.0),
 ((1800.63, 1090.315), 3.8526354042502913, 28.0),
 ((1865.715, 1096.415), 2.57811946064

In [94]:
# Velocities of the baboons nearest to baboon 10 in frame 1
get_k_nearest_baboons_velocities(1, 10)

[0.2997002997068415,
 0.9115028766610744,
 3.7101154055304586,
 0.44955044955004064]

## To get the historic velocities per baboon

In [80]:
# TODO: Must check for 'extended' periods of discontinuous frames, for future potentially sparse labeled data
# TODO: Assumes data is ordered by frame (increasing timesteps/frames)
# TODO: Warning: if there are not enough K historic frames then we pre-pad with negative ones

# Maps a baboon's id to a list of its (velocity, frame) tuples, in row order
baboons_to_velocities = {}

for idx, row in training_data.iterrows():
    current_id = row['baboon id']
    current_velocity = row['velocity']
    current_position = (row['centroid_x'], row['centroid_y'])  # read but not stored in this table
    current_frame = row['frame']

    # Create the baboon's list on first sight, then record this observation.
    baboons_to_velocities.setdefault(current_id, []).append((current_velocity, current_frame))

"""
Given a frame returns the k past velocities of the given baboon_id

Pre pads with negative ones if there are less than k previous frames available in the labeled data
"""
def get_k_past_velocities(baboon_id, current_frame, k=None, velocity_table=None):
    """Return the k velocities recorded just before ``current_frame`` for a baboon.

    "Before" means the entries that precede ``current_frame`` in the baboon's
    list; gaps in the frame numbers are not detected.

    Args:
        baboon_id: Key of the baboon in the velocity table.
        current_frame: Frame whose preceding velocities are wanted. The
            velocity at ``current_frame`` itself is not included.
        k: Number of past velocities to return. Defaults to
            ``config['k_historic_velocities']``.
        velocity_table: Mapping of baboon id -> list of ``(velocity, frame)``
            tuples. Defaults to the module-level ``baboons_to_velocities``.

    Returns:
        A list of exactly ``k`` velocities, oldest first, front-padded with
        ``-1.0`` when fewer than ``k`` earlier entries exist.

    Raises:
        KeyError: If ``baboon_id`` is not in the velocity table.
        RuntimeError: If the baboon has no entry for ``current_frame``.
    """
    if k is None:
        k = config['k_historic_velocities']
    if velocity_table is None:
        velocity_table = baboons_to_velocities

    history = velocity_table[baboon_id]

    # Position of the first entry recorded for current_frame, if any.
    frame_index = next(
        (idx for idx, (_, frame) in enumerate(history) if frame == current_frame),
        None,
    )
    if frame_index is None:
        raise RuntimeError('Frame does not exist in dataset for this baboon_id')

    # Take at most k entries before the current one; pad the front if short.
    window_start = max(0, frame_index - k)
    past = [velocity for velocity, _ in history[window_start:frame_index]]
    return [-1.0] * (k - len(past)) + past
    
# Display the (velocity, frame) entries recorded for baboon 0
baboons_to_velocities[0]

[(12.85046947525376, 21.0),
 (12.97826878451604, 22.0),
 (12.644369496333105, 23.0),
 (12.850469475230561, 24.0),
 (12.85046947525376, 25.0),
 (12.85046947522877, 26.0),
 (12.850469475230561, 27.0),
 (12.850469475242162, 28.0),
 (12.978268784517812, 29.0),
 (12.723151376903115, 30.0),
 (12.85046947522877, 31.0),
 (12.978268784519587, 32.0),
 (12.850469475240375, 33.0),
 (12.723151376903115, 34.0),
 (12.978268784517812, 35.0),
 (12.85046947522877, 36.0),
 (12.723151376891556, 37.0),
 (12.85046947525376, 38.0),
 (12.97826878451604, 39.0),
 (12.850469475232348, 40.0),
 (12.723151376903115, 41.0),
 (12.978268784517812, 42.0),
 (12.850469475240375, 43.0),
 (12.850469475230561, 44.0),
 (12.850469475230561, 45.0),
 (12.850469475240375, 46.0),
 (12.85046947524395, 47.0),
 (12.85046947522877, 48.0),
 (12.850469475230561, 49.0),
 (12.850469475242162, 50.0),
 (50.51439438630578, 51.0),
 (50.56171446434248, 52.0),
 (50.51439438631872, 53.0),
 (50.41962099716843, 54.0),
 (50.41962099717059, 55.0),


In [81]:
# Print the past velocities of baboon 0 at frame 22
print(get_k_past_velocities(0, 22))

found at 1
[-1.0, -1.0, -1.0, 12.85046947525376]
