# Heatmap Notebook

I'm using this notebook to play around with displays of the data, get a better sense of what the data looks like, and develop an architecture that helps us visualize positional data in the format of a thermal heatmap.

## Table of Contents:


1. Looking at the data to understand it's innate formatting and brainstorm about how to process and manipulate it
2. Processing and manipulating the data to get it in the right format
3. Developing and OOP architecture to load, filter, and display the data
4. Writing a main with a simple call to the plot function (after loading and instantiating) to create the heatmap

In [54]:
import pandas as pd
import matplotlib as plt
import plotly as ply
import numpy as np

In [55]:
# read csv as pandas dataframe
pilot_df = pd.read_csv("k934 Pilot Position Data.csv")
pilot_df.head() # display first 5 lines

Unnamed: 0,scorer,DLC_mobnet_100_k934Jan28shuffle1_100000,DLC_mobnet_100_k934Jan28shuffle1_100000.1,DLC_mobnet_100_k934Jan28shuffle1_100000.2,DLC_mobnet_100_k934Jan28shuffle1_100000.3,DLC_mobnet_100_k934Jan28shuffle1_100000.4,DLC_mobnet_100_k934Jan28shuffle1_100000.5,DLC_mobnet_100_k934Jan28shuffle1_100000.6,DLC_mobnet_100_k934Jan28shuffle1_100000.7,DLC_mobnet_100_k934Jan28shuffle1_100000.8,DLC_mobnet_100_k934Jan28shuffle1_100000.9,DLC_mobnet_100_k934Jan28shuffle1_100000.10,DLC_mobnet_100_k934Jan28shuffle1_100000.11
0,bodyparts,bodypart1,bodypart1,bodypart1,bodypart2,bodypart2,bodypart2,bodypart3,bodypart3,bodypart3,objectA,objectA,objectA
1,coords,x,y,likelihood,x,y,likelihood,x,y,likelihood,x,y,likelihood
2,0,183.74688720703125,267.8570251464844,0.9984562397003174,165.04798889160156,286.6018371582031,0.9676482081413269,185.1376190185547,288.53961181640625,0.9937425255775452,169.37730407714844,390.68994140625,0.9994144439697266
3,1,188.9870147705078,250.45738220214844,0.9965400695800781,171.54673767089844,267.5286560058594,0.9560951590538025,187.74215698242188,274.10040283203125,0.9912405610084534,167.10882568359375,370.994873046875,0.9999550580978394
4,2,195.9990692138672,231.02508544921875,0.9989144802093506,176.02798461914062,248.64425659179688,0.9965523481369019,194.4108123779297,257.8283996582031,0.9991292357444763,166.38233947753906,353.5351867675781,0.9999170303344727


In [56]:
pilot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18019 entries, 0 to 18018
Data columns (total 13 columns):
 #   Column                                      Non-Null Count  Dtype 
---  ------                                      --------------  ----- 
 0   scorer                                      18019 non-null  object
 1   DLC_mobnet_100_k934Jan28shuffle1_100000     18019 non-null  object
 2   DLC_mobnet_100_k934Jan28shuffle1_100000.1   18019 non-null  object
 3   DLC_mobnet_100_k934Jan28shuffle1_100000.2   18019 non-null  object
 4   DLC_mobnet_100_k934Jan28shuffle1_100000.3   18019 non-null  object
 5   DLC_mobnet_100_k934Jan28shuffle1_100000.4   18019 non-null  object
 6   DLC_mobnet_100_k934Jan28shuffle1_100000.5   18019 non-null  object
 7   DLC_mobnet_100_k934Jan28shuffle1_100000.6   18019 non-null  object
 8   DLC_mobnet_100_k934Jan28shuffle1_100000.7   18019 non-null  object
 9   DLC_mobnet_100_k934Jan28shuffle1_100000.8   18019 non-null  object
 10  DLC_mobnet_100_k934Jan

In [57]:
# transpose for groupby
pilot_T = pilot_df.T
#pilot_T.drop('scorer')

pilot_df = pilot_df.drop(columns=['scorer'])
pilot_df = pilot_df.drop([0])
pilot_df

Unnamed: 0,DLC_mobnet_100_k934Jan28shuffle1_100000,DLC_mobnet_100_k934Jan28shuffle1_100000.1,DLC_mobnet_100_k934Jan28shuffle1_100000.2,DLC_mobnet_100_k934Jan28shuffle1_100000.3,DLC_mobnet_100_k934Jan28shuffle1_100000.4,DLC_mobnet_100_k934Jan28shuffle1_100000.5,DLC_mobnet_100_k934Jan28shuffle1_100000.6,DLC_mobnet_100_k934Jan28shuffle1_100000.7,DLC_mobnet_100_k934Jan28shuffle1_100000.8,DLC_mobnet_100_k934Jan28shuffle1_100000.9,DLC_mobnet_100_k934Jan28shuffle1_100000.10,DLC_mobnet_100_k934Jan28shuffle1_100000.11
1,x,y,likelihood,x,y,likelihood,x,y,likelihood,x,y,likelihood
2,183.74688720703125,267.8570251464844,0.9984562397003174,165.04798889160156,286.6018371582031,0.9676482081413269,185.1376190185547,288.53961181640625,0.9937425255775452,169.37730407714844,390.68994140625,0.9994144439697266
3,188.9870147705078,250.45738220214844,0.9965400695800781,171.54673767089844,267.5286560058594,0.9560951590538025,187.74215698242188,274.10040283203125,0.9912405610084534,167.10882568359375,370.994873046875,0.9999550580978394
4,195.9990692138672,231.02508544921875,0.9989144802093506,176.02798461914062,248.64425659179688,0.9965523481369019,194.4108123779297,257.8283996582031,0.9991292357444763,166.38233947753906,353.5351867675781,0.9999170303344727
5,199.0595245361328,212.73370361328125,0.9982749223709106,180.4905242919922,231.13796997070312,0.9960758090019226,198.74842834472656,237.82188415527344,0.994279146194458,164.18785095214844,336.8181457519531,0.9999291896820068
...,...,...,...,...,...,...,...,...,...,...,...,...
18014,573.058837890625,17.906360626220703,0.9958451390266418,577.7439575195312,51.47393798828125,0.9994076490402222,597.6116333007812,34.115211486816406,0.9991927742958069,679.1771850585938,79.63839721679688,0.9998797178268433
18015,573.028076171875,17.82950210571289,0.9957618117332458,577.7886962890625,51.371612548828125,0.9993901252746582,597.6239624023438,34.0703125,0.9991920590400696,679.2212524414062,79.62274169921875,0.9998778104782104
18016,575.8427734375,13.843232154846191,0.9908120036125183,578.408447265625,50.09979248046875,0.9991810917854309,599.7352294921875,34.279449462890625,0.9990198612213135,677.3461303710938,82.67105865478516,0.9999796152114868
18017,572.4026489257812,13.478456497192383,0.9910678267478943,576.093505859375,50.230247497558594,0.9992073178291321,597.7764282226562,33.692195892333984,0.9994667172431946,673.5689697265625,82.5133056640625,0.999964714050293


In [58]:
# group by column 0 -- create a set of groups for each bodypart, object, etc.
# each group has attributes x, y, and likelihood which represent position (x,y) and likelihood over a timeseries (?)
grouped = pilot_T.groupby(pilot_T[1])
group_names = [group for group, data in grouped]
group_names # 4 elem arr containing group identifier ('coords') and subsequent coords: likelihood, x, y

['coords', 'likelihood', 'x', 'y']

In [65]:
# for simplicity and to understand the data, created an array of pandas df objects
data_arr = [data for group, data in grouped]
labels = data_arr[0]
#df = pd.DataFrame(data_arr[1:], columns = labels)
data_arr_T = [elem.T for elem in data_arr]

# keys: ['coords', 'likelihood', 'x', 'y']
sets = {group_names[i]: data_arr_T[i] for i in range(1, len(group_names))}

Unnamed: 0,DLC_mobnet_100_k934Jan28shuffle1_100000,DLC_mobnet_100_k934Jan28shuffle1_100000.3,DLC_mobnet_100_k934Jan28shuffle1_100000.6,DLC_mobnet_100_k934Jan28shuffle1_100000.9
0,bodypart1,bodypart2,bodypart3,objectA
1,x,x,x,x
2,183.74688720703125,165.04798889160156,185.1376190185547,169.37730407714844
3,188.9870147705078,171.54673767089844,187.74215698242188,167.10882568359375
4,195.9990692138672,176.02798461914062,194.4108123779297,166.38233947753906
...,...,...,...,...
18014,573.058837890625,577.7439575195312,597.6116333007812,679.1771850585938
18015,573.028076171875,577.7886962890625,597.6239624023438,679.2212524414062
18016,575.8427734375,578.408447265625,599.7352294921875,677.3461303710938
18017,572.4026489257812,576.093505859375,597.7764282226562,673.5689697265625


In [7]:
# above: processed data and converted into dict of df objects
# below: need to create 2D representation of space with rgb values, increment pixel rgb values if position in location
# before: try scatter plot of all data

# TODO: clean up data presentation of sets
# ideally: 

In [23]:
class Heatmap:
    def __init__(self, sets):
        self.x_min, self.x_max = get_x_dim()
        self.y_min, self.y_max = get_y_dim()
        self.x_dim = int(self.x_max)
        self.y_dim = int(self.y_max)
        self.sets = sets
        
        self.pixel_width = 0.5
        
        self.bins = float[y_dim/pixel_width][x_dim/pixel_width][4] # dim 3: 4 elems [likelihood, r, g, b]
        self.load_data(sets)
        
    def get_x_dim:
        data = self.sets[group_names[-2]]
        min_x, max_x = None, None
        # find max and min x value to find range
        for label, content, in data.items():
            this_max = content.max()
            
            if !max_x:
                max_x = this_max
            else if this_max > max_x:
                max_x = this_max
                
            this_min = content.min()
            
            if !min_x:
                min_x = this_min
            if this_min > min_x:
                min_x = this_min
        return min_x, max_x
        
    def get_y_dim:
        data = self.sets[group_names[-1]]
        min_y, max_y = None, None
        # find max and min y value to find range
        for label, content, in data.items():
            this_max = content.max()
            
            if !max_y:
                max_y = this_max
            else if this_max > max_y:
                max_x = this_max
                
            this_min = content.min()
            
            if !min_y:
                min_y = this_min
            if this_min > min_y:
                min_y = this_min
        return min_y, max_y
    
    def load_data(self):
        for grp in sets.keys:
            for elem in sets[grp][2:] {
                i,j = self.find_bin(elem)
                self.increment_color(i,j,elem)
            }
        
        for r_num in len(self.bins):
            for c_num in len(self.bins[0]):
                self.bins[r_num][c_num]
    
    def find_bin(self, elem):
    
    def increment_color(self, i, j, llhd):
        for 
        self.bins[i][j]
        
    def plot:
        
        

SyntaxError: invalid syntax (3607832338.py, line 2)