# 2 - Algorithm Design

In [34]:
import pandas as pd
import numpy as np

from tqdm import tqdm_notebook,tnrange,tqdm_pandas,tqdm
tqdm.pandas(tqdm())

import os
import cPickle as pickle

import datetime as dt

import ipywidgets as widgets


import jupyternotify
ip = get_ipython()
ip.register_magics(jupyternotify.JupyterNotifyMagics)
#notify if cell is finished for cells taking longer than 30 seconds
%autonotify -a 30


0it [00:00, ?it/s][A


<IPython.core.display.Javascript object>

### Path to your urinal-data-28-nov_clean _pickle_ file

In [2]:
# pickle_name = "urinal-data-28-nov_clean.p"
# pickle_path = "F:\\Research\\ben\\grideye_urinal"
# pickle_full = os.path.join(pickle_path, pickle_name)

# print("looking in", pickle_full)
pickle_full = "urinal-data-28-nov_clean.p"

### Load Data

In [122]:
#---------- read in pickle----------   
if os.path.isfile(pickle_full):
    print("loading pickle")
    df = pd.read_pickle(pickle_full)
   
else:
    print("Did you run 1- Raw Data Visualisation?")

loading pickle


we want indexes not, timestamps, as our time data is only recorded to the nearest second (yet we have a sampling rate of 10 frames/second)

In [4]:
df.reset_index(inplace=True)

In [5]:
df.sample(2)

Unnamed: 0,time,P0,P1,P2,P3,P4,P5,P6,P7,P8,...,P54,P55,P56,P57,P58,P59,P60,P61,P62,P63
2345562,2017-11-26 18:46:21,22.75,23.75,22.75,22.75,23.0,25.0,24.0,23.75,23.5,...,22.5,23.25,23.0,23.75,23.5,22.75,23.5,24.25,22.25,23.0
578919,2017-11-24 11:13:12,21.25,22.75,22.0,21.75,22.75,23.0,23.0,24.0,22.0,...,21.5,22.75,22.75,23.0,23.0,22.0,22.75,23.5,23.75,22.5


## Adaptive Threshold
In _1- Raw Data Visualisation_ we saw how noisy our data is. 
With some additional testing, it's pretty clear simply looking for pixels above a set temperature is not sufficient, the ambient air temperature just varies too much!<sup>[2](https://www.sciencedirect.com/science/article/pii/S0360132396000340)</sup>

<img src="layout_sections.png" alt="grideye camera view" width="350" height="350" align="left">So: 
- We want to find a warm body in the of mess pixels. 
- We have a pretty good idea of where the warm body will appear. 
- We have an area where no person will be (the empty space)




These researchers achieved 97% accuracy finding a person with the grideye
If we adapt their algorithm for stationary people only, 
we create an adaptive threshold algorithm for finding people which goes something like:

1. get background temperature (Empty Space) ($T_b$)
2. Calculate mean of highest three temperatures for each section  ($T_m$)
3. Compare $T_b$ to $T_m$ ($T_m - T_b > threshold$) and greater than an absolute temperature ($T_abs$) 
4. Determine a "__stay__" by thresholding the time

### Grab the Urinal Section

We first create a "mask" which contains a list of column names corresponding to the Urinal section  
In this case we ignore the top 2 rows, as no one in the office is above 7 foot!

In [6]:
height = 6
n = 8
width_left = 2
width_middle = 3
width_right = 3
bottom_trim = 0

left_mask   = []
middle_mask = []
right_mask  = []
for y in range(n-height,n-bottom_trim):
    # LEFT
    for x in range(width_left):
        left_mask.append(x+y*n) 
    # MIDDLE
    for x in range(width_middle):
         middle_mask.append(x+y*n+width_left) 
    # RIGHT
    for x in range(width_right):
         right_mask.append(x+y*n+width_left+width_middle) 

left_mask = ["P"+str(x) for x in left_mask]  
middle_mask = ["P"+str(x) for x in middle_mask]  
right_mask = ["P"+str(x) for x in right_mask]  


### Grab the background temperature
Assuming no person (or hot object) will loiter near the roof, 
we take the background temperature values as the mean of top 2 rows (per section)

In [7]:
height = 6 # top 2 rows
n = 8
width_left = 2
width_middle = 3
width_right = 3

left_top   = []
middle_top = []
right_top  = []
for y in range(0,n-height):
    # LEFT
    for x in range(width_left):
        left_top.append(x+y*n) 
    # MIDDLE
    for x in range(width_middle):
         middle_top.append(x+y*n+width_left) 
    # RIGHT
    for x in range(width_right):
         right_top.append(x+y*n+width_left+width_middle) 

left_top = ["P"+str(x) for x in left_top]  
middle_top = ["P"+str(x) for x in middle_top]  
right_top = ["P"+str(x) for x in right_top]  

df[middle_top].sample(10)

Unnamed: 0,P2,P3,P4,P10,P11,P12
337921,22.75,22.5,21.75,22.0,22.75,22.25
2608596,23.25,23.25,23.5,24.0,23.25,23.0
1017184,22.25,21.75,22.75,23.5,23.5,22.5
2574593,22.0,23.0,23.25,23.5,24.0,23.25
79388,23.5,22.75,22.5,23.5,23.0,23.25
641966,23.0,22.25,23.0,22.5,23.0,22.5
1609958,23.0,23.75,23.0,22.5,23.75,23.0
2782585,22.0,22.25,22.5,21.75,23.25,22.5
54653,23.5,22.75,24.0,23.25,24.0,22.25
1793914,22.0,23.0,22.5,23.0,23.25,22.25


## Calculate the Background temperature

In [8]:
window = 600 # 1 minute window for background temp

left_thresh = []
middle_thresh = []
right_thresh = []

for t in tnrange(window/2,len(df),window):  
    left_thresh.append(df.loc[t-window/2:t+window/2,:][left_top].mean().mean())
    middle_thresh.append(df.loc[t-window/2:t+window/2,:][middle_top].mean().mean())
    right_thresh.append(df.loc[t-window/2:t+window/2,:][right_top].mean().mean())

left_thresh = [x for x in left_thresh for _ in range(window)]    
middle_thresh = [x for x in middle_thresh for _ in range(window)]    
right_thresh = [x for x in right_thresh for _ in range(window)]  

if len(left_thresh) > len(df):
    left_thresh = left_thresh[0:-(len(left_thresh) - len(df))]
    middle_thresh = middle_thresh[0:-(len(middle_thresh) - len(df))]
    right_thresh = right_thresh[0:-(len(right_thresh) - len(df))]
elif len(left_thresh) < len(df):    
    left_thresh.extend([left_thresh[-1]]*(len(df) - len(left_thresh)))
    middle_thresh.extend([middle_thresh[-1]]*(len(df) - len(middle_thresh)))
    right_thresh.extend([right_thresh[-1]]*(len(df) - len(right_thresh)))

if not (len(left_thresh) == len(middle_thresh) == len(right_thresh) == len(df)):
    raise AssertionError()




<IPython.core.display.Javascript object>

In [9]:
# pickle_name = "urinal-data-28-nov_clean.p"
# pickle_path = "F:\\Research\\ben\\grideye_urinal"
# pickle_full = os.path.join(pickle_path, pickle_name)

# print("looking in", pickle_full)
pickle_full = 'compare_temps.p'


In [13]:

if os.path.isfile(pickle_full):
    print("loading pickle")
    compare_temps = pd.read_pickle(pickle_full)
    left = compare_temps[compare_temps["position"] == 'left']
    middle = compare_temps[compare_temps["position"] == 'middle']
    right = compare_temps[compare_temps["position"] == 'right']
else:
    print("creating pickle...")
    print("this will take some time...")
    
        
    # 2. 
    print("2")
    left  = df[left_mask].copy()
    middle = df[middle_mask].copy()
    right = df[right_mask].copy()
     
    # Get the highest three numbers from a list of numbers
    # and calculate their mean
    # O(n)
    def top_three_mean(numbers):
        count = 0
        m1 = m2 = m3 = float('-inf')
        for number in numbers:
            count += 1
            if number >= m3:
                if number >= m2:
                    if number >= m1:
                        m1, m2, m3 = number, m1, m2   
                    else:
                        m3 = m2
                        m2 = number
                else:
                    m3 = number

        tri_mean = np.mean((m1,m2,m3))          
        return tri_mean if count >= 2 else None


    left["mean3"] = left.progress_apply(top_three_mean,axis = 1)
    middle["mean3"] = middle.progress_apply(top_three_mean,axis = 1)
    right["mean3"] = right.progress_apply(top_three_mean,axis = 1)
    
    left["background_temp"] = left_thresh
    middle["background_temp"] = middle_thresh
    right["background_temp"] = right_thresh
    
    left["position"] = 'left'
    middle["position"] = 'middle'
    right["position"] = 'right'
    
    compare_temps = pd.concat([left.iloc[:,-3:],middle.iloc[:,-3:],right.iloc[:,-3:]],axis = 0)
    
    compare_temps.to_pickle(pickle_full)
    
    left = compare_temps[compare_temps["position"] == 'left']
    middle = compare_temps[compare_temps["position"] == 'middle']
    right = compare_temps[compare_temps["position"] == 'right']
    

loading pickle


### Compare background temperature to highest three temepratures

In [15]:

Tabs = 24.5
Tth = 1.5
df_temp_thresh = pd.DataFrame({"time":df["time"]})
df_temp_thresh["left"] = ((left["mean3"] - left["background_temp"] > Tth) & (left["mean3"] > Tabs)).astype(int)
df_temp_thresh["middle"] = ((middle["mean3"] - middle["background_temp"] > Tth) & (middle["mean3"] > Tabs)).astype(int)
df_temp_thresh["right"] = ((right["mean3"] - right["background_temp"] > Tth) & (right["mean3"] > Tabs)).astype(int)
df_temp_thresh.set_index("time",inplace=True)
df_temp_thresh.sample(5)

Unnamed: 0_level_0,left,middle,right
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-11-23 21:36:02,0,0,0
2017-11-24 00:42:11,0,0,0
2017-11-27 16:27:41,0,0,0
2017-11-23 19:47:35,0,0,0
2017-11-28 02:30:42,0,0,0


In [16]:
print("Differentiating df_tempMeans_thresh" )
df_temp_diff = df_temp_thresh.diff(axis=0)[1:]
print("complete")
df_temp_diff.reset_index(inplace=True)

assert df_temp_diff["left"].sum() == 0
assert df_temp_diff["middle"].sum() == 0
assert df_temp_diff["right"].sum() == 0
# df_temp_diff.iloc[90:120,1:4,]
# len(df_temp_diff.iloc[:,1][df_temp_diff.iloc[:,1] != 0])


Differentiating df_tempMeans_thresh
complete


In [95]:
df_temp_diff_redL =  pd.concat([df_temp_diff.iloc[:,1][df_temp_diff.iloc[:,1] != 0],df_temp_diff["time"][df_temp_diff.iloc[:,1] != 0]],axis = 1)
df_temp_diff_redM = pd.concat([df_temp_diff.iloc[:,2][df_temp_diff.iloc[:,2] != 0],df_temp_diff["time"][df_temp_diff.iloc[:,2] != 0]],axis = 1)
df_temp_diff_redR = pd.concat([df_temp_diff.iloc[:,3][df_temp_diff.iloc[:,3] != 0],df_temp_diff["time"][df_temp_diff.iloc[:,3] != 0]],axis = 1)

df_temp_diff_redL.columns = ["in_out","time"]
df_temp_diff_redM.columns = ["in_out","time"]
df_temp_diff_redR.columns = ["in_out","time"]

df_temp_diff_redR.head(10)

Unnamed: 0,in_out,time
130,1.0,2017-11-23 16:57:14
131,-1.0,2017-11-23 16:57:14
188,1.0,2017-11-23 16:57:21
189,-1.0,2017-11-23 16:57:21
225,1.0,2017-11-23 16:57:25
226,-1.0,2017-11-23 16:57:25
234,1.0,2017-11-23 16:57:26
235,-1.0,2017-11-23 16:57:26
236,1.0,2017-11-23 16:57:26
237,-1.0,2017-11-23 16:57:26


In [18]:
#check in_out always alternates-->it always does
prev = -1
test = df_temp_diff_redL
for i in tnrange(len(test)):
    
    curr = test["in_out"].iloc[i]
    if curr == prev:
        print(test["time"].iloc[i])
        raise AssertionError()
    prev = curr




In [96]:
def in_out(b):
    try:
        if int(b) == 1:
            return "in"
        elif int(b) == -1:
            return "out"
        else:
            return b
    except:
        return b
    
df_temp_diff_redL["in_out"]    = df_temp_diff_redL["in_out"].apply(in_out)
df_temp_diff_redL["Position"] = ["left" for x in range(len(df_temp_diff_redL))]

df_temp_diff_redM["in_out"]    = df_temp_diff_redM["in_out"].apply(in_out)
df_temp_diff_redM["Position"] = ["middle" for x in range(len(df_temp_diff_redM))]

df_temp_diff_redR["in_out"]    = df_temp_diff_redR["in_out"].apply(in_out)
df_temp_diff_redR["Position"] = ["right" for x in range(len(df_temp_diff_redR))]

In [97]:

rsv = df_temp_diff_redL
rsv = rsv.append(df_temp_diff_redM)
rsv = rsv.append(df_temp_diff_redR)


rsv.sort_index(inplace=True)

rsv.head()

Unnamed: 0,in_out,time,Position
61,in,2017-11-23 16:57:06,middle
62,out,2017-11-23 16:57:06,middle
82,in,2017-11-23 16:57:09,middle
83,out,2017-11-23 16:57:09,middle
87,in,2017-11-23 16:57:09,middle


Collect in and out times as indexes

In [98]:
# in and out are times, and inx and outx are the indexes
in_out_dict = {"left":{"in":[],"out":[],"inx":[],"outx":[]},"middle":{"in":[],"out":[],"inx":[],"outx":[]},
               "right":{"in":[],"out":[],"inx":[],"outx":[]}}

for t in tnrange(len(rsv)):
    in_out_dict[rsv["Position"].iloc[t]][rsv["in_out"].iloc[t]].append(rsv["time"].iloc[t])
    in_out_dict[rsv["Position"].iloc[t]][rsv["in_out"].iloc[t]+'x'].append(rsv.index[t])
    
    




We now have a dictionary of all activity in our 3 urinal areas, timestamped.  
However, we still need to differentiate a person walking past the urinals (to take care of other business),   
and potentially remove any instantaneous temperature fluctuations  
(but hopefully our background temperature comparison took care of that).  

We call a person visiting the urinal, to relieve some pressure, a __stay__.  
A stay is defined as a person visisting the urinal longer than 8 seconds <sup>1</sup>.

___
<sup>1</sup> <sub> This was experimentally determined, and later verified by [this Ig Noble prize winning paper](http://www.pnas.org/content/111/33/11932?tab=ds), and two [Grideye](http://www.scirp.org/jouRNAl/PaperInformation.aspx?PaperID=74726) [specific](http://ieeexplore.ieee.org/document/6798925/) papers </sub>


In [114]:
from datetime import timedelta

# removes all non stays and sort into order of occurence
def removeNonStays(timeFilt):
    positions = ["left","middle","right"]
    unsorted_stays = pd.DataFrame()
    for position in positions:
        curr_stays = pd.concat([pd.DataFrame(in_out_dict[position]["in"]),pd.DataFrame(in_out_dict[position]["out"]),
                           pd.DataFrame(in_out_dict[position]["inx"]),pd.DataFrame(in_out_dict[position]["outx"])],axis=1)       
        curr_stays.columns = ["in","out","in_index","out_index"]
        curr_stays = curr_stays[curr_stays["out"] - curr_stays["in"]> timedelta(seconds=timeFilt )]
        curr_stays["length"] = curr_stays["out"] - curr_stays["in"]
        curr_stays["Position"] = position
        unsorted_stays = unsorted_stays.append(curr_stays)
        
        sorted_stays = unsorted_stays.sort_index()
        
    return sorted_stays



In [115]:
timeFilter = 8
sorted_stays = removeNonStays(timeFilter)

removeNonStays(timeFilter).head()

Unnamed: 0,in,out,in_index,out_index,length,Position
23,2017-11-23 17:05:18,2017-11-23 17:05:52,2509,2814,00:00:34,left
28,2017-11-23 17:06:30,2017-11-23 17:07:12,3148,3520,00:00:42,left
37,2017-11-23 17:03:22,2017-11-23 17:03:48,1487,1719,00:00:26,right
44,2017-11-23 17:04:28,2017-11-23 17:05:15,2073,2486,00:00:47,right
69,2017-11-23 17:40:15,2017-11-23 17:40:52,21033,21364,00:00:37,left


In [117]:
pickle_full = "sorted_stays.p"

In [121]:
if os.path.isfile(pickle_full):
    print("You've already pickled!\n\nTo check out the data analysis go to \"3 - Data Analysis\" ")
   
else:
    sorted_stays.to_pickle(pickle_full)

You've already pickled!

To check out the data analysis go to "3 - Data Analysis" 


In [116]:
timeFilt = 8
stays = removeStays(timeFilt)
left_stays = stays[stays["Position"] == "left"]
middle_stays = stays[stays["Position"] == "middle"]
right_stays = stays[stays["Position"] == "right"]

print("Left: {}, Middle: {}, Right: {}".format(len(left_stays),len(middle_stays),len(right_stays)))

Left: 212, Middle: 64, Right: 176


Go to __3 - Data Analysis__