In [91]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [92]:
# Number of sample files loaded to build the per-key statistics
# (samples/sample_1.pickle .. samples/sample_<number_of_samples>.pickle).
number_of_samples = 10
# Number of key_N columns in the up-up and down-down tables; the down-up
# table is built with one more column than this.
number_of_keys = 8

In [93]:
def get_consecutive_diff(input_arr):
    """Return the differences between neighbouring elements of input_arr.

    Element k of the result is input_arr[k+1] - input_arr[k], so the result
    has one element fewer than the input and is empty for inputs of length
    0 or 1.
    """
    return [input_arr[k + 1] - input_arr[k] for k in range(len(input_arr) - 1)]
    
def get_arr_diff(down_events, up_events):
    """Return the element-wise difference up_events[k] - down_events[k].

    Both sequences must have the same length; an AssertionError is raised
    otherwise. The result has one entry per position.
    """
    assert len(down_events) == len(up_events)
    return [released - pressed for pressed, released in zip(down_events, up_events)]

def get_data(sample_number):
    """Load one pickled sample and derive its three timing vectors.

    Reads 'samples/sample_<sample_number>.pickle', which must unpickle to a
    mapping with 'key_up_events' and 'key_down_events' entries. Element [1]
    of every event is used as its value (presumably a timestamp -- confirm
    against the code that records the samples).

    Returns a tuple (up_up_arr, down_down_arr, down_up_arr):
      up_up_arr     -- differences between consecutive key-up values
      down_down_arr -- differences between consecutive key-down values
      down_up_arr   -- key-up value minus key-down value at each position

    Raises AssertionError if the numbers of up and down events differ.
    """
    f = 'samples/sample_' + str(sample_number) + '.pickle'
    # Context manager so the file handle is closed even if unpickling fails.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(f, 'rb') as sample_file:
        data = pickle.load(sample_file)

    up_arr = [x[1] for x in data['key_up_events']]
    down_arr = [x[1] for x in data['key_down_events']]

    assert len(up_arr) == len(down_arr)

    up_up_arr = get_consecutive_diff(up_arr)
    down_down_arr = get_consecutive_diff(down_arr)
    down_up_arr = get_arr_diff(down_arr, up_arr)

    return (up_up_arr, down_down_arr, down_up_arr)

def sample_dist(sample_val, mean, std_dev):
    """Return how far sample_val lies from mean, in units of std_dev.

    The divisor is converted to float first so the division is never an
    integer division. The result is signed: negative when sample_val is
    below mean.
    """
    deviation = sample_val - mean
    scale = float(std_dev)
    return deviation / scale

In [94]:
mean_list, std_dev_list = [], []
up_up_data, down_down_data, down_up_data = [], [], []

# Load sample_1 .. sample_<number_of_samples> and collect each sample's three
# timing vectors; every *_data list ends up with one entry per sample.
for i in range(1, number_of_samples+1):
    up_up_arr, down_down_arr, down_up_arr = get_data(i)
    for collected, vector in ((up_up_data, up_up_arr),
                              (down_down_data, down_down_arr),
                              (down_up_data, down_up_arr)):
        collected.append(vector)

In [95]:
# Tabulate the up-up differences: one row per sample, one column per position.
row_labels = ['sample_' + str(n) for n in range(1, number_of_samples+1)]
col_labels = ['key_' + str(n) for n in range(1, number_of_keys+1)]
df = pd.DataFrame(up_up_data, index=row_labels, columns=col_labels)

# Per-column mean and standard deviation across the samples
# (np.std defaults to ddof=0, i.e. the population standard deviation).
up_up_mean_list = [np.mean(df[key]) for key in df.columns]
up_up_std_dev_list = [np.std(df[key]) for key in df.columns]

df

Unnamed: 0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8
sample_1,88,113,167,200,97,39,184,88
sample_2,88,144,160,200,96,64,224,80
sample_3,87,153,520,192,200,24,199,89
sample_4,72,153,175,185,136,39,160,105
sample_5,96,104,145,183,121,23,164,72
sample_6,108,168,440,208,112,32,160,96
sample_7,110,57,168,168,88,56,176,80
sample_8,87,105,152,208,93,51,192,360
sample_9,87,153,120,224,88,64,175,89
sample_10,87,137,136,192,95,57,151,97


In [96]:
# Tabulate the down-down differences: one row per sample, one column per position.
row_labels = ['sample_' + str(n) for n in range(1, number_of_samples+1)]
col_labels = ['key_' + str(n) for n in range(1, number_of_keys+1)]
df = pd.DataFrame(down_down_data, index=row_labels, columns=col_labels)

# Per-column mean and standard deviation across the samples
# (np.std defaults to ddof=0, i.e. the population standard deviation).
down_down_mean_list = [np.mean(df[key]) for key in df.columns]
down_down_std_dev_list = [np.std(df[key]) for key in df.columns]

df

Unnamed: 0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8
sample_1,96,152,160,184,72,125,147,88
sample_2,103,161,144,200,68,96,203,85
sample_3,92,179,488,184,161,72,207,84
sample_4,80,168,160,192,112,80,184,72
sample_5,116,120,168,148,73,83,176,45
sample_6,60,148,504,184,101,67,176,80
sample_7,108,92,167,169,76,88,163,69
sample_8,111,105,144,184,108,81,170,361
sample_9,115,162,170,161,55,124,156,84
sample_10,116,148,144,176,85,96,146,73


In [97]:
# Tabulate the down-up differences: one row per sample, one column per position.
# get_arr_diff yields one value per event, whereas the consecutive-difference
# vectors are one element shorter, so this table needs one extra column.
row_labels = ['sample_' + str(n) for n in range(1, number_of_samples+1)]
col_labels = ['key_' + str(n) for n in range(1, number_of_keys + 2)]
df = pd.DataFrame(down_up_data, index=row_labels, columns=col_labels)

# Per-column mean and standard deviation across the samples
# (np.std defaults to ddof=0, i.e. the population standard deviation).
down_up_mean_list = [np.mean(df[key]) for key in df.columns]
down_up_std_dev_list = [np.std(df[key]) for key in df.columns]

df

Unnamed: 0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9
sample_1,119,111,72,79,95,120,34,71,71
sample_2,79,64,47,63,63,91,59,80,75
sample_3,87,82,56,88,96,135,87,79,84
sample_4,111,103,88,103,96,120,79,55,88
sample_5,91,71,55,32,67,115,55,43,70
sample_6,67,115,135,71,95,106,71,55,71
sample_7,104,106,71,72,71,83,51,64,75
sample_8,95,71,71,79,103,88,58,80,79
sample_9,111,83,74,24,87,120,60,79,84
sample_10,112,83,72,64,80,90,51,56,80


## Validating a Sample

In [98]:
f = 'samples/sample_' + str(i) + '.pickle'
sample = pickle.load(open(f,'rb'))

(up_up_sample, down_down_sample, down_up_sample) = get_data(11)

#print down_up_sample

up_up_score = 0
down_down_score = 0
down_up_score = 0

for i in range(0, len(up_up_sample)):
    up_up_score = up_up_score + sample_dist(up_up_sample[i], up_up_mean_list[i], up_up_std_dev_list[i])
    
for i in range(0, len(down_down_sample)):
    down_down_score = down_down_score + sample_dist(down_down_sample[i], down_down_mean_list[i], down_down_std_dev_list[i])
    
for i in range(0, len(down_up_sample)):
    down_up_score = down_up_score + sample_dist(down_up_sample[i], down_up_mean_list[i], down_up_std_dev_list[i])
    
up_up_score = up_up_score/number_of_samples
down_down_score = down_down_score/number_of_samples
down_up_score = down_up_score/number_of_samples

print 'Up up score:', up_up_score
print 'Down down score:', down_down_score
print 'Down up score:', down_up_score

Up up score: -0.393342605722
Down down score: -0.249188682816
Down up score: 0.0022738812751
