In [35]:
import pandas as pd
import numpy as np

from bokeh.plotting import figure, output_notebook, show

In [36]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

We're going to track two trends:
1. The above/below trend
2. Which numbers are being selected more often than you would expect by chance

## Above/Below trend:

Classifying each row as 'over', 'under', or 'neither', based on how many of its numbers are above 40:

In [75]:
def above_below_40(input_list, threshold=40, over_min=12, under_max=8):
    """Classify a collection of numbers by how many exceed ``threshold``.

    With the default arguments this behaves exactly as before: values strictly
    greater than 40 are counted, 12 or more such values give 'over', 8 or
    fewer give 'under', and anything in between gives 'neither'.

    Parameters
    ----------
    input_list : iterable of numbers
        The values to classify.
    threshold : number, default 40
        A value counts as "high" only when it is strictly greater than this.
    over_min : int, default 12
        Minimum number of high values required to return 'over'.
    under_max : int, default 8
        Maximum number of high values for which 'under' is returned.

    Returns
    -------
    str
        One of 'over', 'under', or 'neither'.
    """
    # Count directly instead of materialising a list that is only measured.
    high_count = sum(1 for item in input_list if item > threshold)
    if high_count >= over_min:
        return 'over'
    if high_count <= under_max:
        return 'under'
    return 'neither'

In [76]:
# Read the Excel workbook, using its first column as the row index.
# NOTE(review): later cells treat each row as one draw and each cell as one
# selected number — confirm this matches the spreadsheet layout.
test_file = pd.read_excel('./data/omaha/2020_10_17.xlsx', index_col=0)

In [77]:
# One 'over' / 'under' / 'neither' label per row of the loaded sheet, in row order.
tracking_progress = [
    above_below_40(list(row_values))
    for _, row_values in test_file.iterrows()
]

Calculating trends:

In [85]:
def update_dict(input_dict, input_value):
    """Append one 0/1 indicator to every list in ``input_dict``.

    Each key whose value appears in ``input_value`` gets a 1 appended; every
    other key gets a 0. Exactly one entry is appended per key per call, so all
    lists stay the same length even when ``input_value`` contains duplicates.

    Parameters
    ----------
    input_dict : dict
        Maps each possible value to its list of 0/1 indicators. Mutated in place.
    input_value : iterable
        The values observed this round. Every one must be a key of ``input_dict``.

    Returns
    -------
    dict
        The same ``input_dict`` object, after the update.

    Raises
    ------
    KeyError
        If ``input_value`` contains something that is not a key of
        ``input_dict``. The dictionary is left untouched in that case.
    """
    # A set removes duplicates and gives constant-time membership checks.
    observed = set(input_value)

    # Validate before mutating so that a bad value cannot leave the lists
    # partially updated and out of step with each other.
    unknown = [value for value in observed if value not in input_dict]
    if unknown:
        raise KeyError('values not present in input_dict: ' + repr(unknown))

    for key, indicator_list in input_dict.items():
        indicator_list.append(1 if key in observed else 0)
    return input_dict

Converting to ones and zeroes:

In [86]:
# Start each outcome with an empty indicator list, then record every
# classified row as a 1 for its own outcome and a 0 for the other two.
over_under_dict = {outcome: [] for outcome in ('over', 'under', 'neither')}
for row_outcome in tracking_progress:
    over_under_dict = update_dict(over_under_dict, [row_outcome])

Converting to probabilities - starting with the overall (cumulative) probability; a rolling version will hopefully be added soon:

In [87]:
def calc_prob_dict(input_list):
    """Return the running mean of ``input_list`` as a list of floats.

    Element ``i`` of the result is the sum of the first ``i + 1`` inputs
    divided by ``i + 1``. An empty input yields an empty list.
    """
    running_total = np.cumsum(input_list)
    # Divisors 1, 2, ..., n: how many items contribute to each running total.
    items_seen = np.arange(1, running_total.size + 1)
    return (running_total / items_seen).tolist()

In [88]:
# Running probability of each outcome, keyed the same way as over_under_dict.
prob_dict = {
    outcome: calc_prob_dict(over_under_dict[outcome])
    for outcome in ('over', 'under', 'neither')
}

Plotting:

In [89]:
# Plot the running probability of each outcome as its own coloured line.
drunken_walk_fig = figure(toolbar_location=None)

for outcome, line_color, label in (('over', 'red', 'Over'),
                                   ('under', 'blue', 'Under'),
                                   ('neither', 'black', 'Neither')):
    outcome_probs = prob_dict[outcome]
    drunken_walk_fig.line(x=list(range(len(outcome_probs))), y=outcome_probs,
                          color=line_color, legend_label=label, line_width=4)

show(drunken_walk_fig)

## Overrepresented Numbers:

In [94]:
# One independent, initially empty indicator list for each number 1 through 80.
number_dict = {number: [] for number in range(1, 81)}

In [95]:
# Record, for every row of the sheet, which numbers appeared (1) and which did not (0).
# Re-running this cell appends to the existing lists, so rebuild number_dict first.
for _row_label, row_values in test_file.iterrows():
    number_dict = update_dict(number_dict, list(row_values))

In [96]:
# Running selection frequency for every number, keyed by the number itself.
number_prob_dict = {
    number: calc_prob_dict(indicators)
    for number, indicators in number_dict.items()
}

In [97]:
# Overlay the running frequency of every number on a single figure.
drunken_walk_fig = figure(toolbar_location=None)

for running_probs in number_prob_dict.values():
    x_positions = list(range(len(running_probs)))
    drunken_walk_fig.line(x=x_positions, y=running_probs,
                          color='black', line_width=2)

show(drunken_walk_fig)

Listing the top ten most overrepresented numbers:

In [109]:
# Summarise each number by the mean of its last five running-frequency values
# (or of all of them when fewer than five exist).
final_number_prob_dict = {
    number: np.mean(running_probs[-5:])
    for number, running_probs in number_prob_dict.items()
}

In [110]:
# Same entries, reordered from highest to lowest mean value.
final_number_prob_dict_sorted = dict(
    sorted(final_number_prob_dict.items(), key=lambda pair: pair[1], reverse=True)
)

In [115]:
# Print the first ten entries of the sorted mapping as "number<TAB>value".
for each_val, mean_prob in list(final_number_prob_dict_sorted.items())[:10]:
    print(each_val, mean_prob, sep='\t')

54	0.3998022541965489
20	0.38675466451621177
33	0.37815151928031904
13	0.36451045749972233
23	0.36016263141276583
4	0.3330696331470808
5	0.3290119424566426
28	0.3290119424566426
58	0.31981532512049043
72	0.311418898230851
