In [46]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

We're going to track two trends:
1. The above/below trend
2. Which numbers are being selected more often than you would expect by random chance

## Above/Below trend:

Determining whether each game came out above or below:

In [47]:
# Functions:
def above_below_40(input_list):
    """Classify a list of numbers by how many of them are greater than 40.

    Returns 'over' when 11 or more items exceed 40, 'under' when 9 or fewer
    do, and 'neither' when exactly 10 do.
    """
    high_count = sum(1 for number in input_list if number > 40)
    if high_count >= 11:
        return 'over'
    if high_count <= 9:
        return 'under'
    return 'neither'

def left_right_track(input_list):
    """Classify a list of numbers by which group holds most of them.

    Membership is tested against the module-level ``left`` and ``right``
    collections, which must exist before this function is called.
    Returns 'left' when at least 11 items are in ``left``, otherwise 'right'
    when at least 11 items are in ``right``, otherwise 'neither'.
    """
    left_count = sum(1 for number in input_list if number in left)
    right_count = sum(1 for number in input_list if number in right)
    if left_count >= 11:
        return 'left'
    if right_count >= 11:
        return 'right'
    return 'neither'
    
def update_dict(input_dict, input_value):
    """Record one observation in every tracking list of ``input_dict``.

    Every key that appears in ``input_value`` gets a 1 appended to its list
    and every other key gets a 0, so each list grows by exactly one entry
    per call and all lists stay the same length.

    Parameters
    ----------
    input_dict : dict
        Maps each possible option to its list of 0/1 indicators. The lists
        are mutated in place.
    input_value : iterable
        The options observed this time. A value that occurs more than once
        is still recorded as a single 1.

    Returns
    -------
    dict
        The same ``input_dict`` object that was passed in.

    Raises
    ------
    KeyError
        If ``input_value`` contains something that is not a key of
        ``input_dict``. This is checked before any list is modified, so a
        failed call leaves ``input_dict`` untouched.
    """
    # A set collapses duplicates and gives cheap membership tests below.
    observed = set(input_value)

    # Validate first so an unknown value cannot leave the lists half-updated.
    unknown = [value for value in observed if value not in input_dict]
    if unknown:
        raise KeyError('values not present in input_dict: %r' % (unknown,))

    for option, history in input_dict.items():
        history.append(1 if option in observed else 0)
    return input_dict

def calc_prob_dict(input_list):
    """Return the running mean of ``input_list`` as a list of floats.

    Element ``i`` of the result is the sum of the first ``i + 1`` inputs
    divided by ``i + 1``. An empty input gives an empty list.
    """
    running_totals = np.cumsum(input_list)
    return [float(total) / count
            for count, total in enumerate(running_totals, start=1)]

In [48]:
communities = ['Omaha', 'Lincoln', 'Norfolk', 'Fremont']

# Setting the seaborn values. Style and font scale are set in ONE call:
# `sns.set` is an alias of `sns.set_theme`, so a separate
# `sns.set(font_scale=1.2)` would reset the style to the default "darkgrid".
sns.set_theme(style="dark", font_scale=1.2)

# Defining left and right: "left" is 1-5, 11-15, ..., 71-75; "right" is every
# other number from 1 to 80.
left = [start + offset for start in range(1, 81, 10) for offset in range(5)]
right = [item for item in range(1, 81) if item not in left]


def _plot_probability_lines(lines, output_path):
    """Draw running-probability curves on a fresh figure and save it.

    Parameters
    ----------
    lines : iterable of (list of float, dict)
        One entry per curve: the per-game probabilities, and the keyword
        arguments (label, color, linewidth) forwarded to ``sns.lineplot``.
    output_path : str
        File the figure is written to.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on; its figure has already been closed.
    """
    fig, ax = plt.subplots()
    for probabilities, line_kwargs in lines:
        # Entry i holds the probability after i + 1 games, so x starts at 1.
        games_played = list(range(1, len(probabilities) + 1))
        sns.lineplot(x=games_played, y=probabilities, ax=ax, **line_kwargs)
    ax.set(xlabel="Number of Games", ylabel="Probability")
    fig.savefig(output_path)
    # Close explicitly: several figures are created per community.
    plt.close(fig)
    return ax


for each_community in communities:
    community_slug = each_community.lower()

    # The first spreadsheet column is the index; each remaining row is
    # treated as the numbers of one game.
    test_file = pd.read_excel('./data/input/%s.xlsx' % community_slug, index_col=0)
    # Materialise the rows once instead of re-running iterrows() per analysis.
    game_rows = [list(each_row) for _, each_row in test_file.iterrows()]

    #### OVER/UNDER REPRESENTATION:
    tracking_progress = [above_below_40(row) for row in game_rows]

    # Calculating trends - converting to ones and zeroes:
    over_under_dict = {'over': [], 'under': [], 'neither': []}
    for each_val in tracking_progress:
        over_under_dict = update_dict(over_under_dict, [each_val])

    # Converting to probabilities:
    prob_dict = {outcome: calc_prob_dict(flags)
                 for outcome, flags in over_under_dict.items()}

    # Plotting:
    ax_1 = _plot_probability_lines(
        [(prob_dict['over'], {'label': 'Over', 'color': 'red', 'linewidth': 3}),
         (prob_dict['under'], {'label': 'Under', 'color': 'blue', 'linewidth': 3}),
         (prob_dict['neither'], {'label': 'Neither', 'color': 'black', 'linewidth': 3})],
        './data/output/%s_top_vs_bottom.png' % community_slug)

    #### LEFT/RIGHT REPRESENTATION:
    lr_tracking_progress = [left_right_track(row) for row in game_rows]

    # Calculating trends - converting to ones and zeroes:
    over_under_dict_lr = {'left': [], 'right': [], 'neither': []}
    for each_val in lr_tracking_progress:
        over_under_dict_lr = update_dict(over_under_dict_lr, [each_val])

    # Converting to probabilities:
    prob_dict_lr = {outcome: calc_prob_dict(flags)
                    for outcome, flags in over_under_dict_lr.items()}

    # Plotting:
    ax_1point5 = _plot_probability_lines(
        [(prob_dict_lr['left'], {'label': 'Left', 'color': 'red', 'linewidth': 3}),
         (prob_dict_lr['right'], {'label': 'Right', 'color': 'blue', 'linewidth': 3}),
         (prob_dict_lr['neither'], {'label': 'Neither', 'color': 'black', 'linewidth': 3})],
        './data/output/%s_left_vs_right.png' % community_slug)

    #### MOST REPRESENTED NUMBERS:
    # Making the big number dictionary (one 0/1 history per number 1-80):
    number_dict = {each_number: [] for each_number in range(1, 81)}

    # Getting the use of each number
    for row in game_rows:
        number_dict = update_dict(number_dict, row)

    # Converting to probabilities:
    number_prob_dict = {each_number: calc_prob_dict(flags)
                        for each_number, flags in number_dict.items()}

    # Plotting numbers (one unlabelled black line per number):
    ax_2 = _plot_probability_lines(
        [(probabilities, {'color': 'black', 'linewidth': 2})
         for probabilities in number_prob_dict.values()],
        './data/output/%s_top_numbers.png' % community_slug)

    # Getting the mean of the last five values:
    final_number_prob_dict = {each_value: np.mean(probabilities[-5:])
                              for each_value, probabilities in number_prob_dict.items()}
    # Highest fraction first.
    final_number_prob_dict_sorted = dict(sorted(final_number_prob_dict.items(),
                                                key=lambda item: item[1], reverse=True))

    # Writing the top 20 numbers to a file. `with` guarantees the file is
    # closed even if a write fails.
    # NOTE(review): unlike the .png outputs, this path keeps the community's
    # original capitalisation (e.g. "Omaha_top_numbers.txt"). Left unchanged
    # so existing consumers of the file keep working - confirm whether it
    # should be lower-cased too.
    with open('./data/output/%s_top_numbers.txt' % each_community, 'w') as top_number_file:
        top_number_file.write('Number' + '\t' + 'Use (Fraction)' + '\n')
        for each_val, fraction in list(final_number_prob_dict_sorted.items())[:20]:
            top_number_file.write(str(each_val) + '\t' + str(fraction) + '\n')