In [107]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import os

We're going to track two trends:
1. The above/below trend
2. Which numbers are being selected more often than you would expect by random chance

## Above/Below trend:

Determining whether each draw landed above or below 40:

In [47]:
# Functions:
def above_below_40(input_list):
    """Classify a draw by how many of its numbers are greater than 40.

    Args:
        input_list: iterable of drawn numbers.

    Returns:
        'over' when 11 or more numbers exceed 40, 'under' when 9 or fewer
        do, and 'neither' when exactly 10 do.
    """
    high_count = sum(1 for number in input_list if number > 40)
    if high_count >= 11:
        return 'over'
    if high_count <= 9:
        return 'under'
    # Only a count of exactly 10 reaches this point.
    return 'neither'

def left_right_track(input_list):
    """Classify a draw by which side of the board most of its numbers fall on.

    Relies on the module-level collections ``left`` and ``right`` being
    defined before the function is called.

    Args:
        input_list: iterable of drawn numbers.

    Returns:
        'left' when 11 or more numbers are in ``left``; otherwise 'right'
        when 11 or more are in ``right``; otherwise 'neither'.
    """
    left_hits = sum(1 for number in input_list if number in left)
    right_hits = sum(1 for number in input_list if number in right)
    # The left side is checked first, so it takes precedence.
    if left_hits >= 11:
        return 'left'
    if right_hits >= 11:
        return 'right'
    return 'neither'
    
def update_dict(input_dict, input_value):
    """Append one hit/miss indicator to every series in ``input_dict``.

    Each key named in ``input_value`` gets a 1 appended to its list; every
    other key that was present on entry gets a 0. The dictionary is mutated
    in place and also returned.

    Args:
        input_dict: mapping of key -> list of 0/1 indicators.
        input_value: collection of keys that occurred in this observation.

    Returns:
        The same ``input_dict`` object, after the update.

    Raises:
        KeyError: if ``input_value`` names a key missing from a plain dict.
    """
    # Snapshot the keys before touching the dictionary.
    known_keys = tuple(input_dict)
    for hit in input_value:
        input_dict[hit].append(1)
    misses = [key for key in known_keys if key not in input_value]
    for key in misses:
        input_dict[key].append(0)
    return input_dict

def calc_prob_dict(input_list):
    """Return the running mean of ``input_list`` as a list of floats.

    Element ``i`` of the result is the sum of the first ``i + 1`` inputs
    divided by ``i + 1``.

    Args:
        input_list: sequence of numeric values (0/1 indicators in this notebook).

    Returns:
        A list of floats with one entry per input element.
    """
    running_totals = np.cumsum(input_list)
    return [float(total) / games_seen
            for games_seen, total in enumerate(running_totals, start=1)]

def find_winner(input_dict, key_1, key_2):
    """Pick which of two series has the higher mean over its last ten values.

    The last ten entries of each series are compared with an independent
    two-sample t-test to label the difference.

    Args:
        input_dict: mapping of key -> sequence of numeric values.
        key_1: first key to compare.
        key_2: second key to compare.

    Returns:
        A ``(winner_key, label)`` tuple. ``winner_key`` is ``key_1`` only when
        its mean is strictly greater; ties go to ``key_2``. ``label`` is
        'significant' when the p-value is <= 0.05 and 'not significant'
        otherwise.
    """
    recent_1 = input_dict[key_1][-10:]
    recent_2 = input_dict[key_2][-10:]

    mean_1 = np.mean(recent_1)
    mean_2 = np.mean(recent_2)

    p_value = stats.ttest_ind(recent_1, recent_2)[1]

    # The winner and the significance label are independent decisions.
    leader = key_1 if mean_1 > mean_2 else key_2
    verdict = 'significant' if p_value <= 0.05 else 'not significant'
    return leader, verdict
        
# Function that evaluates winnings and earnings:
def check_win_earn(input_list, win_val, earn_val):
    """Update win and earnings tallies from the number of matching values.

    Args:
        input_list: the matching numbers for one game; only its length is used.
        win_val: running count of wins.
        earn_val: running earnings total.

    Returns:
        ``(win_val, earn_val)`` after the update: unchanged for fewer than 11
        matches, +1 win and +2 earnings for 11-15 matches, and +1 win and +4
        earnings for 16 or more.
    """
    matches = len(input_list)
    if matches < 11:
        return win_val, earn_val
    payout = 4 if matches >= 16 else 2
    return win_val + 1, earn_val + payout

In [125]:
communities = ['Omaha', 'Lincoln', 'Norfolk', 'Fremont']

# Setting the seaborn values:
# NOTE(review): sns.set appears to be an alias of sns.set_theme, so the second call
# likely re-applies the default style and overrides style="dark". Confirm which look
# is intended before merging these into one call; left unchanged so figures stay the same.
sns.set_theme(style="dark")
sns.set(font_scale = 1.2)

# Defining left and right:
# "left" holds the numbers 1-80 whose last digit is 1-5; "right" holds the rest.
left = list(range(1,6)) + list(range(11,16)) + list(range(21,26)) \
       + list(range(31,36)) + list(range(41,46)) + list(range(51,56)) \
       + list(range(61,66)) + list(range(71,76))
right = [item for item in list(range(1,81)) if item not in left]

# For calculating 10-spot winnings (number of matches -> payout):
winnings = {
    10 : 10000,
    9:3300,
    8:300,
    7:35,
    6:4,
    5:0,
    4:0,
    3:0,
    2:0,
    1:0,
    0:0
}

# Over/Under translator:
translator_dict = {'over':'Top', 'under':'Bottom'}

# Initiating the dictionary to update the template:
readme_template = {}

for each_community in communities:
    community_key = each_community.lower()

    #### OVER/UNDER REPRESENTATION:
    test_file = pd.read_excel('./data/input/%s.xlsx' % community_key, index_col=0)

    # Materialise every draw once; the three analyses below all reuse the same rows.
    all_draws = [list(each_row) for _, each_row in test_file.iterrows()]

    tracking_progress = [above_below_40(each_draw) for each_draw in all_draws]

    # Calculating trends - converting to ones and zeroes:
    over_under_dict = {'over':[],
                       'under':[],
                       'neither':[]}
    for each_val in tracking_progress:
        over_under_dict = update_dict(over_under_dict, [each_val])

    # Converting to probabilities:
    prob_dict = {'over' : calc_prob_dict(over_under_dict['over']),
                 'under' : calc_prob_dict(over_under_dict['under']),
                 'neither' : calc_prob_dict(over_under_dict['neither'])}

    # Plotting:
    ax_1 = sns.lineplot(x=list(range(len(prob_dict['over']))), y=prob_dict['over'], linewidth=3,
                        label='Top', color='red')
    ax_1 = sns.lineplot(x=list(range(len(prob_dict['under']))), y=prob_dict['under'], linewidth=3,
                        label='Bottom', color='blue')
    ax_1 = sns.lineplot(x=list(range(len(prob_dict['neither']))), y=prob_dict['neither'], linewidth=3,
                        label='Neither', color='black')
    ax_1.set(xlabel="Number of Games", ylabel="Probability")
    ax_1.figure.savefig('./data/output/%s_top_vs_bottom.png' % community_key)
    plt.close()

    #### LEFT/RIGHT REPRESENTATION:
    lr_tracking_progress = [left_right_track(each_draw) for each_draw in all_draws]

    # Calculating trends - converting to ones and zeroes:
    over_under_dict_lr = {'left':[],
                          'right':[],
                          'neither':[]}
    for each_val in lr_tracking_progress:
        over_under_dict_lr = update_dict(over_under_dict_lr, [each_val])

    # Converting to probabilities:
    prob_dict_lr = {'left' : calc_prob_dict(over_under_dict_lr['left']),
                    'right' : calc_prob_dict(over_under_dict_lr['right']),
                    'neither' : calc_prob_dict(over_under_dict_lr['neither'])}
    # Plotting:
    ax_1point5 = sns.lineplot(x=list(range(len(prob_dict_lr['left']))), y=prob_dict_lr['left'], linewidth=3,
                              label='Left', color='red')
    ax_1point5 = sns.lineplot(x=list(range(len(prob_dict_lr['right']))), y=prob_dict_lr['right'], linewidth=3,
                              label='Right', color='blue')
    ax_1point5 = sns.lineplot(x=list(range(len(prob_dict_lr['neither']))), y=prob_dict_lr['neither'], linewidth=3,
                              label='Neither', color='black')
    ax_1point5.set(xlabel="Number of Games", ylabel="Probability")
    ax_1point5.figure.savefig('./data/output/%s_left_vs_right.png' % community_key)
    plt.close()

    #### MOST REPRESENTED NUMBERS:
    # Making the big number dictionary:
    number_dict = {each_number: [] for each_number in range(1, 81)}

    # Getting the use of each number
    for each_draw in all_draws:
        number_dict = update_dict(number_dict, each_draw)

    # converting to probabilities:
    number_prob_dict = {each_number: calc_prob_dict(each_series)
                        for each_number, each_series in number_dict.items()}

    # Plotting numbers (one line per number, all drawn on the same axes):
    for each_value in number_prob_dict.keys():
        ax_2 = sns.lineplot(x=list(range(len(number_prob_dict[each_value]))), y=number_prob_dict[each_value],
                            color='black', linewidth=2)
    ax_2.set(xlabel="Number of Games", ylabel="Probability")
    ax_2.figure.savefig('./data/output/%s_top_numbers.png' % community_key)
    plt.close()

    # Getting the mean of the last five values:
    final_number_prob_dict = {each_value: np.mean(number_prob_dict[each_value][-5:])
                              for each_value in number_prob_dict}
    final_number_prob_dict_sorted = {k: v for k, v in sorted(final_number_prob_dict.items(),
                                                             key=lambda item: item[1], reverse=True)}

    # Writing the top 20 numbers to a file. The context manager guarantees the handle
    # is closed even if a write fails. The file name keeps the community's original
    # capitalisation, unlike the other outputs, so existing links keep working.
    top_numbers_list = []
    with open('./data/output/%s_top_numbers.txt' % each_community, 'w') as top_number_file:
        top_number_file.write('Number' + '\t' + 'Use (Fraction)' + '\n')
        for each_val, each_fraction in list(final_number_prob_dict_sorted.items())[:20]:
            top_number_file.write(str(each_val) + '\t' + str(each_fraction) + '\n')
            top_numbers_list.append(each_val)

    ##### DETERMINING WINNINGS #####
    winner_tb, degree_tb = find_winner(prob_dict, 'over', 'under')
    winner_lr, degree_lr = find_winner(prob_dict_lr, 'left', 'right')

    # Adding to template:
    readme_template['%s_HVL_WINNER' % community_key] = translator_dict[winner_tb]
    readme_template['%s_LVR_WINNER' % community_key] = winner_lr.capitalize()
    readme_template['%s_HVL_DEGREE' % community_key] = degree_tb.capitalize()
    readme_template['%s_LVR_DEGREE' % community_key] = degree_lr.capitalize()

    # Getting the last ten games (explicitly positional):
    last_ten_games = test_file.iloc[-10:]

    # Setting up for numbers earnings:
    top_ten_numbers = top_numbers_list[:10]

    # Initializing earnings:
    topbot_earnings = 0
    lr_earnings = 0
    numbers_earnings = 0

    # Checking winnings over the last ten games:
    for idx, each_latest_game in last_ten_games.iterrows():
        temp_list = list(each_latest_game)
        # Checking the top/bottom breakout:
        if winner_tb == 'over':
            winning_vals = [item for item in temp_list if item > 40]
            _, topbot_earnings = check_win_earn(winning_vals, 0, topbot_earnings)
        elif winner_tb == 'under':
            winning_vals = [item for item in temp_list if item <= 40]
            _, topbot_earnings = check_win_earn(winning_vals, 0, topbot_earnings)
        # Checking the left/right breakout:
        if winner_lr == 'left':
            winning_vals = [item for item in temp_list if item in left]
            _, lr_earnings = check_win_earn(winning_vals, 0, lr_earnings)
        elif winner_lr == 'right':
            winning_vals = [item for item in temp_list if item in right]
            _, lr_earnings = check_win_earn(winning_vals, 0, lr_earnings)

        # Numbers earnings: look the match count up in the 10-spot payout table.
        # (Indexing top_ten_numbers here would add a ball number rather than a payout
        # and would raise IndexError when all ten numbers match.)
        numbers_correct = len([item for item in temp_list if item in top_ten_numbers])
        numbers_earnings = numbers_earnings + winnings[numbers_correct]

    # Adding to template:
    readme_template['%s_HVL_WINNINGS' % community_key] = topbot_earnings
    readme_template['%s_LVR_WINNINGS' % community_key] = lr_earnings
    readme_template['%s_TOPSPOT_WINNINGS' % community_key] = numbers_earnings

In [126]:
# Updating readme:
# Every placeholder key collected in readme_template is replaced by its value, line by
# line, and the result is written to a temporary file that then replaces README.md.

# Substitute longer placeholders first so that a key which happens to be a prefix of
# another key cannot clobber part of the longer one.
things_to_fix = sorted(readme_template.keys(), key=len, reverse=True)

# Context managers guarantee both handles are closed even if a read/write fails.
with open('readmetemplate.md', 'r') as input_readme, \
        open('intermediate_readme.md', 'w') as intermediate_readme:
    for each_line in input_readme:
        current_string = each_line
        for each_item_to_update in things_to_fix:
            if each_item_to_update in current_string:
                current_string = current_string.replace(each_item_to_update,
                                                        str(readme_template[each_item_to_update]))
        intermediate_readme.write(current_string)

# os.replace overwrites README.md and removes the intermediate file in one step. Unlike
# shelling out to cp/rm it works on any platform and raises if the move fails.
os.replace('intermediate_readme.md', 'README.md')

0