In [1]:
import pandas as pd
import numpy as np
import pymongo as mongo
import matplotlib.pyplot as plt
import glob
import ipywidgets as widgets
import copy
%matplotlib widget

In [2]:
# Accept and clean data
# NOTE(review): the prediction files are located through an absolute path while
# allData.csv is read relative to the working directory -- confirm both point at
# the same data folder and consider a single configurable DATA_DIR.
file_path = r'/home/brainbox/Create/Code/Bankroll/data'

# glob returns matches in arbitrary order; sort them so the concatenation order
# (and therefore which duplicate `keep='last'` retains below) is reproducible.
bets_test_files = sorted(glob.glob(file_path + "/f_to_test_waka_*.csv"))
if not bets_test_files:
    raise FileNotFoundError(
        "No prediction files matching 'f_to_test_waka_*.csv' found in " + file_path
    )

# One frame per prediction file, stacked into a single table.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in bets_test_files]
preds_data = pd.concat(li, axis=0, ignore_index=True)
wins_data  = pd.read_csv("./data/allData.csv")

# Inner join: keep only predictions whose (Fighter, Opponent) pair matches a
# recorded (R_fighter, B_fighter) pair.
merged_data_1 = pd.merge(preds_data, wins_data, how="inner", left_on=["Fighter", "Opponent"], right_on=["R_fighter", "B_fighter"])

# Independent snapshot of the merge result before de-duplication.
merged_data_dups = merged_data_1.copy()

# Keep the last row for every (Fighter, Opponent, date) combination.
merged_data_1 = merged_data_1.drop_duplicates(subset=['Fighter', 'Opponent', 'date'], keep='last')

# Data of interest: both fighters, the probability assigned to each, and the winner.
# B_prob is the complement of R_prob on a 0-100 scale.
doi = (
    merged_data_1[['R_fighter', 'PercentOdds', 'B_fighter', 'Winner']]
    .assign(PercentOther=lambda frame: 100 - frame['PercentOdds'])
    [['R_fighter', 'PercentOdds', 'B_fighter', 'PercentOther', 'Winner']]
    .rename(columns={'PercentOdds': 'R_prob', 'PercentOther': 'B_prob'})
)


In [3]:
# Separate data into Red and Blue
# pd.cut uses right-closed intervals here: R_prob in (0, 50] is labelled 'Blue'
# and R_prob in (50, 100] is labelled 'Red'.
bin_by_picks         = doi  # same object as doi, so doi also gains the 'Pick' column
bin_by_picks['Pick'] = pd.cut(bin_by_picks['R_prob'], [0, 50, 100], labels=['Blue', 'Red'])

# Group on the scalar key (not a one-element list) so that get_group() can be
# called with a plain label; grouping by ['Pick'] and then asking for 'Red' is
# deprecated in recent pandas. observed=False spells out the historical default
# for categorical groupers.
picks                = bin_by_picks.groupby('Pick', observed=False)

# Separate out red picks (the helper 'Pick' column is no longer needed)
red_picks            = picks.get_group('Red').drop(columns=['Pick'])

# Separate out blue picks
blue_picks           = picks.get_group('Blue').drop(columns=['Pick'])

In [4]:
# Put data into buckets
# These are aliases, not copies: the 'prob_bin' column added below is therefore
# also visible through red_picks / blue_picks, which later cells read.
red_bucket  = red_picks
blue_bucket = blue_picks

split       = 5 # Change this value to create bins

# Bin edges every `split` points, always closed off at 100 so the top of the
# range is covered even when `split` does not divide 100 evenly.
bins        = list(range(0, 100, split)) + [100]

# One label per interval, built from the edges themselves so the number of
# labels always matches the number of bins (e.g. '0 - 5', '5 - 10', ...).
bin_names   = [f'{lo} - {hi}' for lo, hi in zip(bins[:-1], bins[1:])]

# Create buckets by pick (red or blue): each pick is binned on the probability
# of the corner that was picked.
red_bucket ['prob_bin'] = pd.cut(red_bucket ['R_prob'], bins, labels=bin_names)
blue_bucket['prob_bin'] = pd.cut(blue_bucket['B_prob'], bins, labels=bin_names)

In [5]:
# Build one groupby object per corner, keyed on each pick's probability bin.
# The key is passed as a Series, so 'prob_bin' also stays available as a
# regular column inside every group.
red_prob_bins  = red_bucket.groupby(red_bucket['prob_bin'])
blue_prob_bins = blue_bucket.groupby(blue_bucket['prob_bin'])

In [6]:
# Count how many picks fall into each probability bin, per corner.
# 'prob_bin' is read from the bucket frames that the binning cell writes to.
# sort=False keeps the counts in the order value_counts produces for the
# column rather than sorting by frequency.
red_bin_counts    = red_bucket['prob_bin'].value_counts(sort=False)
blue_bin_counts   = blue_bucket['prob_bin'].value_counts(sort=False)

# One stem plot per corner. The x labels come from each count series' own index
# so every stem is guaranteed to sit on the bin it belongs to.
for corner, bin_counts in (('Red', red_bin_counts), ('Blue', blue_bin_counts)):
    fig, ax = plt.subplots()
    ax.stem(list(bin_counts.index.astype(str)), bin_counts.to_numpy())
    ax.set(
        title=corner + ' picks per probability bin',
        xlabel='Probability of picked corner (bin)',
        ylabel='Number of picks',
    )
    ax.tick_params(axis='x', labelrotation=90)
    fig.tight_layout()




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<StemContainer object of 3 artists>

In [7]:
# Slice win/loss by bin

def _win_rate_by_bin(bucket, picked_corner, labels):
    """Tabulate fight outcomes per probability bin for one group of picks.

    Parameters
    ----------
    bucket : DataFrame with a 'prob_bin' column and a 'Winner' column.
    picked_corner : 'Red' or 'Blue' -- the corner whose win rate is reported.
    labels : list of bin labels; the result has one row per label, in order.

    Returns
    -------
    DataFrame with one count column per 'Winner' value (always at least 'Red'
    and 'Blue'), a 'bin_names' column, and 'percent' = wins of `picked_corner`
    divided by (Red wins + Blue wins). 'percent' is NaN for bins with no
    Red/Blue results.
    """
    per_bin_counts = [
        bucket.loc[bucket['prob_bin'] == label, 'Winner'].value_counts()
        for label in labels
    ]
    wins = pd.DataFrame(per_bin_counts).reset_index(drop=True)

    # Make sure both corners have a column even if one of them never won.
    for corner in ('Red', 'Blue'):
        if corner not in wins.columns:
            wins[corner] = 0

    # fillna returns a new frame; the result must be kept, otherwise a bin in
    # which only one corner ever won ends up with a NaN win rate.
    wins = wins.fillna(0)

    wins['bin_names'] = labels
    wins['percent'] = wins[picked_corner] / (wins['Red'] + wins['Blue'])
    return wins


# Red picks are scored on Red wins, Blue picks on Blue wins.
red_wins  = _win_rate_by_bin(red_bucket, 'Red', bin_names)
blue_wins = _win_rate_by_bin(blue_bucket, 'Blue', bin_names)

for corner, wins in (('Red', red_wins), ('Blue', blue_wins)):
    fig, ax = plt.subplots()
    ax.stem(list(wins['bin_names']), wins['percent'].to_numpy())
    ax.set(
        title=corner + ' picks: observed win rate per probability bin',
        xlabel='Probability of picked corner (bin)',
        ylabel='Fraction of fights won by picked corner',
    )
    ax.tick_params(axis='x', labelrotation=90)
    fig.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<StemContainer object of 3 artists>

In [8]:
# Sort red fighter information by probability and turn the outcome into a
# binary flag (1 = Red won, 0 = anything else).
red_fighters = (
    red_bucket
    .drop(columns=['prob_bin'])
    .sort_values(by='R_prob')
    .assign(Winner=lambda frame: np.where(frame['Winner'] == 'Red', 1, 0))
)

# Width of the moving window, in fights.
N_kernel     = 261

# np.convolve(..., 'valid') silently swaps its operands when the signal is
# shorter than the kernel, which would produce meaningless averages here.
if len(red_fighters) < N_kernel:
    raise ValueError(
        "Need at least N_kernel = %d red picks for the moving average, got %d"
        % (N_kernel, len(red_fighters))
    )

# Create linear averaging kernel h_mu[n]
h_mu = np.ones(N_kernel) / N_kernel

# Create P_red[n] (sorted probabilities of fighters)
P_red = red_fighters['R_prob'].to_numpy()

# Convolve P_red[n]*h_mu[n]: mean predicted probability inside each window.
# 'valid' keeps only fully-overlapping windows, so mu[k] and win_percents[k]
# below are both computed over the same N_kernel consecutive fights.
mu = np.convolve(P_red, h_mu, 'valid')

# Convolve binary outcomes and find experimental probability
win_bin = red_fighters['Winner'].to_numpy()
h_sum = np.ones(N_kernel)
win_percents = (np.convolve(win_bin, h_sum, 'valid'))/N_kernel

# Reference line y = x / 100 (predicted probability equals observed win rate),
# sampled with as many points as the measured curve.
n_points = max(len(win_percents), 2)
x = np.linspace(45, 100, n_points)
y = np.linspace(.45, 1, n_points)

# Plot the outcomes
fig, ax = plt.subplots()
ax.plot(mu, win_percents, label='Observed (moving window of %d fights)' % N_kernel)
ax.plot(x, y, label='Reference: y = x / 100')
ax.set(
    title='Red picks: predicted probability vs. observed win rate',
    xlabel='Mean predicted R_prob in window',
    ylabel='Fraction of fights won by Red in window',
)
ax.legend();







Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fac7a798df0>]

In [9]:
# Scratch paper
# Count red picks per probability band. Lower edges are inclusive so picks
# priced exactly at 60/70/80/90 land in a band instead of being dropped by
# strict inequalities on both sides.
# NOTE(review): the last band stops below 97 -- confirm that cut-off is intended.
r_prob = red_fighters['R_prob']
buck1 = int((r_prob < 60).sum())
buck2 = int(((r_prob >= 60) & (r_prob < 70)).sum())
buck3 = int(((r_prob >= 70) & (r_prob < 80)).sum())
buck4 = int(((r_prob >= 80) & (r_prob < 90)).sum())
buck5 = int(((r_prob >= 90) & (r_prob < 97)).sum())

print(len(win_percents))
# Same number of samples as the moving-average curve, instead of a hard-coded length.
a = np.linspace(50, 100, len(win_percents))




2490
