In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix

from features.data_provider import get_feature_columns, get_whole_dataset, set_feature_columns
from simulation.predictor import MaxProbabilityOutcomePredictor
from models.outcome_model import get_model
from simulation.analyse import get_win_probabilities, get_simulations
from simulation.simulation import run_actual_tournament_simulation
from db.simulation_table import get_simulation_results, delete_all
from notebook_helpers import plot_bank_and_bets, run_unit_strategy, run_kelly_strategy

In [2]:
all_features = get_feature_columns()
player_features = ['rating_diff', 'potential_diff', 'height_diff','weight_diff','age_diff',
                   'weak_foot_diff','internationl_repuatiotion_diff','crossing_diff','finishing_diff',
                   'heading_accuracy_diff','short_passing_diff','dribbling_diff','fk_accuracy_diff',
                   'long_passing_diff','ball_control_diff','acceleration_diff','sprint_speed_diff',
                   'reactions_diff','shot_power_diff','stamina_diff','strength_diff','long_shots_diff',
                   'aggression_diff','penalties_diff','marking_diff','standing_tackle_diff',
                  'gk_diving_diff', 'gk_handling_diff', 'gk_kicking_diff', 'gk_reflexes_diff']

other_features = ['elo_diff', 'away_goal_mean', 'away_goals_with_home', 
                  'goal_diff_with_away', 'home_goal_mean', 'home_goals_with_away']

assert (len(player_features) + len(other_features)) == len(all_features)

In [3]:
def simulate_betting_strategies(features, match_template_file, bet_file, filter_start=None, filter_end=None):
    unit_banks = []
    kelly_banks = []
    accuracies = []

    set_feature_columns(features)
    
    X, y = get_whole_dataset("home_win", filter_start=filter_start, filter_end=filter_end)

    print(X.shape)
    for i in range(10):
        model = get_model(X=X, y=y, n_estimators=500)
        predictor = MaxProbabilityOutcomePredictor(model)
        match_template = pd.read_csv(match_template_file)
        run_actual_tournament_simulation(match_template, predictor)
        tournament_simulation = get_simulation_results()
        tournament_simulation["true_outcome"] = np.sign(tournament_simulation["home_score"] - tournament_simulation["away_score"])
        delete_all()

        accuracy = sum(tournament_simulation["outcome"] == tournament_simulation["true_outcome"]) / tournament_simulation.shape[0]
        accuracies.append(accuracy)

        match_bets = pd.read_csv(bet_file)

        y_pred = tournament_simulation["outcome"].values
        y_true = tournament_simulation["true_outcome"].values
        odds = match_bets[["1", "X", "2"]].values
        unit_bank = run_unit_strategy(y_pred, y_true, odds)

        probabilities = tournament_simulation[["home_win_prob", "draw_prob", "away_win_prob"]].values
        kelly_bank = run_kelly_strategy(y_true, odds, probabilities)

        unit_banks.append(unit_bank)
        kelly_banks.append(kelly_bank)

    return accuracies, unit_banks, kelly_banks

In [4]:
#### WC 2018
match_template = 'data/original/wc_2018_games_real.csv'
betting_file = 'data/original/wc_2018_bets.csv'

In [5]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file)
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(8124, 36)
AVG Accuracy:  0.5703125
AVG Unit bank:  67.795
AVG Kelly bank:  81.5687295396


In [6]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file)
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(8124, 6)
AVG Accuracy:  0.515625
AVG Unit bank:  60.19
AVG Kelly bank:  37.9295302604


In [7]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file)
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(8124, 30)
AVG Accuracy:  0.6078125
AVG Unit bank:  76.411
AVG Kelly bank:  99.6474080705


In [8]:
#### WC 2014
match_template = 'data/original/wc_2014_games_real.csv'
betting_file = 'data/original/wc_2014_bets.csv'

In [9]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file,
                                              filter_start="2014-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(5153, 36)
AVG Accuracy:  0.6203125
AVG Unit bank:  76.918
AVG Kelly bank:  71.7821926059


In [10]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file,
                                               filter_start="2014-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(5153, 6)


  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))


AVG Accuracy:  0.578125
AVG Unit bank:  70.11
AVG Kelly bank:  99.2778135356


In [11]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file,
                                               filter_start="2014-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(5153, 30)


  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))


AVG Accuracy:  0.5859375
AVG Unit bank:  70.088
AVG Kelly bank:  53.6713158025


  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))


In [12]:
#### WC 2010
match_template = 'data/original/wc_2010_games_real.csv'
betting_file = 'data/original/wc_2010_bets.csv'

In [13]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file,
                                              filter_start="2010-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(2119, 36)
AVG Accuracy:  0.528125
AVG Unit bank:  60.828
AVG Kelly bank:  62.7681966669


In [14]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file,
                                               filter_start="2010-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(2119, 6)


  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))


AVG Accuracy:  0.5578125
AVG Unit bank:  66.519
AVG Kelly bank:  101.148833191


In [15]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file,
                                               filter_start="2010-06-12")
print("AVG Accuracy: ", np.mean(acc))
print("AVG Unit bank: ", np.mean(unit))
print("AVG Kelly bank: ", np.mean(kelly))

(2119, 30)
AVG Accuracy:  0.540625
AVG Unit bank:  64.174
AVG Kelly bank:  54.374098658


In [16]:
from sklearn.model_selection import train_test_split

for features_group in [all_features, other_features, player_features]:
    set_feature_columns(features_group)
    X, y = get_whole_dataset("home_win")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model = get_model(X=X_train, y=y_train, n_estimators=500)

    score = model.score(X_test, y_test)
    print("Score: ", score)

Score:  0.550153846154
Score:  0.544923076923
Score:  0.532
