In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix

from features.data_provider import get_feature_columns, get_whole_dataset, set_feature_columns
from simulation.predictor import MaxProbabilityScorePredictor, ScorePredictor
from models.score_model import get_model
from simulation.analyse import get_win_probabilities, get_simulations
from simulation.simulation import run_actual_tournament_simulation
from db.simulation_table import get_simulation_results, delete_all
from bet.unit_strategy import UnitStrategy
from bet.kelly_strategy import KellyStrategy
from notebook_helpers import plot_bank_and_bets, print_report

In [2]:
all_features = get_feature_columns()
player_features = ['rating_diff', 'potential_diff', 'height_diff','weight_diff','age_diff',
                   'weak_foot_diff','internationl_repuatiotion_diff','crossing_diff','finishing_diff',
                   'heading_accuracy_diff','short_passing_diff','dribbling_diff','fk_accuracy_diff',
                   'long_passing_diff','ball_control_diff','acceleration_diff','sprint_speed_diff',
                   'reactions_diff','shot_power_diff','stamina_diff','strength_diff','long_shots_diff',
                   'aggression_diff','penalties_diff','marking_diff','standing_tackle_diff',
                  'gk_diving_diff', 'gk_handling_diff', 'gk_kicking_diff', 'gk_reflexes_diff']

other_features = ['elo_diff', 'away_goal_mean', 'away_goals_with_home', 
                  'goal_diff_with_away', 'home_goal_mean', 'home_goals_with_away']

assert (len(player_features) + len(other_features)) == len(all_features)

In [3]:
def simulate_betting_strategies(features, match_template_file, 
                                bet_file, filter_start=None, filter_end=None, interval=None,
                                iter_n=10):
    unit_profit = []
    kelly_profit = []
    accuracies = []

    set_feature_columns(features)
    
    home = get_whole_dataset("home_score", filter_start=filter_start, filter_end=filter_end, interval=interval)
    away = get_whole_dataset("away_score", filter_start=filter_start, filter_end=filter_end, interval=interval)
    X = pd.concat([home[0], away[0]])
    y = pd.concat([home[1], away[1]])

    print(X.shape)
    for i in range(iter_n):
        model = get_model(X=X, y=y, n_estimators=2000)
        predictor = MaxProbabilityScorePredictor(model)
        match_template = pd.read_csv(match_template_file)
        run_actual_tournament_simulation(match_template, predictor)
        tournament_simulation = get_simulation_results()
        tournament_simulation["true_outcome"] = np.sign(tournament_simulation["home_score"] - tournament_simulation["away_score"])
        delete_all()

        accuracy = sum(tournament_simulation["outcome"] == tournament_simulation["true_outcome"]) / tournament_simulation.shape[0]
        accuracies.append(accuracy)

        match_bets = pd.read_csv(bet_file)
        
        odds = match_bets[["1", "X", "2"]].values
        probabilities = tournament_simulation[["home_win_prob", "draw_prob", "away_win_prob"]].values        
        y_pred = tournament_simulation["outcome"].values
        y_true = tournament_simulation["true_outcome"].values

        unit_strategy = UnitStrategy(y_pred, y_true)
        unit_strategy.run(odds)

        kelly_strategy = KellyStrategy(y_true)
        kelly_strategy.run(odds, probabilities)

        unit_profit.append(unit_strategy.get_total_profit())
        kelly_profit.append(kelly_strategy.get_total_profit())

    return accuracies, unit_banks, kelly_banks

In [4]:
#### WC 2018
match_template = 'data/original/wc_2018_games_real.csv'
betting_file = 'data/original/wc_2018_bets.csv'

In [5]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file)
print_report(acc, unit, kelly)

(9478, 36)


  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b - a - c) + p3*np.log(1 + o3*c - a - b))
  return -(p1 * np.log(1 + o1*a - b - c) + p2 * np.log(1 + o2*b 

NameError: name 'unit_banks' is not defined

In [None]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file)
print_report(acc, unit, kelly)

In [None]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file)
print_report(acc, unit, kelly)

In [None]:
#### WC 2014
match_template = 'data/original/wc_2014_games_real.csv'
betting_file = 'data/original/wc_2014_bets.csv'

In [None]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file,
                                              filter_start="2014-06-12")
print_report(acc, unit, kelly)

In [None]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file,
                                               filter_start="2014-06-12")
print_report(acc, unit, kelly)

In [None]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file,
                                               filter_start="2014-06-12")
print_report(acc, unit, kelly)

In [None]:
#### WC 2010
match_template = 'data/original/wc_2010_games_real.csv'
betting_file = 'data/original/wc_2010_bets.csv'

In [None]:
acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file,
                                              filter_start="2010-06-11")
print_report(acc, unit, kelly)

In [None]:
acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file,
                                               filter_start="2010-06-11")
print_report(acc, unit, kelly)

In [None]:
acc, unit, kelly = simulate_betting_strategies(player_features, match_template, betting_file,
                                               filter_start="2010-06-11")
print_report(acc, unit, kelly)

In [None]:
for features_group in [all_features, other_features, player_features]:
    set_feature_columns(features_group)
    home = get_whole_dataset("home_score")
    away = get_whole_dataset("away_score")
    X = pd.concat([home[0], away[0]])
    y = pd.concat([home[1], away[1]])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model = get_model(X=X_train, y=y_train, n_estimators=500)

    y_pred_mu = model.predict(X_test)
    y_pred = np.around(y_pred_mu)
    print("Score: ", sum(np.around(y_pred) == y_test) / len(X_test))

In [None]:
#### WC 2018
match_template = 'data/original/wc_2018_games_real.csv'
betting_file = 'data/original/wc_2018_bets.csv'

for interval in [("2004-06-12", "2010-06-11"), ("2010-06-11", "2014-06-12"), ("2014-06-12", "2018-06-14")]:
    acc, unit, kelly = simulate_betting_strategies(all_features, match_template, betting_file,
                                               interval=interval, iter_n=10)
    print_report(acc, unit, kelly)

In [None]:
#### WC 2018
match_template = 'data/original/wc_2018_games_real.csv'
betting_file = 'data/original/wc_2018_bets.csv'

for interval in [("2004-06-12", "2010-06-12"), ("2010-06-12", "2014-06-12"), ("2014-06-12", "2018-06-14")]:
    acc, unit, kelly = simulate_betting_strategies(other_features, match_template, betting_file,
                                               interval=interval, iter_n=10)
    print_report(acc, unit, kelly)