In [165]:
"""
G - goals
GA - assists
S - shots
PAS - passes
FK - free kicks
COR - corners
FC - fouls conceded
FS - fouls suffered
Y - yellows
R - reds
PEN - penalties
MIN - minutes
TA - tackles
CLR - clearances
SAV - saves
GC - goals conceded
GK - goal kicks
""";

In [166]:
# Disabled cell: the statements below sit inside a string literal, so none of
# them execute when the cell runs; the trailing `;` suppresses the echoed output.
# NOTE(review): presumably this is the pipeline that produced the prepared CSV
# loaded in the data-preparation cell -- confirm, then move it to a markdown
# cell or delete it if obsolete.
"""
df = collect_data_from_csvs()
df = transform_data(df)
df = aggregate_features(df)
df = add_goals(df)
df.dropna(inplace = True)
df.reset_index(drop = True, inplace = True)
""";

In [167]:
import pandas as pd
from data_processor import *
from modeler import *
import numpy as np
from sklearn import svm, preprocessing
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.ensemble import AdaBoostClassifier
from sklearn.decomposition import PCA
from multiprocessing import Pool
import time

%matplotlib

Using matplotlib backend: Qt5Agg


In [168]:
# data = collect_data_from_csvs()
# data = transform_data(data)
# data = data.dropna(axis = 0)
# data = data.sort_values(by = "kickoff")
# data = data.reset_index(drop = True)

In [170]:
# data preparation

# Load the prepared fixtures table; `prices` is the name of the bookmaker
# column handed to scale_and_add_goals_prices (and later to the trading step).
data = pd.read_csv("42.csv", encoding = "latin1")
prices = "B365D"
data = scale_and_add_goals_prices(data, prices)

# Drop the fouls-conceded / fouls-suffered features.
# Despite its name, `remove_fouls` is the set of columns that are KEPT
# (every column except the eight foul features listed here).
remove_fouls = set(data.columns) - set(['FC_d_away', 'FC_d_home', 'FC_m_away', 'FC_m_home',
                                        'FS_d_away', 'FS_d_home', 'FS_m_away', 'FS_m_home'])
# Build the kept-column list by walking data.columns rather than calling
# list() on the set: set iteration order for strings depends on hash
# randomisation, so the old code produced a different column order on every
# kernel restart. Walking the original columns keeps the order stable.
remove_fouls_list = [col for col in data.columns if col in remove_fouls]
data = data[remove_fouls_list]

In [172]:
# PCA

# One integer per position group, passed straight to apply_pca_to_positions.
# NOTE(review): presumably the number of principal components to keep for that
# group -- the values match the second entry of each list in the report shown
# below; confirm against apply_pca_to_positions.
pca_params = dict(attack = 1, midfield = 2, defence = 3, goalkeeper = 2)

# Returns the transformed feature table plus a per-side / per-position report;
# the bare `report` on the last line displays it as the cell output.
data_pca, report = apply_pca_to_positions(data, pca_params)
report

{'away': {'attack': [4, 1, 0.75],
  'defence': [5, 3, 0.70999999999999996],
  'goalkeeper': [3, 2, 0.83999999999999997],
  'midfield': [4, 2, 0.76000000000000001]},
 'home': {'attack': [4, 1, 0.73999999999999999],
  'defence': [5, 3, 0.70999999999999996],
  'goalkeeper': [3, 2, 0.81999999999999995],
  'midfield': [4, 2, 0.77000000000000002]}}

In [173]:
# breaking data by month

# Binary targets, one per row of `data`:
#   draws -> 1 when home goals equal away goals
#   evens -> 1 when the total number of goals is even
# Both stay plain lists of 0/1 ints, as before. The label frames now carry
# data.index instead of a fresh 0..n-1 index: pd.concat(axis=1) aligns on
# index labels, so a label frame with a different index would be silently
# misaligned (or padded with NaN) against the kickoff column taken from `data`.
draws = (data.HG == data.AG).astype(int).tolist()
draws_df = pd.DataFrame(draws, columns = ["label"], index = data.index)

evens = ((data.HG + data.AG) % 2 == 0).astype(int).tolist()
evens_df = pd.DataFrame(evens, columns = ["label"], index = data.index)

# Only the even-goals label is attached here; draws_df is built but not used
# in this cell.
data_pca_kickoff = pd.concat([data_pca,
                              evens_df,
                              data[["kickoff"]]], axis = 1)

# Project helpers: convert the kickoff column, then split the table by month.
data_pca_kickoff = convert_kickoff_to_date(data_pca_kickoff)
dfs = break_df_by_month(data_pca_kickoff)

In [174]:
# grid of parameters
# 1x1 grid with step 0.01
# Each axis ("c" and "gamma") gets its own list 0.01, 0.02, ..., 1.00
# (100 values); the lists are built separately so they are not aliased.
params_grid = {
    "c_g_1x1_100" : {
        axis : [step / 100.0 for step in range(1, 101)]
        for axis in ("c", "gamma")
    },
}

In [74]:
#calculate_scores(dfs, params_grid)

In [175]:
# training, prediction and trading with results report

scores = [0.51]
probs = [0.57, 0.58, 0.59, 0.6]
path = "c_g_1x1_100/"
months = [12, 1, 2, 3, 4, 5]

pool = Pool(4)
# One task per probability threshold: every tuple carries the full inputs and
# a single-element [prob] list, so the thresholds are evaluated in parallel.
params = [(data, dfs, prices, scores, [prob], path, months) for prob in probs]
start = time.time()
try:
    # pool.map returns the per-task results in the same order as `params`.
    results = pool.map(trade_and_print_report, params)
    stop = time.time()
finally:
    # Always release the worker processes. Without close()/join() every re-run
    # of this cell (or a failure inside map) leaves four idle workers attached
    # to the kernel.
    pool.close()
    pool.join()
# Wall-clock duration of the parallel run, in seconds.
print (round(stop - start, 2))

In [72]:
# `results` is the list returned by pool.map: one dict per entry of `probs`.
# Per the output shown below, each dict holds:
#   (score, prob, k) -> [n_draws, n_evens, n_bets, profit_percent]
#                       (k is presumably the month -- confirm)
#   (score, prob)    -> {'overall_accuracy': ..., 'overall_profit': ...}
#   'time'           -> elapsed time of that task (presumably seconds -- confirm)
# NOTE(review): this cell's execution count (72) is lower than the training
# cell's (175), so the output below may come from an earlier run.
results

[{(0.51, 0.57, 1): [12, 19, 43, -3.0],
  (0.51, 0.57, 2): [7, 16, 29, -20.3],
  (0.51, 0.57, 3): [1, 1, 2, 75.0],
  (0.51, 0.57, 4): [4, 7, 14, 4.6],
  (0.51, 0.57, 5): [3, 4, 5, 100.0],
  (0.51, 0.57): {'overall_accuracy': '29.03', 'overall_profit': '-0.05'},
  'time': 20822.36},
 {(0.51, 0.58, 1): [11, 17, 38, 0.5],
  (0.51, 0.58, 2): [6, 10, 15, 32.7],
  (0.51, 0.58, 3): [1, 1, 2, 75.0],
  (0.51, 0.58, 4): [3, 4, 5, 113.0],
  (0.51, 0.58, 5): [2, 2, 3, 120.0],
  (0.51, 0.58): {'overall_accuracy': '36.51', 'overall_profit': '25.16'},
  'time': 20850.24},
 {(0.51, 0.59, 1): [11, 14, 33, 15.8],
  (0.51, 0.59, 2): [5, 9, 12, 37.5],
  (0.51, 0.59, 3): [1, 1, 1, 250.0],
  (0.51, 0.59, 4): [3, 4, 4, 166.2],
  (0.51, 0.59, 5): [1, 1, 2, 55.0],
  (0.51, 0.59): {'overall_accuracy': '40.38', 'overall_profit': '38.37'},
  'time': 20836.28},
 {(0.51, 0.6, 1): [11, 14, 32, 19.4],
  (0.51, 0.6, 2): [3, 5, 6, 65.0],
  (0.51, 0.6, 3): [1, 1, 1, 250.0],
  (0.51, 0.6, 4): [3, 4, 4, 166.2],
  (0.51, 0.

In [178]:
# league history and market evaluation

# NOTE(review): this cell rebinds two names that earlier cells rely on:
#   `prices` -- set to the column name "B365D" in the data-preparation cell and
#               passed to trade_and_print_report -- becomes a float here;
#   `draws`  -- the 0/1 list from the month-splitting cell -- becomes a tuple.
# Re-running those earlier cells after this one will pick up the new values.

# Keep only the columns needed for the summary, ordered by kickoff.
cols = ['home', 'away', 'AG', 'HG', 'B365D', 'kickoff']
fixtures = data.loc[:, cols].sort_values(by = 'kickoff')

# Despite the name, `month` holds every fixture from 2017-01-01 onwards.
month = fixtures.loc[fixtures.kickoff >= '2017-01-01']
# `.shape` is a (rows, columns) tuple, so element [0] is the fixture count.
even_goals = month.loc[(month.AG + month.HG) % 2 == 0].shape
draws = month.loc[month.AG == month.HG].shape
# Mean of the B365D column over the selected fixtures.
prices = month['B365D'].mean()