In [1]:
from fetch.match_results import read_joined 

# Load the joined match frame plus two lookup objects returned alongside it.
# NOTE(review): presumably player -> id and id -> player mappings; confirm in
# fetch.match_results.
df, player_mapping, inverse_player_mapping = read_joined()

  if self.run_code(code, result):


In [2]:
# Relative frequency of each `__surface__` value across all rows of `df`.
df['__surface__'].value_counts(normalize=True)

Hard      0.385969
Clay      0.326778
Indoor    0.178735
Grass     0.108518
Name: __surface__, dtype: float64

In [3]:
# Fraction of all rows falling into each (court, surface) combination.
df.groupby(['court', 'surface']).size().div(len(df))

court    surface
Indoor   Carpet     0.034109
         Clay       0.003654
         Hard       0.140972
Outdoor  Clay       0.326778
         Grass      0.108518
         Hard       0.385969
dtype: float64

In [4]:
# Order matches chronologically. `DataFrame.sort` is deprecated in favour of
# `sort_values`; rebinding (instead of inplace=True) keeps the cell idempotent.
df = df.sort_values('date')

  """Entry point for launching an IPython kernel.


#### Build a Model for a Single Surface

In [5]:
import itertools
import numpy as np
import pandas as pd
from models.logit.base import sipko_weights, get_X_y
from ml.prior_logit import NonZeroLogit
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression


# --- Weight grid ---------------------------------------------------------
# Each non-target surface is tried at weights 0, 1/N, ..., 1.
N_ATTEMPTS_PER_SURFACE = 10
SURFACES = df['__surface__'].unique().tolist()
N_SURFACES = len(SURFACES)
# Every combination of grid weights for the (N_SURFACES - 1) non-target surfaces.
TUNERS = list(itertools.product(
    *([np.linspace(0., 1., N_ATTEMPTS_PER_SURFACE + 1)] * (N_SURFACES - 1))
))

# --- Model hyperparameters -----------------------------------------------
DISCOUNT = 0.8  # Taken from Sipko paper
FLAT_TIME = 1.  # Taken from Sipko paper
LMBDA = 10.  # Regularize!
PRIOR = -2.


def _get_surface_weights(train_df, surface_weight_map):
    return train_df['__surface__'].map(lambda x: surface_weight_map[x])


def try_surface_weights(train_df, sw_map, val_df, sipko_weighting):
    """Fit one ``NonZeroLogit`` under a surface weighting and score it.

    Parameters
    ----------
    train_df : DataFrame
        Training matches; must have a ``__surface__`` column.
    sw_map : mapping
        Surface name -> multiplicative sample weight.
    val_df : DataFrame
        Validation matches; must have a ``y`` column with the true labels.
    sipko_weighting : array-like
        Per-row time-decay weights for ``train_df``; multiplied element-wise
        by the surface weights.

    Returns
    -------
    (auc, accuracy) on ``val_df``, with accuracy taken at a 0.5 threshold.
    """
    # Build the design matrices from the frames that were passed in; the
    # names train_X / train_y / val_X are not defined at module level, so
    # relying on them raises NameError on a fresh kernel.
    train_X, train_y = get_X_y(train_df, player_mapping)
    val_X, _ = get_X_y(val_df, player_mapping)

    surface_weights = _get_surface_weights(train_df, sw_map)
    sample_weights = sipko_weighting * surface_weights

    nzl = NonZeroLogit(lmbda=LMBDA, prior=PRIOR, seed=10)
    nzl.fit(train_X, train_y, sample_weight=sample_weights)

    val_preds = nzl.predict_proba(val_X)[:, 1]
    truth = val_df['y']
    auc = roc_auc_score(truth, val_preds)
    accuracy = (truth == (val_preds > 0.5).astype(int)).mean()
    return auc, accuracy


def tune_weights_for_surface(surface, verbose=True):
    """Grid-search the sample weights of the other surfaces for ``surface``.

    Training uses every row of the global ``df`` dated before 2016-01-01
    (all surfaces). Validation uses rows dated within 2016 that were played
    on ``surface`` and whose ``comment`` is neither 'Retired' nor 'Walkover'.
    For each weight combination in ``TUNERS`` the target surface gets weight
    1.0, the remaining surfaces (in sorted order) get the candidate weights,
    and a ``LogisticRegression`` is fitted with time-decay * surface weights.

    Parameters
    ----------
    surface : str
        Value of ``__surface__`` to validate on.
    verbose : bool, default True
        Print each candidate weight map as it is tried. Pass False to avoid
        flooding the cell output with one line per grid point.

    Returns
    -------
    DataFrame with one row per entry of ``TUNERS``: one column per non-target
    surface holding its weight, followed by ``auc`` and ``accuracy``.
    """
    # Get data
    train_df = df[df['date'] < '2016-01-01'].copy()
    train_X, train_y = get_X_y(train_df, player_mapping)
    val_df = df[
        (df['date'] >= '2016-01-01') &
        (df['date'] < '2017-01-01') &
        (df['__surface__'] == surface) &
        (~df['comment'].isin(['Retired', 'Walkover']))
    ].copy()
    val_X, _ = get_X_y(val_df, player_mapping)
    val_truth = val_df['y']  # loop-invariant, so looked up once
    s_weights = sipko_weights(train_df['date'].max(), train_df, DISCOUNT, flat_time=FLAT_TIME)

    # Sorted so that the column order of the result is deterministic.
    tune_surfaces = sorted(x for x in SURFACES if x != surface)

    perfs = []
    for weights in TUNERS:
        sw_map = dict(zip(tune_surfaces, weights))
        sw_map[surface] = 1.
        if verbose:
            # Parenthesised form prints identically under Python 2 and 3.
            print(sw_map)
        surface_weights = _get_surface_weights(train_df, sw_map)
        sample_weights = s_weights * surface_weights

        mod = LogisticRegression(C=0.4)
        mod.fit(train_X, train_y, sample_weight=sample_weights)

        val_preds = mod.predict_proba(val_X)[:, 1]
        auc = roc_auc_score(val_truth, val_preds)
        accuracy = (val_truth == (val_preds > 0.5).astype(int)).mean()

        perfs.append(list(weights) + [auc, accuracy])
    return pd.DataFrame(
        perfs,
        columns=tune_surfaces + ['auc', 'accuracy']
    )

In [6]:
# Grid-search the weights of the non-Hard surfaces, validating on Hard matches.
hard_results = tune_weights_for_surface('Hard')

{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.0, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.10000000000000001, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.20000000000000001, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.30000000000000004, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.40000000000000002, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.5, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.60000000000000009, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.70000000000000007, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.80000000000000004, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.90000000000000002, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 1.0, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.0, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.10000000000000001, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.20000000000000001, 'Clay': 0.0}
{'Hard':

{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.5, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.60000000000000009, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.70000000000000007, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.80000000000000004, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.90000000000000002, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 1.0, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.0, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.10000000000000001, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.20000000000000001, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.30000000000000004, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.40000000000000002, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.5, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.60000000000000009, 'Clay': 0.0}
{'Hard': 1.0, 'Grass': 1.0, 'Ind

{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.90000000000000002, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 1.0, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.0, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.10000000000000001, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.20000000000000001, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.30000000000000004, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.40000000000000002, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.5, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.60000000000000009, 'Clay': 0.10000000000000001}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.70000000000000007, 'Clay': 0.10000000000000001}
{'Hard':

{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.90000000000000002, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 1.0, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.0, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.10000000000000001, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.20000000000000001, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.30000000000000004, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.40000000000000002, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.5, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.60000000000000009, 'Clay': 0.20000000000000001}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.70000000000000007, 'Clay': 0.20000000000000001}
{'Hard':

{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.90000000000000002, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 1.0, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.0, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.10000000000000001, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.20000000000000001, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.30000000000000004, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.40000000000000002, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.5, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.60000000000000009, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.70000000000000007, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.100000000000000

{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.90000000000000002, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 1.0, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.0, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.10000000000000001, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.20000000000000001, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.30000000000000004, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.40000000000000002, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.5, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.60000000000000009, 'Clay': 0.30000000000000004}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.70000000000000007, 'Clay': 0.30000000000000004}
{'Hard':

{'Hard': 1.0, 'Grass': 0.5, 'Indoor': 0.90000000000000002, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.5, 'Indoor': 1.0, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.0, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.10000000000000001, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.20000000000000001, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.30000000000000004, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.40000000000000002, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.5, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.60000000000000009, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.70000000000000007, 'Clay': 0.40000000000000002}
{'Hard': 1.0, 'Grass': 0.600000000000000

{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.20000000000000001, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.30000000000000004, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.40000000000000002, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.5, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.60000000000000009, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.70000000000000007, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.80000000000000004, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.90000000000000002, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 1.0, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.0, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.10000000000000001, 'Clay': 0.5}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.20000000000000001, 'Clay

{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.70000000000000007, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.80000000000000004, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.90000000000000002, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 1.0, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.0, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.10000000000000001, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.20000000000000001, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.30000000000000004, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.40000000000000002, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.20000000000000001, 'Indoor': 0.5, 'Clay': 0.60000000000000009}
{'Hard':

{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.40000000000000002, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.5, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.60000000000000009, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.70000000000000007, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.80000000000000004, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.90000000000000002, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 1.0, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.0, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.10000000000000001, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.20000000000000001, 'Clay': 0.60000000000000009}
{'Hard': 1.0, 'Grass': 1.0, 'Indoor': 0.3000000000000000

{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.40000000000000002, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.5, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.60000000000000009, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.70000000000000007, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.80000000000000004, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.90000000000000002, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 1.0, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.0, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.10000000000000001, 'Clay': 0.70000000000000007}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.20000000000000001, 'Clay': 0.70000000000000007}
{'Hard':

{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.40000000000000002, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.5, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.60000000000000009, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.70000000000000007, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.80000000000000004, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 0.90000000000000002, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.30000000000000004, 'Indoor': 1.0, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.0, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.10000000000000001, 'Clay': 0.80000000000000004}
{'Hard': 1.0, 'Grass': 0.40000000000000002, 'Indoor': 0.20000000000000001, 'Clay': 0.80000000000000004}
{'Hard':

{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.40000000000000002, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.5, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.60000000000000009, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.70000000000000007, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.80000000000000004, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 0.90000000000000002, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.0, 'Indoor': 1.0, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.0, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.10000000000000001, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.20000000000000001, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.10000000000000001, 'Indoor': 0.30000000000000004, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'G

{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.40000000000000002, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.5, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.60000000000000009, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.70000000000000007, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.80000000000000004, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 0.90000000000000002, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.80000000000000004, 'Indoor': 1.0, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.0, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.10000000000000001, 'Clay': 0.90000000000000002}
{'Hard': 1.0, 'Grass': 0.90000000000000002, 'Indoor': 0.20000000000000001, 'Clay': 0.90000000000000002}
{'Hard':

{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.5, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.60000000000000009, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.70000000000000007, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.80000000000000004, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 0.90000000000000002, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.60000000000000009, 'Indoor': 1.0, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.0, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.10000000000000001, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.20000000000000001, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.30000000000000004, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.40000000000000002, 'Clay': 1.0}
{'Hard': 1.0, 'Grass': 0.70000000000000007, 'Indoor': 0.5, 'Clay': 1.0}
{'Hard':

In [7]:
# Best weight combinations by validation AUC (`sort` is deprecated; use `sort_values`).
hard_results.sort_values('auc', ascending=False).head(15)

  """Entry point for launching an IPython kernel.


Unnamed: 0,Clay,Grass,Indoor,auc,accuracy
1077,0.8,0.9,1.0,0.743823,0.667355
1088,0.8,1.0,1.0,0.74372,0.666322
1209,0.9,1.0,1.0,0.743694,0.668388
1308,1.0,0.8,1.0,0.743664,0.672521
1176,0.9,0.7,1.0,0.743647,0.669421
835,0.6,0.9,1.0,0.743617,0.669421
1198,0.9,0.9,1.0,0.743604,0.667355
1066,0.8,0.8,1.0,0.743596,0.669421
1197,0.9,0.9,0.9,0.743587,0.674587
1076,0.8,0.9,0.9,0.743583,0.667355


In [None]:
# Grid-search the weights of the non-Clay surfaces, validating on Clay matches.
clay_results = tune_weights_for_surface('Clay')

In [None]:
# Best weight combinations by validation AUC (`sort` is deprecated; use `sort_values`).
clay_results.sort_values('auc', ascending=False).head()

In [None]:
# Grid-search the weights of the non-Grass surfaces, validating on Grass matches.
grass_results = tune_weights_for_surface('Grass')

In [None]:
# Best weight combinations by validation AUC (`sort` is deprecated; use `sort_values`).
grass_results.sort_values('auc', ascending=False).head()

In [None]:
# Grid-search the weights of the non-Indoor surfaces, validating on Indoor matches.
indoor_results = tune_weights_for_surface('Indoor')

In [None]:
# Best weight combinations by validation AUC (`sort` is deprecated; use `sort_values`).
indoor_results.sort_values('auc', ascending=False).head()

Why is indoor performance so bad? Are there unusual tournaments in this group, or is it because "Indoor" mixes hard, carpet, and clay courts? We'll come back to this...

In [None]:
# How many indoor matches were played on each underlying surface.
df.loc[df['court'] == 'Indoor', 'surface'].value_counts()

There are a lot of carpet matches... should we throw these out?

#### Evaluate Betting Performance 

In [None]:
def evaluate_betting(val_df, buff=0):
    """Simulate unit-stake betting driven by ``val_df['cal_pred']``.

    A 1-unit bet is placed on player 1 when the reciprocal of ``p1_odds`` is
    below ``cal_pred - buff``, and on player 2 when the reciprocal of
    ``p2_odds`` is below ``1 - cal_pred - buff``. A winning bet returns its
    odds; ``y == 1`` is treated as a player-1 win.

    Parameters
    ----------
    val_df : DataFrame
        Needs ``p1_odds``, ``p2_odds``, ``cal_pred`` and ``y`` columns.
        Side effect: boolean ``bet1`` / ``bet2`` columns are written onto it.
    buff : float, default 0
        Extra edge required over the odds-implied value before betting.

    Returns
    -------
    (profit, profit_over_time)
        Total revenue minus total stakes, and the running (cumulative)
        profit as a Series in the row order of ``val_df``.
    """
    val_df['bet1'] = (1. / val_df['p1_odds']) < (val_df['cal_pred'] - buff)
    val_df['bet2'] = (1. / val_df['p2_odds']) < (1. - val_df['cal_pred'] - buff)
    bet_revenues = (
        val_df['p1_odds'] * val_df['bet1'] * val_df['y'] +
        val_df['p2_odds'] * val_df['bet2'] * (1. - val_df['y'])
    )
    # Cast before adding: bool + bool on Series is a logical OR, which would
    # charge a single stake for a row where both players are backed.
    bet_spending = val_df['bet1'].astype(float) + val_df['bet2'].astype(float)
    profit_over_time = bet_revenues.cumsum() - bet_spending.cumsum()
    return bet_revenues.sum() - bet_spending.sum(), profit_over_time

In [None]:
# NOTE(review): neither `val_df` (with a `cal_pred` column) nor `plt` is defined in
# any visible cell -- `val_df` is only a local inside tune_weights_for_surface.
# Confirm where they come from before running this on a fresh kernel.
units_won, over_time = evaluate_betting(val_df)
plt.plot(over_time)

In [None]:
# Net result in staking units: total revenue minus total stakes from evaluate_betting.
units_won

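#### Compare to always betting on the same player

Let's make sure we lose money if we ignore the model and blindly back one side: with `cal_pred = 0`, the rule in `evaluate_betting` never bets on player 1 and bets on player 2 whenever `p2_odds > 1`.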

In [None]:
# Baseline: work on a copy so the predictions stored in `val_df` are untouched.
new_val_df = val_df.copy()
# With cal_pred == 0 and the default buff=0, evaluate_betting never bets on
# player 1 and bets on player 2 whenever p2_odds > 1.
new_val_df['cal_pred'] = 0.

units_won, over_time = evaluate_betting(new_val_df)
plt.plot(over_time)

Cool, we lose a ton of money by blindly backing one player regardless of the model.

In [None]:
import numpy as np

# Sum of the two reciprocal odds per match.
# NOTE(review): if these are decimal odds this is the book's total implied
# probability, and a value below 1 would mean both sides can be backed at a
# profit -- confirm the odds format.
val_df['tot_probs'] = ((1 / val_df['p1_odds']) + (1. / val_df['p2_odds']))

In [None]:
# Matches whose reciprocal odds sum to less than 1, with the columns needed to inspect them.
val_df.loc[
    val_df['tot_probs'] < 1.,
    ['p1_odds', 'p2_odds', 'winner', 'loser', 'maxw', 'maxl', 'tot_probs']
]