In [172]:
import torch
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# torch.set_printoptions({'float': '{: 0.8f}'.format})
torch.set_printoptions(sci_mode=False, precision=8)


Load df with y_col

In [173]:
df = pd.read_csv("./footballData/CombinedSlidingWindow4.csv", index_col=False, low_memory=False)
y_col = ['H_Won', 'H_start_odds', 'V_start_odds']
y_df = df[y_col]

# Custom loss function
**Input:** X and Y where each value is between -1 and 1. Both corresponding to the probability of home (+) vs away (-) where 0.5 = 50%, 1.0=100% home chance of winning, -0.7 = 70% visitor chance of winning etc.

Also the odds will be in the input as well

We will use Adam as the optimizer without passing in any. So by default smaller values are

1. Calculate the kelly criterion based on the predicted probability. When it suggests a negative number (no bet) set both X and y to 0
2. Calculate the pearson correlation coefficient, get its absolute value and multiply it by a hyperparameter constant. Combine this with the 



In [201]:
"""
Want: 
    An array > 1 elements for PCC formula
    A smaller array for kelly_criterion
    


x: 1d array of predictions between -1 and 1 where negative number means visitor predicted to win
y: ['H_Won', 'H_start_odds', 'V_start_odds']
pearson_multiplier: constant to multiply the pearson correlation coefficient's result by
max_bet_size: Amount to multiply to kelly criterion
"""
def nfl_custom_criterion(x, y, pearson_multiplier=1.5, max_bet_size=100):
    # ------------------------------------------------
    # Preliminary calculations
    # ------------------------------------------------
    h_start_odds = y[:,1]
    v_start_odds = y[:,2]
    h_won = y[:,0]
    y_decimal_odds = torch.where(x > 0, h_start_odds, v_start_odds) # Predicted vs actual odds (regardless of correct prediction)
    y_prob = 1 / y_decimal_odds                  # Probability (regardless of correct prediction)
    x_H_Won = torch.round(torch.sigmoid(20 * x)) # Sigmoid so that it's differentiable. The 20 is arbitrarily large number
    y_correct_prediction = torch.abs((x_H_Won - h_won))        # 1 if wrong bet, otherwise 0. Used to reset kelly when wrong
    y_correct_prediction_mult_two = 2 * y_correct_prediction   # 2 if wrong bet, 0 if correct
    x = torch.abs(x)

    # ------------------------------------------------
    # 1. Calculate the kelly criterion
    #    Entirely wrong predictions are negated and kept in a separate tensor (pcc not applied to negated )
    #    Use cumulative product? max_bet_size determined by previous results?
    # ------------------------------------------------
    kelly_criterion = x - ((1 - x) / y_decimal_odds)
    bet_multiplier = torch.clamp(kelly_criterion, min=0)   # Kelly results that are negative are ignored
    bet_unadjusted_profit = bet_multiplier*max_bet_size    # Assumes all bets were correct

    correct_bets = bet_unadjusted_profit - (bet_unadjusted_profit * y_correct_prediction)   # All correct bets after kelly, profit or 0.
    incorrect_bets = bet_unadjusted_profit - (bet_unadjusted_profit * y_correct_prediction_mult_two) # Negative numbers are incorrect bets
    incorrect_bets = torch.clamp(incorrect_bets, max=0)  # Restrict to 0 or negative.
    
    net_profit = torch.sum(correct_bets)
    net_loss = torch.sum(incorrect_bets)
    

    # ------------------------------------------------
    # 2. Calculate the Pearson Correlation Coefficient
    # ------------------------------------------------
    n = x.size(0)
    sum_x = torch.sum(x)
    sum_x_squared = torch.sum(x**2)
    sum_y = torch.sum(y_prob)
    sum_y_squared = torch.sum(y_prob**2)
    sum_pow_x = torch.sum(x**2)
    sum_pow_y = torch.sum(y_prob**2)
    x_mul_y = torch.mul(x, y_prob)
    sum_x_mul_y = torch.sum(x_mul_y)

    # PCC Formula (eps to avoid NaN)
    eps = 1e-8
    pcc_numerator = n * sum_x_mul_y - sum_x * sum_y
    pcc_denominator_one = torch.sqrt(n * sum_pow_x - sum_x_squared + eps)
    pcc_denominator_two = torch.sqrt(n * sum_pow_y - sum_y_squared + eps)
    pcc = pcc_numerator / (pcc_denominator_one * pcc_denominator_two + eps)
    pcc = pearson_multiplier * torch.abs(pcc)
    
    # ------------------------------------------------
    # Combine & Return
    #     Kelly profit diminished by 1 - pcc
    #     Net loss subtracted from that
    #     Negate everything for Adam
    # ------------------------------------------------
    result = -torch.sum((net_profit * (1 - pcc)) + net_loss)

    # numpy_debug = torch.stack([h_won, x_H_Won, y_correct_prediction, y_correct_prediction_mult_two, correct_bets, incorrect_bets], dim=0)
    # numpy_debug = torch.stack([result], dim=0)
    # return numpy_debug.numpy()
    return result


In [202]:
x = torch.arange(-1, 1.01, 0.1)
df_size = x.size(0)
y_df = y_df[:df_size]
y = torch.tensor(y_df.values, dtype=torch.float32)
#print(y)
# print(y[:,1])
res_df = nfl_custom_criterion(x,y)
print(x)
for item in res_df:
    print(item)

tensor([    -1.00000000,     -0.89999998,     -0.80000001,     -0.69999999,
            -0.60000002,     -0.50000000,     -0.40000001,     -0.30000001,
            -0.20000000,     -0.10000000,     -0.00000000,      0.09999999,
             0.20000000,      0.30000001,      0.40000001,      0.50000000,
             0.60000002,      0.69999999,      0.80000001,      0.89999998,
             1.00000000])
-330.23163


In [None]:
# Generate a tensor with random numbers between -1 and 1
amt_random_numbers = 10
x = torch.rand(amt_random_numbers) * 2 - 1
y = torch.rand(amt_random_numbers) * 2 - 1
# print(type(y))
# print(y.shape)
print(x)
print(y)

nfl_test(x,y)