In [1]:
from pulp import *
import numpy as np
import pandas as pd
import random
import datetime

In [2]:
target = pd.DataFrame.from_dict(
    {
        "home_spread": [3],
        "home_spread_ml": [-109],
        "away_spread_ml": [-101],
        "total": [210],
        "over": [-110],
        "under": [-100]
    }
)

In [3]:
target

Unnamed: 0,home_spread,home_spread_ml,away_spread_ml,total,over,under
0,3,-109,-101,210,-110,-100


In [101]:
target["implied_home"] = np.where(
    target['home_spread_ml'] < 0,
    1.0 - (100.00/(abs(target["home_spread_ml"])+100.0)),
    100.0/(target["home_spread_ml"] + 100.0)
)
target["implied_away"] = np.where(
    target['away_spread_ml'] < 0,
    1.0 - (100.00/(abs(target["away_spread_ml"])+100.0)),
    100.0/(target["home_spread_ml"] + 100.0)
)
target["implied_over"] = np.where(
    target['over'] < 0,
    1.0-(100.00/(abs(target["over"])+100.0)),
    100.0/(target["over"]+100.0)
)
target["implied_under"] = np.where(
    target['under'] < 0,
    1.0-(100.00/(abs(target["under"])+100.0)),
    100.0/(target["under"]+100.0)
)

target["home_win"] = (target["implied_home"] * 1.0) / (target["implied_home"] + target["implied_away"])
target["away_win"] = (target["implied_away"] * 1.0) / (target["implied_home"] + target["implied_away"])
target["over_prob"] = (target["implied_over"] * 1.0) / (target["implied_over"] + target["implied_under"])
target["under_prob"] = (target["implied_under"] * 1.0) / (target["implied_over"] + target["implied_under"])

In [103]:
true_target = target[["total", "home_spread", "home_win", "away_win", "over_prob", "under_prob"]]

In [104]:
true_target_values = {key: val[0] for key, val in true_target.to_dict().items()}

In [105]:
true_target_values

{'total': 210,
 'home_spread': 3,
 'home_win': 0.5092984332140034,
 'away_win': 0.49070156678599663,
 'over_prob': 0.5116279069767442,
 'under_prob': 0.48837209302325585}

In [8]:
clean_de_vigged = pd.read_excel("Sample_NBA_Data.xlsx")

In [9]:
clean_de_vigged.sort_values("game_id")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,away_true_prob,home_true_prob,under_true_prob,over_true_prob,home_margin,points,spread_home
3426,1,2011-03-04,Brooklyn Nets,Toronto Raptors,116,103,-2.5,203.5,-111,-111,0.5,0.5,0.500000,0.500000,13,219,-2.5
948,2,2011-03-05,Atlanta Hawks,Oklahoma City Thunder,104,111,-2.5,192.0,-111,-111,0.5,0.5,0.500000,0.500000,-7,215,-2.5
12276,3,2011-03-05,Boston Celtics,Golden State Warriors,107,103,-9.5,205.5,-109,-107,0.5,0.5,0.497774,0.502226,4,210,-9.5
11454,4,2011-03-05,Brooklyn Nets,Toronto Raptors,137,136,-2.0,206.0,-111,-111,0.5,0.5,0.500000,0.500000,1,273,-2.0
12104,5,2011-03-05,Dallas Mavericks,Indiana Pacers,116,108,-9.0,208.0,-108,-109,0.5,0.5,0.501105,0.498895,8,224,-9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3656,12344,2022-03-02,Houston Rockets,Los Angeles Clippers,100,113,7.0,228.0,-109,-106,0.5,0.5,0.496637,0.503363,-13,213,7.0
4679,12345,2022-03-02,Los Angeles Lakers,Dallas Mavericks,104,109,5.0,219.0,-110,-107,0.5,0.5,0.496684,0.503316,-5,213,5.0
751,12346,2022-03-02,Minnesota Timberwolves,Golden State Warriors,129,114,3.5,231.0,-110,-102,0.5,0.5,0.490834,0.509166,15,243,3.5
5450,12347,2022-03-02,Toronto Raptors,Brooklyn Nets,109,108,-8.0,219.0,-109,-106,0.5,0.5,0.496637,0.503363,1,217,-8.0


In [10]:
unranked_data = clean_de_vigged.copy(deep=True)

In [98]:
date_diff = (pd.to_datetime(unranked_data["game_date"], format='%Y-%m-%d') - datetime.datetime.utcnow()) /np.timedelta64(1, 'D')

unranked_data["pythag_error"] = (
    abs(unranked_data["spread_home"] - true_target_values["home_spread"]) + abs(unranked_data["total"] - true_target_values["total"])
)
#     + (0.00001 * date_diff))

conditions = [
    unranked_data["points"] > true_target_values["total"],
    unranked_data["points"] < true_target_values["total"],
    unranked_data["points"] == true_target_values["total"]
]

values = [
    "over",
    "under",
    "push"
]

unranked_data["total_result"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["home_score"] +  true_target_values["home_spread"] - unranked_data["away_score"]> 0,
    unranked_data["home_score"]  +  true_target_values["home_spread"] - unranked_data["away_score"]< 0,
    unranked_data["home_score"]  +  true_target_values["home_spread"] - unranked_data["away_score"]== 0
]

values = [
    "home_cover",
    "away_cover",
    "push"
]

unranked_data["spread_result"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["spread_home"] < true_target_values["home_spread"],
    unranked_data["spread_home"] > true_target_values["home_spread"],
    unranked_data["spread_home"] == true_target_values["home_spread"]
]

values = [
    "home",
    "away",
    "push"
]


unranked_data["predictive_spread_cover"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["total"] < true_target_values["total"],
    unranked_data["total"] > true_target_values["total"],
    unranked_data["total"] == true_target_values["total"]
]

values = [
    "under",
    "over",
    "push"
]


unranked_data["predictive_total"] = np.select(
    conditions, values
)





In [99]:
unranked_data

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
0,2149,2014-01-01,Houston Rockets,Sacramento Kings,106,110,-10.0,215.5,-111,-107,...,0.504390,-4,216,-10.0,18.5,over,away_cover,7005.0,home,over
1,8549,2019-01-20,Indiana Pacers,Charlotte Hornets,120,95,-8.0,218.5,-109,-110,...,0.498910,25,215,-8.0,19.5,over,home_cover,7519.0,home,over
2,2228,2014-01-12,Portland Trail Blazers,Boston Celtics,112,104,-12.0,210.0,-108,-108,...,0.500000,8,216,-12.0,15.0,over,home_cover,4983.0,home,push
3,8628,2019-01-30,Brooklyn Nets,Chicago Bulls,122,117,-6.5,220.5,-111,-109,...,0.502165,5,239,-6.5,20.0,over,home_cover,7763.0,home,over
4,15,2011-03-06,Los Angeles Clippers,Denver Nuggets,100,94,-2.0,208.0,-106,-109,...,0.496637,6,194,-2.0,7.0,under,home_cover,1114.0,home,under
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12343,10044,2020-02-25,Houston Rockets,New York Knicks,123,112,-13.0,226.5,-110,-110,...,0.500000,11,235,-13.0,32.5,over,home_cover,11893.0,home,over
12344,1037,2012-12-31,Orlando Magic,Miami Heat,110,112,9.5,194.5,-102,-106,...,0.495286,-2,222,9.5,22.0,over,home_cover,9004.0,away,under
12345,7437,2018-02-25,Golden State Warriors,Oklahoma City Thunder,112,80,-10.0,233.0,-107,-108,...,0.498879,32,192,-10.0,36.0,under,home_cover,12171.0,home,over
12346,483,2012-03-21,Houston Rockets,Los Angeles Lakers,107,104,3.0,191.0,-108,-110,...,0.497805,3,211,3.0,19.0,over,home_cover,7518.0,push,under


In [100]:
unranked_data["pythag_error_rank"] = unranked_data["pythag_error"].rank(method="first")

In [94]:
unranked_data.sort_values("pythag_error_rank")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
3973,6346,2017-03-15,New York Knicks,Indiana Pacers,87,81,3.0,210.0,-106,-108,...,0.497742,6,168,3.0,0.0,under,home_cover,1.0,push,push
4365,2504,2014-02-25,Philadelphia 76ers,Milwaukee Bucks,110,130,3.0,210.0,-108,-110,...,0.497805,-20,240,3.0,0.0,over,away_cover,2.0,push,push
6286,2439,2014-02-12,Phoenix Suns,Miami Heat,97,103,3.0,210.0,-110,-105,...,0.505605,-6,200,3.0,0.0,under,away_cover,3.0,push,push
6957,1016,2012-12-29,Dallas Mavericks,Denver Nuggets,85,106,3.0,210.0,-108,-106,...,0.502258,-21,191,3.0,0.0,under,away_cover,4.0,push,push
7306,5997,2017-01-21,Philadelphia 76ers,Portland Trail Blazers,93,92,3.0,210.0,-106,-108,...,0.497742,1,185,3.0,0.0,under,home_cover,5.0,push,push
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11904,9412,2019-12-02,Los Angeles Clippers,Washington Wizards,150,125,-12.5,240.0,-107,-110,...,0.496684,25,275,-12.5,45.5,over,home_cover,12344.0,home,over
4875,12279,2022-02-16,Milwaukee Bucks,Indiana Pacers,128,119,-14.5,238.5,-111,-108,...,0.503270,9,247,-14.5,46.0,over,home_cover,12345.0,home,over
10601,11292,2021-05-11,Portland Trail Blazers,Houston Rockets,140,129,-15.0,238.5,-107,-110,...,0.496684,11,269,-15.0,46.5,over,home_cover,12346.0,home,over
2173,9380,2019-11-28,Milwaukee Bucks,Atlanta Hawks,111,102,-14.5,239.5,-110,-111,...,0.498925,9,213,-14.5,47.0,over,home_cover,12347.0,home,over


In [96]:
ranked_data = unranked_data[["game_id", "spread_home", "pythag_error", "points", "home_margin", "home_score", "away_score", "total_result", "spread_result", "pythag_error_rank", "predictive_spread_cover", "predictive_total"]].sort_values("pythag_error_rank").copy(deep=True)

In [97]:
ranked_data.to_excel("ranked_data_from_python.xlsx",index=False)

In [41]:
ranked_data

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank
3973,6346,3.0,0.0,168,6,87,81,under,home_cover,1.0
4365,2504,3.0,0.0,240,-20,110,130,over,away_cover,2.0
6286,2439,3.0,0.0,200,-6,97,103,under,away_cover,3.0
6957,1016,3.0,0.0,191,-21,85,106,under,away_cover,4.0
7306,5997,3.0,0.0,185,1,93,92,under,home_cover,5.0
...,...,...,...,...,...,...,...,...,...,...
11904,9412,-12.5,45.5,275,25,150,125,over,home_cover,12344.0
4875,12279,-14.5,46.0,247,9,128,119,over,home_cover,12345.0
10601,11292,-15.0,46.5,269,11,140,129,over,home_cover,12346.0
2173,9380,-14.5,47.0,213,9,111,102,over,home_cover,12347.0


In [49]:
chosen_data = ranked_data[ranked_data["pythag_error_rank"] <= 500]

In [50]:
chosen_data["points"].median()

208.0

In [51]:
chosen_data["total_result"].value_counts()

under    276
over     218
push       6
Name: total_result, dtype: int64

In [52]:
chosen_data["spread_result"].value_counts()

away_cover    247
home_cover    230
push           23
Name: spread_result, dtype: int64

In [53]:
chosen_data.query("total_result == 'under'")

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank
3973,6346,3.0,0.0,168,6,87,81,under,home_cover,1.0
6286,2439,3.0,0.0,200,-6,97,103,under,away_cover,3.0
6957,1016,3.0,0.0,191,-21,85,106,under,away_cover,4.0
7306,5997,3.0,0.0,185,1,93,92,under,home_cover,5.0
1559,9600,3.5,0.5,195,1,98,97,under,home_cover,9.0
...,...,...,...,...,...,...,...,...,...,...
4178,7655,1.5,4.5,209,3,106,103,under,home_cover,495.0
4343,5744,5.5,4.5,188,-30,79,109,under,away_cover,496.0
4483,10054,1.5,4.5,208,6,107,101,under,home_cover,497.0
4603,2627,2.5,4.5,198,12,105,93,under,home_cover,499.0


In [54]:
chosen_data["pythag_error_rank"].sum()

125250.0

In [61]:
chosen = LpVariable.dicts(
            "chosen",
            (
                np.array(unranked_data["game_id"])
            ),
            lowBound=0,
            cat="Binary",
        )

In [62]:
np.array(unranked_data["game_id"])

array([2149, 8549, 2228, ..., 7437,  483, 6883], dtype=int64)

In [63]:
chosen

{2149: chosen_2149,
 8549: chosen_8549,
 2228: chosen_2228,
 8628: chosen_8628,
 15: chosen_15,
 6415: chosen_6415,
 3334: chosen_3334,
 9734: chosen_9734,
 2702: chosen_2702,
 9102: chosen_9102,
 3887: chosen_3887,
 10287: chosen_10287,
 3966: chosen_3966,
 10366: chosen_10366,
 5151: chosen_5151,
 11551: chosen_11551,
 1280: chosen_1280,
 7680: chosen_7680,
 1359: chosen_1359,
 7759: chosen_7759,
 5388: chosen_5388,
 11788: chosen_11788,
 6336: chosen_6336,
 3492: chosen_3492,
 9892: chosen_9892,
 3097: chosen_3097,
 9497: chosen_9497,
 3571: chosen_3571,
 9971: chosen_9971,
 1754: chosen_1754,
 8154: chosen_8154,
 3018: chosen_3018,
 9418: chosen_9418,
 1912: chosen_1912,
 8312: chosen_8312,
 4519: chosen_4519,
 10919: chosen_10919,
 2860: chosen_2860,
 9260: chosen_9260,
 4835: chosen_4835,
 11235: chosen_11235,
 1833: chosen_1833,
 8233: chosen_8233,
 4440: chosen_4440,
 10840: chosen_10840,
 6257: chosen_6257,
 5546: chosen_5546,
 11946: chosen_11946,
 1596: chosen_1596,
 7996: c

In [64]:
prob = LpProblem("Pythag error minimize", LpMinimize)



In [75]:
prob += (
                lpSum(
                    [
                        chosen[game_id] * pythag_errors[game_id]
                        for game_id in unranked_data["game_id"].to_list()
                    ]
                ),
                "Maximize sum ratings of recipes function",
            )

In [76]:
pythag_errors = unranked_data[["game_id","pythag_error"]].set_index("game_id")["pythag_error"].to_dict()

In [77]:
pythag_errors

{2149: 18.5,
 8549: 19.5,
 2228: 15.0,
 8628: 20.0,
 15: 7.0,
 6415: 27.0,
 3334: 24.0,
 9734: 22.5,
 2702: 13.5,
 9102: 39.0,
 3887: 6.0,
 10287: 23.0,
 3966: 16.5,
 10366: 16.0,
 5151: 11.5,
 11551: 19.0,
 1280: 11.0,
 7680: 21.5,
 1359: 2.0,
 7759: 15.0,
 5388: 13.5,
 11788: 12.0,
 6336: 18.5,
 3492: 8.5,
 9892: 21.5,
 3097: 12.0,
 9497: 28.5,
 3571: 20.5,
 9971: 8.5,
 1754: 17.5,
 8154: 12.5,
 3018: 7.5,
 9418: 9.0,
 1912: 30.0,
 8312: 16.5,
 4519: 21.5,
 10919: 42.5,
 2860: 8.5,
 9260: 37.0,
 4835: 15.0,
 11235: 19.5,
 1833: 15.5,
 8233: 18.5,
 4440: 33.0,
 10840: 36.5,
 6257: 6.0,
 5546: 12.5,
 11946: 31.0,
 1596: 6.5,
 7996: 28.5,
 2070: 22.0,
 8470: 20.0,
 6020: 6.0,
 3413: 11.0,
 9813: 26.0,
 884: 26.5,
 7284: 19.5,
 4756: 15.0,
 11156: 22.0,
 5862: 4.0,
 12262: 27.0,
 3176: 25.0,
 9576: 8.5,
 4598: 6.5,
 10998: 21.5,
 726: 23.0,
 7126: 20.0,
 1042: 23.5,
 7442: 15.5,
 4993: 11.5,
 11393: 28.5,
 2781: 23.5,
 3669: 19.0,
 10069: 23.5,
 3748: 6.0,
 10148: 18.0,
 1535: 21.0,
 793

In [78]:
prob += (
                        lpSum([chosen[game_id] for game_id in unranked_data["game_id"].to_list()]) == 500
                    )

In [79]:
prob.solve()

1

In [85]:
chosen_games = []
for value in chosen:
    if chosen[value].varValue == 1:
        print(value,chosen[value].varValue)
        chosen_games.append(value)

1359 1.0
5862 1.0
7838 1.0
8865 1.0
11875 1.0
5960 1.0
6810 1.0
6178 1.0
6423 1.0
2800 1.0
4696 1.0
4143 1.0
5238 1.0
5870 1.0
5791 1.0
6265 1.0
8478 1.0
5802 1.0
7450 1.0
9433 1.0
11639 1.0
11876 1.0
10776 1.0
5871 1.0
8558 1.0
11797 1.0
6904 1.0
5788 1.0
2470 1.0
9559 1.0
11771 1.0
6082 1.0
3181 1.0
2211 1.0
6894 1.0
4190 1.0
11459 1.0
3005 1.0
10924 1.0
5472 1.0
1540 1.0
11400 1.0
4953 1.0
607 1.0
5817 1.0
3846 1.0
7323 1.0
11511 1.0
2503 1.0
2725 1.0
10953 1.0
10716 1.0
8319 1.0
8019 1.0
9600 1.0
9456 1.0
2103 1.0
7226 1.0
2424 1.0
7481 1.0
5085 1.0
4927 1.0
12196 1.0
5006 1.0
4216 1.0
2308 1.0
7839 1.0
6416 1.0
7779 1.0
9518 1.0
5330 1.0
9340 1.0
2715 1.0
5588 1.0
5488 1.0
2801 1.0
6220 1.0
5705 1.0
5547 1.0
6299 1.0
1953 1.0
5035 1.0
11119 1.0
9696 1.0
2495 1.0
7315 1.0
7078 1.0
3127 1.0
6683 1.0
11503 1.0
4945 1.0
9290 1.0
4866 1.0
1784 1.0
9640 1.0
6796 1.0
7270 1.0
2213 1.0
10983 1.0
1739 1.0
4899 1.0
11773 1.0
5840 1.0
7578 1.0
6258 1.0
11789 1.0
11342 1.0
6443 1.0
5040 1.0
9

In [86]:
chosen_games

[1359,
 5862,
 7838,
 8865,
 11875,
 5960,
 6810,
 6178,
 6423,
 2800,
 4696,
 4143,
 5238,
 5870,
 5791,
 6265,
 8478,
 5802,
 7450,
 9433,
 11639,
 11876,
 10776,
 5871,
 8558,
 11797,
 6904,
 5788,
 2470,
 9559,
 11771,
 6082,
 3181,
 2211,
 6894,
 4190,
 11459,
 3005,
 10924,
 5472,
 1540,
 11400,
 4953,
 607,
 5817,
 3846,
 7323,
 11511,
 2503,
 2725,
 10953,
 10716,
 8319,
 8019,
 9600,
 9456,
 2103,
 7226,
 2424,
 7481,
 5085,
 4927,
 12196,
 5006,
 4216,
 2308,
 7839,
 6416,
 7779,
 9518,
 5330,
 9340,
 2715,
 5588,
 5488,
 2801,
 6220,
 5705,
 5547,
 6299,
 1953,
 5035,
 11119,
 9696,
 2495,
 7315,
 7078,
 3127,
 6683,
 11503,
 4945,
 9290,
 4866,
 1784,
 9640,
 6796,
 7270,
 2213,
 10983,
 1739,
 4899,
 11773,
 5840,
 7578,
 6258,
 11789,
 11342,
 6443,
 5040,
 9790,
 6304,
 6679,
 9444,
 42,
 8437,
 7410,
 1810,
 5021,
 9920,
 4784,
 7075,
 11677,
 3538,
 2650,
 1465,
 6522,
 2004,
 11695,
 5086,
 6982,
 1740,
 1464,
 6903,
 3083,
 5955,
 4928,
 11458,
 11475,
 11791,
 5470,

In [87]:
len(chosen_games)

500

In [90]:
unranked_data[unranked_data["game_id"].isin(chosen_games)].sort_values("pythag_error")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
4365,2504,2014-02-25,Philadelphia 76ers,Milwaukee Bucks,110,130,3.0,210.0,-108,-110,...,0.497805,-20,240,3.0,0.0,over,away_cover,2.0,push,push
7306,5997,2017-01-21,Philadelphia 76ers,Portland Trail Blazers,93,92,3.0,210.0,-106,-108,...,0.497742,1,185,3.0,0.0,under,home_cover,5.0,push,push
8611,8310,2018-12-20,Orlando Magic,San Antonio Spurs,90,129,3.0,210.0,-111,-107,...,0.504390,-39,219,3.0,0.0,over,away_cover,6.0,push,push
6286,2439,2014-02-12,Phoenix Suns,Miami Heat,97,103,3.0,210.0,-110,-105,...,0.505605,-6,200,3.0,0.0,under,away_cover,3.0,push,push
3973,6346,2017-03-15,New York Knicks,Indiana Pacers,87,81,3.0,210.0,-106,-108,...,0.497742,6,168,3.0,0.0,under,home_cover,1.0,push,push
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,10924,2021-03-30,New York Knicks,Miami Heat,88,98,3.5,206.0,-108,-109,...,0.498895,-10,186,3.5,4.5,under,away_cover,466.0,away_cover,under
3125,1465,2013-03-18,Dallas Mavericks,Oklahoma City Thunder,101,107,5.5,208.0,-108,-108,...,0.500000,-6,208,5.5,4.5,under,away_cover,486.0,away_cover,under
3110,3538,2015-02-01,Orlando Magic,Dallas Mavericks,93,108,7.0,209.5,-112,-108,...,0.504330,-15,201,7.0,4.5,under,away_cover,485.0,away_cover,under
4099,25,2011-03-08,Minnesota Timberwolves,Dallas Mavericks,105,108,7.0,209.5,-112,-110,...,0.502135,-3,213,7.0,4.5,over,push,494.0,away_cover,under
