In [261]:
from pulp import *
import numpy as np
import pandas as pd
import random
import datetime

In [262]:
sample_num = 500
target = pd.DataFrame.from_dict(
   {
        "home_spread": [3],
        "home_spread_ml": [-109],
        "away_spread_ml": [-101],
        "total": [210],
        "over": [-110],
        "under": [-100]
    }
)

In [263]:
target

Unnamed: 0,home_spread,home_spread_ml,away_spread_ml,total,over,under
0,3,-109,-101,210,-110,-100


In [264]:
target["implied_home"] = np.where(
    target['home_spread_ml'] < 0,
    1.0 - (100.00/(abs(target["home_spread_ml"])+100.0)),
    100.0/(target["home_spread_ml"] + 100.0)
)
target["implied_away"] = np.where(
    target['away_spread_ml'] < 0,
    1.0 - (100.00/(abs(target["away_spread_ml"])+100.0)),
    100.0/(target["home_spread_ml"] + 100.0)
)
target["implied_over"] = np.where(
    target['over'] < 0,
    1.0-(100.00/(abs(target["over"])+100.0)),
    100.0/(target["over"]+100.0)
)
target["implied_under"] = np.where(
    target['under'] < 0,
    1.0-(100.00/(abs(target["under"])+100.0)),
    100.0/(target["under"]+100.0)
)

target["home_win"] = round((target["implied_home"] * 1.0) / (target["implied_home"] + target["implied_away"]),7)
target["away_win"] = round((target["implied_away"] * 1.0) / (target["implied_home"] + target["implied_away"]),7)
target["over_prob"] = round((target["implied_over"] * 1.0) / (target["implied_over"] + target["implied_under"]),7)
target["under_prob"] = round((target["implied_under"] * 1.0) / (target["implied_over"] + target["implied_under"]),7)

In [265]:
true_target = target[["total", "home_spread", "home_win", "away_win", "over_prob", "under_prob"]]

In [266]:
true_target_values = {key: val[0] for key, val in true_target.to_dict().items()}

In [267]:
true_target_values

{'total': 210,
 'home_spread': 3,
 'home_win': 0.5092984,
 'away_win': 0.4907016,
 'over_prob': 0.5116279,
 'under_prob': 0.4883721}

In [268]:
clean_de_vigged = pd.read_excel("Sample_NBA_Data.xlsx")

In [269]:
clean_de_vigged.sort_values("game_id")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,away_true_prob,home_true_prob,under_true_prob,over_true_prob,home_margin,points,spread_home
3426,1,2011-03-04,Brooklyn Nets,Toronto Raptors,116,103,-2.5,203.5,-111,-111,0.5,0.5,0.500000,0.500000,13,219,-2.5
948,2,2011-03-05,Atlanta Hawks,Oklahoma City Thunder,104,111,-2.5,192.0,-111,-111,0.5,0.5,0.500000,0.500000,-7,215,-2.5
12276,3,2011-03-05,Boston Celtics,Golden State Warriors,107,103,-9.5,205.5,-109,-107,0.5,0.5,0.497774,0.502226,4,210,-9.5
11454,4,2011-03-05,Brooklyn Nets,Toronto Raptors,137,136,-2.0,206.0,-111,-111,0.5,0.5,0.500000,0.500000,1,273,-2.0
12104,5,2011-03-05,Dallas Mavericks,Indiana Pacers,116,108,-9.0,208.0,-108,-109,0.5,0.5,0.501105,0.498895,8,224,-9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3656,12344,2022-03-02,Houston Rockets,Los Angeles Clippers,100,113,7.0,228.0,-109,-106,0.5,0.5,0.496637,0.503363,-13,213,7.0
4679,12345,2022-03-02,Los Angeles Lakers,Dallas Mavericks,104,109,5.0,219.0,-110,-107,0.5,0.5,0.496684,0.503316,-5,213,5.0
751,12346,2022-03-02,Minnesota Timberwolves,Golden State Warriors,129,114,3.5,231.0,-110,-102,0.5,0.5,0.490834,0.509166,15,243,3.5
5450,12347,2022-03-02,Toronto Raptors,Brooklyn Nets,109,108,-8.0,219.0,-109,-106,0.5,0.5,0.496637,0.503363,1,217,-8.0


In [270]:
unranked_data = clean_de_vigged.copy(deep=True)

In [271]:
date_diff = (pd.to_datetime(unranked_data["game_date"], format='%Y-%m-%d') - datetime.datetime.utcnow()) /np.timedelta64(1, 'D')

unranked_data["pythag_error"] = (
    abs(unranked_data["spread_home"] - true_target_values["home_spread"]) + abs(unranked_data["total"] - true_target_values["total"])
)
#     + (0.00001 * date_diff))

conditions = [
    unranked_data["points"] > true_target_values["total"],
    unranked_data["points"] < true_target_values["total"],
    unranked_data["points"] == true_target_values["total"]
]

values = [
    "over",
    "under",
    "push"
]

unranked_data["total_result"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["home_score"] +  true_target_values["home_spread"] - unranked_data["away_score"]> 0,
    unranked_data["home_score"]  +  true_target_values["home_spread"] - unranked_data["away_score"]< 0,
    unranked_data["home_score"]  +  true_target_values["home_spread"] - unranked_data["away_score"]== 0
]

values = [
    "home_cover",
    "away_cover",
    "push"
]

unranked_data["spread_result"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["spread_home"] < true_target_values["home_spread"],
    unranked_data["spread_home"] > true_target_values["home_spread"],
    unranked_data["spread_home"] == true_target_values["home_spread"]
]

values = [
    "home",
    "away",
    "push"
]


unranked_data["predictive_spread_cover"] = np.select(
    conditions, values
)

conditions = [
    unranked_data["total"] < true_target_values["total"],
    unranked_data["total"] > true_target_values["total"],
    unranked_data["total"] == true_target_values["total"]
]

values = [
    "under",
    "over",
    "push"
]


unranked_data["predictive_total"] = np.select(
    conditions, values
)





In [272]:
unranked_data

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,under_true_prob,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total
0,2149,2014-01-01,Houston Rockets,Sacramento Kings,106,110,-10.0,215.5,-111,-107,...,0.495610,0.504390,-4,216,-10.0,18.5,over,away_cover,home,over
1,8549,2019-01-20,Indiana Pacers,Charlotte Hornets,120,95,-8.0,218.5,-109,-110,...,0.501090,0.498910,25,215,-8.0,19.5,over,home_cover,home,over
2,2228,2014-01-12,Portland Trail Blazers,Boston Celtics,112,104,-12.0,210.0,-108,-108,...,0.500000,0.500000,8,216,-12.0,15.0,over,home_cover,home,push
3,8628,2019-01-30,Brooklyn Nets,Chicago Bulls,122,117,-6.5,220.5,-111,-109,...,0.497835,0.502165,5,239,-6.5,20.0,over,home_cover,home,over
4,15,2011-03-06,Los Angeles Clippers,Denver Nuggets,100,94,-2.0,208.0,-106,-109,...,0.503363,0.496637,6,194,-2.0,7.0,under,home_cover,home,under
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12343,10044,2020-02-25,Houston Rockets,New York Knicks,123,112,-13.0,226.5,-110,-110,...,0.500000,0.500000,11,235,-13.0,32.5,over,home_cover,home,over
12344,1037,2012-12-31,Orlando Magic,Miami Heat,110,112,9.5,194.5,-102,-106,...,0.504714,0.495286,-2,222,9.5,22.0,over,home_cover,away,under
12345,7437,2018-02-25,Golden State Warriors,Oklahoma City Thunder,112,80,-10.0,233.0,-107,-108,...,0.501121,0.498879,32,192,-10.0,36.0,under,home_cover,home,over
12346,483,2012-03-21,Houston Rockets,Los Angeles Lakers,107,104,3.0,191.0,-108,-110,...,0.502195,0.497805,3,211,3.0,19.0,over,home_cover,push,under


In [273]:
unranked_data["pythag_error_rank"] = unranked_data["pythag_error"].rank(method="first")

In [274]:
unranked_data.sort_values("pythag_error_rank")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total,pythag_error_rank
3973,6346,2017-03-15,New York Knicks,Indiana Pacers,87,81,3.0,210.0,-106,-108,...,0.497742,6,168,3.0,0.0,under,home_cover,push,push,1.0
4365,2504,2014-02-25,Philadelphia 76ers,Milwaukee Bucks,110,130,3.0,210.0,-108,-110,...,0.497805,-20,240,3.0,0.0,over,away_cover,push,push,2.0
6286,2439,2014-02-12,Phoenix Suns,Miami Heat,97,103,3.0,210.0,-110,-105,...,0.505605,-6,200,3.0,0.0,under,away_cover,push,push,3.0
6957,1016,2012-12-29,Dallas Mavericks,Denver Nuggets,85,106,3.0,210.0,-108,-106,...,0.502258,-21,191,3.0,0.0,under,away_cover,push,push,4.0
7306,5997,2017-01-21,Philadelphia 76ers,Portland Trail Blazers,93,92,3.0,210.0,-106,-108,...,0.497742,1,185,3.0,0.0,under,home_cover,push,push,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11904,9412,2019-12-02,Los Angeles Clippers,Washington Wizards,150,125,-12.5,240.0,-107,-110,...,0.496684,25,275,-12.5,45.5,over,home_cover,home,over,12344.0
4875,12279,2022-02-16,Milwaukee Bucks,Indiana Pacers,128,119,-14.5,238.5,-111,-108,...,0.503270,9,247,-14.5,46.0,over,home_cover,home,over,12345.0
10601,11292,2021-05-11,Portland Trail Blazers,Houston Rockets,140,129,-15.0,238.5,-107,-110,...,0.496684,11,269,-15.0,46.5,over,home_cover,home,over,12346.0
2173,9380,2019-11-28,Milwaukee Bucks,Atlanta Hawks,111,102,-14.5,239.5,-110,-111,...,0.498925,9,213,-14.5,47.0,over,home_cover,home,over,12347.0


In [275]:
ranked_data = unranked_data[["game_id", "spread_home", "pythag_error", "points", "home_margin", "home_score", "away_score", "total_result", "spread_result", "pythag_error_rank", "predictive_spread_cover", "predictive_total"]].sort_values("pythag_error_rank").copy(deep=True)

In [276]:
ranked_data.to_excel("ranked_data_from_python.xlsx",index=False)

In [277]:
ranked_data

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
3973,6346,3.0,0.0,168,6,87,81,under,home_cover,1.0,push,push
4365,2504,3.0,0.0,240,-20,110,130,over,away_cover,2.0,push,push
6286,2439,3.0,0.0,200,-6,97,103,under,away_cover,3.0,push,push
6957,1016,3.0,0.0,191,-21,85,106,under,away_cover,4.0,push,push
7306,5997,3.0,0.0,185,1,93,92,under,home_cover,5.0,push,push
...,...,...,...,...,...,...,...,...,...,...,...,...
11904,9412,-12.5,45.5,275,25,150,125,over,home_cover,12344.0,home,over
4875,12279,-14.5,46.0,247,9,128,119,over,home_cover,12345.0,home,over
10601,11292,-15.0,46.5,269,11,140,129,over,home_cover,12346.0,home,over
2173,9380,-14.5,47.0,213,9,111,102,over,home_cover,12347.0,home,over


In [278]:
chosen_data = ranked_data[ranked_data["pythag_error_rank"] <= sample_num]

In [279]:
chosen_data["points"].median()

208.0

In [280]:
chosen_data["total_result"].value_counts()

under    276
over     218
push       6
Name: total_result, dtype: int64

In [281]:
chosen_data["spread_result"].value_counts()

away_cover    247
home_cover    230
push           23
Name: spread_result, dtype: int64

In [282]:
chosen_data.query("total_result == 'under'")

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
3973,6346,3.0,0.0,168,6,87,81,under,home_cover,1.0,push,push
6286,2439,3.0,0.0,200,-6,97,103,under,away_cover,3.0,push,push
6957,1016,3.0,0.0,191,-21,85,106,under,away_cover,4.0,push,push
7306,5997,3.0,0.0,185,1,93,92,under,home_cover,5.0,push,push
1559,9600,3.5,0.5,195,1,98,97,under,home_cover,9.0,away,push
...,...,...,...,...,...,...,...,...,...,...,...,...
4178,7655,1.5,4.5,209,3,106,103,under,home_cover,495.0,home,under
4343,5744,5.5,4.5,188,-30,79,109,under,away_cover,496.0,away,over
4483,10054,1.5,4.5,208,6,107,101,under,home_cover,497.0,home,under
4603,2627,2.5,4.5,198,12,105,93,under,home_cover,499.0,home,under


In [283]:
chosen_data["pythag_error_rank"].sum()

125250.0

In [284]:
chosen = LpVariable.dicts(
            "chosen",
            (
                np.array(unranked_data["game_id"])
            ),
            lowBound=0,
            cat="Binary",
        )

In [285]:
np.array(unranked_data["game_id"])

array([2149, 8549, 2228, ..., 7437,  483, 6883], dtype=int64)

In [286]:
chosen

{2149: chosen_2149,
 8549: chosen_8549,
 2228: chosen_2228,
 8628: chosen_8628,
 15: chosen_15,
 6415: chosen_6415,
 3334: chosen_3334,
 9734: chosen_9734,
 2702: chosen_2702,
 9102: chosen_9102,
 3887: chosen_3887,
 10287: chosen_10287,
 3966: chosen_3966,
 10366: chosen_10366,
 5151: chosen_5151,
 11551: chosen_11551,
 1280: chosen_1280,
 7680: chosen_7680,
 1359: chosen_1359,
 7759: chosen_7759,
 5388: chosen_5388,
 11788: chosen_11788,
 6336: chosen_6336,
 3492: chosen_3492,
 9892: chosen_9892,
 3097: chosen_3097,
 9497: chosen_9497,
 3571: chosen_3571,
 9971: chosen_9971,
 1754: chosen_1754,
 8154: chosen_8154,
 3018: chosen_3018,
 9418: chosen_9418,
 1912: chosen_1912,
 8312: chosen_8312,
 4519: chosen_4519,
 10919: chosen_10919,
 2860: chosen_2860,
 9260: chosen_9260,
 4835: chosen_4835,
 11235: chosen_11235,
 1833: chosen_1833,
 8233: chosen_8233,
 4440: chosen_4440,
 10840: chosen_10840,
 6257: chosen_6257,
 5546: chosen_5546,
 11946: chosen_11946,
 1596: chosen_1596,
 7996: c

In [287]:
pythag_errors = unranked_data[["game_id","pythag_error"]].set_index("game_id")["pythag_error"].to_dict()

In [288]:
prob = LpProblem("Pythag error minimize", LpMinimize)



In [289]:
prob += (
    lpSum(
        [
            chosen[game_id] * pythag_errors[game_id]
            for game_id in unranked_data["game_id"].to_list()
        ]
    ),
    "Minimize pythag error",
)

In [290]:
prob

Pythag_error_minimize:
MINIMIZE
12.0*chosen_1 + 20.0*chosen_10 + 13.5*chosen_100 + 17.0*chosen_1000 + 31.5*chosen_10000 + 24.0*chosen_10001 + 23.5*chosen_10002 + 10.5*chosen_10003 + 27.5*chosen_10004 + 14.5*chosen_10005 + 22.0*chosen_10006 + 30.5*chosen_10007 + 23.0*chosen_10008 + 8.0*chosen_10009 + 17.0*chosen_1001 + 27.0*chosen_10010 + 30.0*chosen_10011 + 14.0*chosen_10012 + 23.0*chosen_10013 + 34.0*chosen_10014 + 37.5*chosen_10015 + 21.5*chosen_10016 + 6.5*chosen_10017 + 10.5*chosen_10018 + 12.0*chosen_10019 + 8.5*chosen_1002 + 31.5*chosen_10020 + 29.5*chosen_10021 + 23.5*chosen_10022 + 35.5*chosen_10023 + 31.0*chosen_10024 + 1.5*chosen_10025 + 12.0*chosen_10026 + 29.0*chosen_10027 + 23.0*chosen_10028 + 28.0*chosen_10029 + 12.5*chosen_1003 + 29.0*chosen_10030 + 17.5*chosen_10031 + 28.0*chosen_10032 + 20.5*chosen_10033 + 20.5*chosen_10034 + 35.0*chosen_10035 + 35.0*chosen_10036 + 20.5*chosen_10037 + 20.5*chosen_10038 + 19.0*chosen_10039 + 20.0*chosen_1004 + 19.0*chosen_10040 + 9.5*ch

In [291]:
# prob += (
#     lpSum([chosen[game_id] for game_id in unranked_data["game_id"].to_list()]) == sample_num
# )

In [292]:
e = LpAffineExpression([(chosen[game_id],1) for game_id in unranked_data["game_id"].to_list()])
c = LpConstraint(e=e, sense=0, rhs=500)
prob.extend(c.makeElasticSubProblem(penalty=2, proportionFreeBound=))

In [293]:
predictive_spread_cover_push_games = unranked_data.query("predictive_spread_cover == 'push'")["game_id"].to_list()
predictive_home_covers = unranked_data.query("predictive_spread_cover == 'home'")["game_id"].to_list()
predictive_away_covers = unranked_data.query("predictive_spread_cover == 'away'")["game_id"].to_list()
print(len(predictive_spread_cover_push_games), len(predictive_home_covers), len(predictive_away_covers))

280 9211 2857


In [294]:
predictive_total_result_push_games = unranked_data.query("predictive_total == 'push'")["game_id"].to_list()
predictive_total_over_games = unranked_data.query("predictive_total == 'over'")["game_id"].to_list()
predictive_total_under_games = unranked_data.query("predictive_total == 'under'")["game_id"].to_list()
print(len(predictive_total_result_push_games), len(predictive_total_over_games), len(predictive_total_under_games))

154 6440 5754


In [295]:
spread_result_push_games =  unranked_data.query("spread_result == 'push'")["game_id"].to_list()
spread_result_home_games =  unranked_data.query("spread_result == 'home_cover'")["game_id"].to_list()
spread_result_away_games =  unranked_data.query("spread_result == 'away_cover'")["game_id"].to_list()
print(len(spread_result_push_games), len(spread_result_home_games), len(spread_result_away_games))

322 7702 4324


In [296]:
total_result_push_games =  unranked_data.query("total_result == 'push'")["game_id"].to_list()
total_result_over_games =  unranked_data.query("total_result == 'over'")["game_id"].to_list()
total_result_under_games =  unranked_data.query("total_result == 'under'")["game_id"].to_list()
print(len(total_result_push_games), len(total_result_over_games), len(total_result_under_games))

184 6257 5907


In [297]:
equal_median_games = unranked_data.query(f"points == {true_target_values['total']}")["game_id"].to_list()

In [298]:
over_median_games = unranked_data.query(f"points > {true_target_values['total']}")["game_id"].to_list()
under_median_games = unranked_data.query(f"points < {true_target_values['total']}")["game_id"].to_list()

In [299]:
## PREDICTIVE SPREAD COVER FOR HOME

prob += (
    true_target_values["home_win"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_spread_cover_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in predictive_home_covers]) - 0.5
    )

prob += (
    true_target_values["home_win"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_spread_cover_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in predictive_home_covers]) + 0.5
    )


## PREDICTIVE SPREAD COVER FOR AWAY
prob += (
    (1-true_target_values["home_win"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_spread_cover_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in predictive_away_covers]) - 0.5
    )
prob += (
    (1-true_target_values["home_win"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_spread_cover_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in predictive_away_covers]) + 0.5
    )


## PREDICTIVE TOTAL_RESULT for OVER

prob += (
    true_target_values["over_prob"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_total_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in predictive_total_over_games]) - 0.5
    )
prob += (
    true_target_values["over_prob"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_total_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in predictive_total_over_games]) + 0.5
    )


## PREDICTIVE TOTAL_RESULT for UNDER
prob += (
    (1-true_target_values["over_prob"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_total_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in predictive_total_under_games]) - 0.5
    )
prob += (
    (1-true_target_values["over_prob"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in predictive_total_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in predictive_total_under_games]) + 0.5
    )

In [300]:
## TOTAL_RESULT for OVER

prob += (
    true_target_values["over_prob"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in total_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in total_result_over_games]) - 0.5
    )
prob += (
    true_target_values["over_prob"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in total_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in total_result_over_games]) + 0.5
    )

## TOTAL_RESULT for UNDER
prob += (
    (1-true_target_values["over_prob"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in total_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in total_result_under_games]) - 0.5
    )
prob += (
    (1-true_target_values["over_prob"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in total_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in total_result_under_games]) + 0.5
    )



## SPREAD COVER FOR HOME

prob += (
    true_target_values["home_win"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in spread_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in spread_result_home_games]) - 0.5
    )

prob += (
    true_target_values["home_win"]  * 
    (sample_num - lpSum([chosen[game_id] for game_id in spread_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in spread_result_home_games]) + 0.5
    )


## SPREAD COVER FOR AWAY
prob += (
    (1-true_target_values["home_win"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in spread_result_push_games]))
    
    >= lpSum([chosen[game_id] for game_id in spread_result_away_games]) - 0.5
    )
prob += (
    (1-true_target_values["home_win"])  * 
    (sample_num - lpSum([chosen[game_id] for game_id in spread_result_push_games]))
    
    <= lpSum([chosen[game_id] for game_id in spread_result_away_games]) + 0.5
    )

In [301]:

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     >= lpSum([chosen[game_id] for game_id in over_median_games]) - 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     <= lpSum([chosen[game_id] for game_id in over_median_games]) + 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     >= lpSum([chosen[game_id] for game_id in under_median_games]) - 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     <= lpSum([chosen[game_id] for game_id in under_median_games]) + 4
#     )

In [302]:
prob

Pythag_error_minimize:
MINIMIZE
-2*None_elastic_SubProblem_neg_penalty_var + 2*None_elastic_SubProblem_pos_penalty_var + 12.0*chosen_1 + 20.0*chosen_10 + 13.5*chosen_100 + 17.0*chosen_1000 + 31.5*chosen_10000 + 24.0*chosen_10001 + 23.5*chosen_10002 + 10.5*chosen_10003 + 27.5*chosen_10004 + 14.5*chosen_10005 + 22.0*chosen_10006 + 30.5*chosen_10007 + 23.0*chosen_10008 + 8.0*chosen_10009 + 17.0*chosen_1001 + 27.0*chosen_10010 + 30.0*chosen_10011 + 14.0*chosen_10012 + 23.0*chosen_10013 + 34.0*chosen_10014 + 37.5*chosen_10015 + 21.5*chosen_10016 + 6.5*chosen_10017 + 10.5*chosen_10018 + 12.0*chosen_10019 + 8.5*chosen_1002 + 31.5*chosen_10020 + 29.5*chosen_10021 + 23.5*chosen_10022 + 35.5*chosen_10023 + 31.0*chosen_10024 + 1.5*chosen_10025 + 12.0*chosen_10026 + 29.0*chosen_10027 + 23.0*chosen_10028 + 28.0*chosen_10029 + 12.5*chosen_1003 + 29.0*chosen_10030 + 17.5*chosen_10031 + 28.0*chosen_10032 + 20.5*chosen_10033 + 20.5*chosen_10034 + 35.0*chosen_10035 + 35.0*chosen_10036 + 20.5*chosen_1003

In [303]:
prob.solve(pulp.PULP_CBC_CMD(timeLimit=60, msg=True))

1

In [304]:
chosen_games = []
for value in chosen:
    if chosen[value].varValue == 1:
        print(value,chosen[value].varValue)
        chosen_games.append(value)

1359 1.0
5862 1.0
11491 1.0
7838 1.0
8865 1.0
11875 1.0
5960 1.0
6810 1.0
4282 1.0
6178 1.0
6423 1.0
2800 1.0
4696 1.0
4143 1.0
5238 1.0
5870 1.0
5791 1.0
6265 1.0
8478 1.0
5802 1.0
7450 1.0
9433 1.0
11639 1.0
11876 1.0
10776 1.0
8558 1.0
11797 1.0
6904 1.0
5788 1.0
9559 1.0
11771 1.0
4897 1.0
8453 1.0
6082 1.0
3181 1.0
2211 1.0
6894 1.0
4190 1.0
11459 1.0
3005 1.0
5472 1.0
1540 1.0
11400 1.0
4953 1.0
607 1.0
9135 1.0
5817 1.0
3846 1.0
7323 1.0
2503 1.0
2725 1.0
10953 1.0
10716 1.0
8319 1.0
8019 1.0
9600 1.0
9456 1.0
2103 1.0
7226 1.0
7481 1.0
5085 1.0
4927 1.0
12196 1.0
5006 1.0
2308 1.0
9498 1.0
7839 1.0
6416 1.0
7779 1.0
9518 1.0
5330 1.0
9340 1.0
2715 1.0
5588 1.0
5488 1.0
2801 1.0
6220 1.0
5705 1.0
5547 1.0
6299 1.0
1953 1.0
5035 1.0
11119 1.0
9696 1.0
2495 1.0
5746 1.0
7315 1.0
9606 1.0
7078 1.0
3127 1.0
6683 1.0
11503 1.0
4945 1.0
9290 1.0
4866 1.0
9640 1.0
6796 1.0
7270 1.0
2213 1.0
10983 1.0
4899 1.0
11773 1.0
5840 1.0
7578 1.0
6258 1.0
11342 1.0
6443 1.0
5040 1.0
5365 1.0
489

In [305]:
len(chosen_games)

500

In [306]:
games_500 = unranked_data[unranked_data["game_id"].isin(chosen_games)].sort_values("pythag_error_rank")

In [307]:
true_target_values

{'total': 210,
 'home_spread': 3,
 'home_win': 0.5092984,
 'away_win': 0.4907016,
 'over_prob': 0.5116279,
 'under_prob': 0.4883721}

In [308]:
home_spread_cover_count = games_500.query("spread_result == 'home_cover'").shape[0]
away_spread_cover_count = games_500.query("spread_result == 'away_cover'").shape[0]
push_spread_cover_count = games_500.query("spread_result == 'push'").shape[0]

print(home_spread_cover_count, away_spread_cover_count, push_spread_cover_count,home_spread_cover_count + away_spread_cover_count + push_spread_cover_count)
print(home_spread_cover_count/(home_spread_cover_count + away_spread_cover_count), away_spread_cover_count/(home_spread_cover_count + away_spread_cover_count))

242 234 24 500
0.5084033613445378 0.49159663865546216


In [309]:
under_total_result_count = games_500.query("total_result == 'under'").shape[0]
over_total_result_count = games_500.query("total_result == 'over'").shape[0]
push_total_result_count = games_500.query("total_result == 'push'").shape[0]

print(over_total_result_count, under_total_result_count, push_total_result_count, under_total_result_count + over_total_result_count + push_total_result_count)
print(over_total_result_count/(under_total_result_count+ over_total_result_count), under_total_result_count/(over_total_result_count+ under_total_result_count))

252 241 7 500
0.5111561866125761 0.48884381338742394


In [310]:
home_predictive_spread_cover_count = games_500.query("predictive_spread_cover == 'home'").shape[0]
away_predictive_spread_cover_count = games_500.query("predictive_spread_cover == 'away'").shape[0]
push_predictive_spread_cover_count = games_500.query("predictive_spread_cover == 'push'").shape[0]

print(home_predictive_spread_cover_count, away_predictive_spread_cover_count, push_predictive_spread_cover_count,home_predictive_spread_cover_count + away_predictive_spread_cover_count + push_predictive_spread_cover_count)
print(home_predictive_spread_cover_count/(home_predictive_spread_cover_count + away_predictive_spread_cover_count), away_predictive_spread_cover_count/(home_predictive_spread_cover_count + away_predictive_spread_cover_count))

215 208 77 500
0.508274231678487 0.491725768321513


In [311]:
over_predictive_total_count = games_500.query("predictive_total == 'over'").shape[0]
under_predictive_total_count = games_500.query("predictive_total == 'under'").shape[0]
push_predictive_total_count = games_500.query("predictive_total == 'push'").shape[0]

print(over_predictive_total_count, under_predictive_total_count, push_predictive_total_count,over_predictive_total_count + under_predictive_total_count + push_predictive_total_count)
print(over_predictive_total_count/(over_predictive_total_count + under_predictive_total_count), under_predictive_total_count/(over_predictive_total_count + under_predictive_total_count))

236 226 38 500
0.5108225108225108 0.48917748917748916


In [312]:
over_actual_total_count = games_500.query(f"points > {true_target_values['total']}").shape[0]
away_actual_total_count = games_500.query(f"points < {true_target_values['total']}").shape[0]
push_actual_total_count = games_500.query(f"points == {true_target_values['total']}").shape[0]

print(over_actual_total_count, away_actual_total_count, push_actual_total_count,over_actual_total_count + away_actual_total_count + push_actual_total_count)
print(over_actual_total_count/(over_actual_total_count + away_actual_total_count), away_actual_total_count/(over_actual_total_count + away_actual_total_count))

252 241 7 500
0.5111561866125761 0.48884381338742394


In [313]:
games_500

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total,pythag_error_rank
3973,6346,2017-03-15,New York Knicks,Indiana Pacers,87,81,3.0,210.0,-106,-108,...,0.497742,6,168,3.0,0.0,under,home_cover,push,push,1.0
4365,2504,2014-02-25,Philadelphia 76ers,Milwaukee Bucks,110,130,3.0,210.0,-108,-110,...,0.497805,-20,240,3.0,0.0,over,away_cover,push,push,2.0
6286,2439,2014-02-12,Phoenix Suns,Miami Heat,97,103,3.0,210.0,-110,-105,...,0.505605,-6,200,3.0,0.0,under,away_cover,push,push,3.0
6957,1016,2012-12-29,Dallas Mavericks,Denver Nuggets,85,106,3.0,210.0,-108,-106,...,0.502258,-21,191,3.0,0.0,under,away_cover,push,push,4.0
7306,5997,2017-01-21,Philadelphia 76ers,Portland Trail Blazers,93,92,3.0,210.0,-106,-108,...,0.497742,1,185,3.0,0.0,under,home_cover,push,push,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7358,8940,2019-03-19,Cleveland Cavaliers,Detroit Pistons,126,119,6.5,211.5,-108,-111,...,0.496730,7,245,6.5,5.0,over,home_cover,away,over,636.0
7479,7975,2018-11-06,Denver Nuggets,Boston Celtics,115,107,-1.5,210.5,-109,-111,...,0.497835,8,222,-1.5,5.0,over,home_cover,home,over,638.0
8202,5564,2016-11-26,Denver Nuggets,Oklahoma City Thunder,129,132,2.5,214.5,-107,-105,...,0.502290,-3,261,2.5,5.0,over,push,home,over,642.0
9319,11720,2021-12-05,Cleveland Cavaliers,Utah Jazz,108,109,5.0,213.0,-106,-110,...,0.495548,-1,217,5.0,5.0,over,home_cover,away,over,653.0


In [314]:
games_500.to_excel("sampled_data_with_4_medians.xlsx",index=False)

In [315]:
for c in prob.constraints.values():
    print(type(c))

<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
<class 'pulp.pulp.LpConstraint'>
