In [160]:
from pulp import *
import numpy as np
import pandas as pd
import random
import datetime

In [161]:
# Number of historical games the optimiser is asked to select.
sample_num = 120

# Lines and prices of the game being modelled, held as a one-row frame.
target = pd.DataFrame(
    [
        dict(
            home_spread=12,
            home_spread_ml=-110,
            away_spread_ml=-110,
            total=240,
            over=-110,
            under=-110,
        )
    ]
)

In [162]:
# Echo the one-row frame of target lines and prices.
target

Unnamed: 0,home_spread,home_spread_ml,away_spread_ml,total,over,under
0,12,-110,-110,240,-110,-110


In [163]:
def _american_to_implied(odds):
    """Convert American odds to the implied probability (bookmaker margin included).

    Args:
        odds: pandas Series of American prices.

    Returns:
        numpy array, element-wise: ``1 - 100 / (|odds| + 100)`` for negative prices
        and ``100 / (odds + 100)`` for non-negative prices.
    """
    return np.where(
        odds < 0,
        1.0 - 100.0 / (odds.abs() + 100.0),
        100.0 / (odds + 100.0),
    )


# Every implied_* column is derived from its OWN price column. The away side
# previously read the home price in its positive-odds branch, which produced a
# wrong probability whenever the away price was positive.
for implied_col, price_col in (
    ("implied_home", "home_spread_ml"),
    ("implied_away", "away_spread_ml"),
    ("implied_over", "over"),
    ("implied_under", "under"),
):
    target[implied_col] = _american_to_implied(target[price_col])

# Remove the margin by normalising each two-way market so its sides sum to 1.
spread_market_sum = target["implied_home"] + target["implied_away"]
total_market_sum = target["implied_over"] + target["implied_under"]

target["home_win"] = (target["implied_home"] / spread_market_sum).round(7)
target["away_win"] = (target["implied_away"] / spread_market_sum).round(7)
target["over_prob"] = (target["implied_over"] / total_market_sum).round(7)
target["under_prob"] = (target["implied_under"] / total_market_sum).round(7)

In [164]:
# Keep only the two lines and the four normalised probabilities.
true_target = target.loc[
    :,
    ["total", "home_spread", "home_win", "away_win", "over_prob", "under_prob"],
]

In [165]:
# Flatten the one-row frame into {column: scalar}: to_dict() nests the values
# by index label, and label 0 is the only row.
true_target_values = {
    column: by_index[0] for column, by_index in true_target.to_dict().items()
}

In [166]:
# Echo the flattened target values that the later cells read by key.
true_target_values

{'total': 240,
 'home_spread': 12,
 'home_win': 0.5,
 'away_win': 0.5,
 'over_prob': 0.5,
 'under_prob': 0.5}

In [167]:
# Load the historical games workbook; the path is relative to the kernel's working directory.
# NOTE(review): the variable name suggests the *_true_prob columns already have the
# bookmaker margin removed upstream — confirm against whatever produced this file.
clean_de_vigged = pd.read_excel("Sample_NBA_Data.xlsx")

In [168]:
# Preview the games ordered by game_id; the sorted frame is only displayed, not stored.
clean_de_vigged.sort_values("game_id")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,away_true_prob,home_true_prob,under_true_prob,over_true_prob,home_margin,points,spread_home
3426,1,2011-03-04,Brooklyn Nets,Toronto Raptors,116,103,-2.5,203.5,-111,-111,0.5,0.5,0.500000,0.500000,13,219,-2.5
948,2,2011-03-05,Atlanta Hawks,Oklahoma City Thunder,104,111,-2.5,192.0,-111,-111,0.5,0.5,0.500000,0.500000,-7,215,-2.5
12276,3,2011-03-05,Boston Celtics,Golden State Warriors,107,103,-9.5,205.5,-109,-107,0.5,0.5,0.497774,0.502226,4,210,-9.5
11454,4,2011-03-05,Brooklyn Nets,Toronto Raptors,137,136,-2.0,206.0,-111,-111,0.5,0.5,0.500000,0.500000,1,273,-2.0
12104,5,2011-03-05,Dallas Mavericks,Indiana Pacers,116,108,-9.0,208.0,-108,-109,0.5,0.5,0.501105,0.498895,8,224,-9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3656,12344,2022-03-02,Houston Rockets,Los Angeles Clippers,100,113,7.0,228.0,-109,-106,0.5,0.5,0.496637,0.503363,-13,213,7.0
4679,12345,2022-03-02,Los Angeles Lakers,Dallas Mavericks,104,109,5.0,219.0,-110,-107,0.5,0.5,0.496684,0.503316,-5,213,5.0
751,12346,2022-03-02,Minnesota Timberwolves,Golden State Warriors,129,114,3.5,231.0,-110,-102,0.5,0.5,0.490834,0.509166,15,243,3.5
5450,12347,2022-03-02,Toronto Raptors,Brooklyn Nets,109,108,-8.0,219.0,-109,-106,0.5,0.5,0.496637,0.503363,1,217,-8.0


In [169]:
# Work on an independent copy so the loaded frame stays untouched
# (DataFrame.copy() is deep by default).
unranked_data = clean_de_vigged.copy()

In [170]:
def _label_by_sign(delta, positive, negative, zero="push"):
    """Label each element of ``delta`` by its sign.

    Args:
        delta: pandas Series of numeric differences.
        positive: label for elements greater than zero.
        negative: label for elements less than zero.
        zero: label for elements equal to zero.

    Returns:
        numpy array of string labels, one per element of ``delta``.
    """
    return np.select(
        [delta > 0, delta < 0, delta == 0],
        [positive, negative, zero],
        # np.select's implicit default is the integer 0, which recent NumPy releases
        # refuse to combine with string choices (TypeError). Passing the string "0"
        # keeps the label older releases produced for rows that match no condition
        # (i.e. NaN differences).
        default="0",
    )


# Distance of each historical game's closing lines from the target lines.
# Despite the column name this is the sum of absolute differences (L1 distance);
# no recency term is applied.
unranked_data["pythag_error"] = (
    (unranked_data["spread_home"] - true_target_values["home_spread"]).abs()
    + (unranked_data["total"] - true_target_values["total"]).abs()
)

# Actual combined score against the TARGET total.
unranked_data["total_result"] = _label_by_sign(
    unranked_data["points"] - true_target_values["total"],
    positive="over",
    negative="under",
)

# Actual home margin adjusted by the TARGET home spread.
unranked_data["spread_result"] = _label_by_sign(
    unranked_data["home_score"]
    + true_target_values["home_spread"]
    - unranked_data["away_score"],
    positive="home_cover",
    negative="away_cover",
)

# Historical closing spread against the target spread:
# a smaller historical spread is labelled "home", a larger one "away".
unranked_data["predictive_spread_cover"] = _label_by_sign(
    unranked_data["spread_home"] - true_target_values["home_spread"],
    positive="away",
    negative="home",
)

# Historical closing total against the target total.
unranked_data["predictive_total"] = _label_by_sign(
    unranked_data["total"] - true_target_values["total"],
    positive="over",
    negative="under",
)





In [171]:
# Display the frame with the derived error and label columns appended.
unranked_data

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,under_true_prob,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total
0,2149,2014-01-01,Houston Rockets,Sacramento Kings,106,110,-10.0,215.5,-111,-107,...,0.495610,0.504390,-4,216,-10.0,46.5,under,home_cover,home,under
1,8549,2019-01-20,Indiana Pacers,Charlotte Hornets,120,95,-8.0,218.5,-109,-110,...,0.501090,0.498910,25,215,-8.0,41.5,under,home_cover,home,under
2,2228,2014-01-12,Portland Trail Blazers,Boston Celtics,112,104,-12.0,210.0,-108,-108,...,0.500000,0.500000,8,216,-12.0,54.0,under,home_cover,home,under
3,8628,2019-01-30,Brooklyn Nets,Chicago Bulls,122,117,-6.5,220.5,-111,-109,...,0.497835,0.502165,5,239,-6.5,38.0,under,home_cover,home,under
4,15,2011-03-06,Los Angeles Clippers,Denver Nuggets,100,94,-2.0,208.0,-106,-109,...,0.503363,0.496637,6,194,-2.0,46.0,under,home_cover,home,under
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12343,10044,2020-02-25,Houston Rockets,New York Knicks,123,112,-13.0,226.5,-110,-110,...,0.500000,0.500000,11,235,-13.0,38.5,under,home_cover,home,under
12344,1037,2012-12-31,Orlando Magic,Miami Heat,110,112,9.5,194.5,-102,-106,...,0.504714,0.495286,-2,222,9.5,48.0,under,home_cover,home,under
12345,7437,2018-02-25,Golden State Warriors,Oklahoma City Thunder,112,80,-10.0,233.0,-107,-108,...,0.501121,0.498879,32,192,-10.0,29.0,under,home_cover,home,under
12346,483,2012-03-21,Houston Rockets,Los Angeles Lakers,107,104,3.0,191.0,-108,-110,...,0.502195,0.497805,3,211,3.0,58.0,under,home_cover,home,under


In [172]:
# Rank games by pythag_error (1 = smallest). method="first" breaks ties by row
# order, so every game receives a distinct rank.
unranked_data["pythag_error_rank"] = unranked_data["pythag_error"].rank(method="first")

In [173]:
# Preview the games from closest to farthest; the sorted frame is only displayed.
unranked_data.sort_values("pythag_error_rank")

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total,pythag_error_rank
9116,10831,2021-03-19,Washington Wizards,Utah Jazz,131,122,10.5,241.0,-111,-108,...,0.503270,9,253,10.5,2.5,over,home_cover,home,over,1.0
6106,6846,2017-11-30,Los Angeles Lakers,Golden State Warriors,123,127,13.0,242.0,-114,-114,...,0.500000,-4,250,13.0,3.0,over,home_cover,away,over,2.0
2487,10035,2020-02-24,Golden State Warriors,New Orleans Pelicans,101,115,10.0,238.0,-110,-108,...,0.502195,-14,216,10.0,4.0,under,away_cover,home,under,3.0
3284,10036,2020-02-24,Golden State Warriors,New Orleans Pelicans,101,115,10.0,238.0,-110,-108,...,0.502195,-14,216,10.0,4.0,under,away_cover,home,under,4.0
8153,10047,2020-02-25,Washington Wizards,Milwaukee Bucks,134,137,12.5,243.5,-109,-108,...,0.501105,-3,271,12.5,4.0,over,home_cover,away,over,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,5239,2016-03-29,Utah Jazz,Los Angeles Lakers,123,75,-14.0,190.0,-109,-108,...,0.501105,48,198,-14.0,76.0,under,home_cover,home,under,12344.0
4448,3529,2015-02-01,Atlanta Hawks,Philadelphia 76ers,91,85,-16.5,192.0,-107,-111,...,0.495610,6,176,-16.5,76.5,under,home_cover,home,under,12345.0
11468,3876,2015-03-29,Chicago Bulls,New York Knicks,111,80,-17.5,190.0,-108,-108,...,0.500000,31,191,-17.5,79.5,under,home_cover,home,under,12346.0
2542,3477,2015-01-25,Memphis Grizzlies,Philadelphia 76ers,101,83,-18.0,190.0,-108,-108,...,0.500000,18,184,-18.0,80.0,under,home_cover,home,under,12347.0


In [174]:
# Slim, rank-ordered copy holding only the columns used for sampling diagnostics.
ranked_data = (
    unranked_data.loc[
        :,
        [
            "game_id",
            "spread_home",
            "pythag_error",
            "points",
            "home_margin",
            "home_score",
            "away_score",
            "total_result",
            "spread_result",
            "pythag_error_rank",
            "predictive_spread_cover",
            "predictive_total",
        ],
    ]
    .sort_values("pythag_error_rank")
    .copy()
)

In [175]:
# Persist the ranked table next to the notebook; the row index is not written.
ranked_data.to_excel("ranked_data_from_python.xlsx",index=False)

In [176]:
# Display the ranked table.
ranked_data

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
9116,10831,10.5,2.5,253,9,131,122,over,home_cover,1.0,home,over
6106,6846,13.0,3.0,250,-4,123,127,over,home_cover,2.0,away,over
2487,10035,10.0,4.0,216,-14,101,115,under,away_cover,3.0,home,under
3284,10036,10.0,4.0,216,-14,101,115,under,away_cover,4.0,home,under
8153,10047,12.5,4.0,271,-3,134,137,over,home_cover,5.0,away,over
...,...,...,...,...,...,...,...,...,...,...,...,...
537,5239,-14.0,76.0,198,48,123,75,under,home_cover,12344.0,home,under
4448,3529,-16.5,76.5,176,6,91,85,under,home_cover,12345.0,home,under
11468,3876,-17.5,79.5,191,31,111,80,under,home_cover,12346.0,home,under
2542,3477,-18.0,80.0,184,18,101,83,under,home_cover,12347.0,home,under


In [177]:
# Naive baseline sample: the sample_num games with the smallest pythag_error rank.
chosen_data = ranked_data.loc[ranked_data["pythag_error_rank"].le(sample_num)]

In [178]:
# Median of the points column across the naive baseline sample.
chosen_data["points"].median()

238.5

In [179]:
# Over / under / push split of the baseline sample against the target total.
chosen_data["total_result"].value_counts()

under    63
over     55
push      2
Name: total_result, dtype: int64

In [180]:
# Home-cover / away-cover / push split of the baseline sample against the target spread.
chosen_data["spread_result"].value_counts()

home_cover    78
away_cover    40
push           2
Name: spread_result, dtype: int64

In [181]:
# Inspect the baseline games labelled "under" against the target total.
chosen_data.query("total_result == 'under'")

Unnamed: 0,game_id,spread_home,pythag_error,points,home_margin,home_score,away_score,total_result,spread_result,pythag_error_rank,predictive_spread_cover,predictive_total
2487,10035,10.0,4.0,216,-14,101,115,under,away_cover,3.0,home,under
3284,10036,10.0,4.0,216,-14,101,115,under,away_cover,4.0,home,under
11182,9795,10.5,5.5,214,-20,97,117,under,away_cover,11.0,home,under
11526,8188,11.0,5.5,239,-17,111,128,under,away_cover,12.0,home,under
8189,10758,8.5,6.0,236,2,119,117,under,home_cover,14.0,home,under
...,...,...,...,...,...,...,...,...,...,...,...,...
174,10058,7.5,11.5,194,-10,92,102,under,home_cover,115.0,home,under
202,5407,11.5,11.5,214,20,117,97,under,home_cover,116.0,home,under
1027,7210,8.5,11.5,231,-7,112,119,under,home_cover,118.0,home,under
1163,6676,9.0,11.5,235,-19,108,127,under,away_cover,119.0,home,under


In [182]:
# Sanity check: ranks are the distinct values 1..N, so the top sample_num rows
# should sum to sample_num * (sample_num + 1) / 2.
chosen_data["pythag_error_rank"].sum()

7260.0

In [183]:
# One binary decision variable per game_id, named "chosen_<game_id>";
# a value of 1 means the game is part of the sample.
game_ids = unranked_data["game_id"].to_numpy()
chosen = LpVariable.dicts("chosen", game_ids, lowBound=0, cat="Binary")

In [184]:
# Echo the game_id values that key the decision variables.
np.array(unranked_data["game_id"])

array([2149, 8549, 2228, ..., 7437,  483, 6883], dtype=int64)

In [185]:
# Echo the {game_id: LpVariable} mapping (one entry per game, so the output is long).
chosen

{2149: chosen_2149,
 8549: chosen_8549,
 2228: chosen_2228,
 8628: chosen_8628,
 15: chosen_15,
 6415: chosen_6415,
 3334: chosen_3334,
 9734: chosen_9734,
 2702: chosen_2702,
 9102: chosen_9102,
 3887: chosen_3887,
 10287: chosen_10287,
 3966: chosen_3966,
 10366: chosen_10366,
 5151: chosen_5151,
 11551: chosen_11551,
 1280: chosen_1280,
 7680: chosen_7680,
 1359: chosen_1359,
 7759: chosen_7759,
 5388: chosen_5388,
 11788: chosen_11788,
 6336: chosen_6336,
 3492: chosen_3492,
 9892: chosen_9892,
 3097: chosen_3097,
 9497: chosen_9497,
 3571: chosen_3571,
 9971: chosen_9971,
 1754: chosen_1754,
 8154: chosen_8154,
 3018: chosen_3018,
 9418: chosen_9418,
 1912: chosen_1912,
 8312: chosen_8312,
 4519: chosen_4519,
 10919: chosen_10919,
 2860: chosen_2860,
 9260: chosen_9260,
 4835: chosen_4835,
 11235: chosen_11235,
 1833: chosen_1833,
 8233: chosen_8233,
 4440: chosen_4440,
 10840: chosen_10840,
 6257: chosen_6257,
 5546: chosen_5546,
 11946: chosen_11946,
 1596: chosen_1596,
 7996: c

In [186]:
# Lookup table {game_id: pythag_error} used as the objective coefficients.
pythag_errors = unranked_data.set_index("game_id")["pythag_error"].to_dict()

In [187]:
# Minimisation model. The name is spelled with underscores because PuLP does not
# permit spaces in problem names: it warns and substitutes "_" itself, so this is
# the same final name without the warning.
prob = LpProblem("Pythag_error_minimize", LpMinimize)



In [188]:
# Objective: total pythag_error of the selected games.
objective_terms = [
    chosen[gid] * pythag_errors[gid] for gid in unranked_data["game_id"].tolist()
]
prob += lpSum(objective_terms), "Minimize pythag error"

In [189]:
# Echo the model to inspect the objective built so far.
prob

Pythag_error_minimize:
MINIMIZE
51.0*chosen_1 + 59.0*chosen_10 + 48.5*chosen_100 + 56.0*chosen_1000 + 20.5*chosen_10000 + 33.0*chosen_10001 + 27.5*chosen_10002 + 49.5*chosen_10003 + 31.5*chosen_10004 + 40.5*chosen_10005 + 25.0*chosen_10006 + 21.5*chosen_10007 + 16.0*chosen_10008 + 45.0*chosen_10009 + 56.0*chosen_1001 + 14.0*chosen_10010 + 9.0*chosen_10011 + 45.0*chosen_10012 + 22.0*chosen_10013 + 29.0*chosen_10014 + 28.5*chosen_10015 + 17.5*chosen_10016 + 32.5*chosen_10017 + 37.5*chosen_10018 + 27.0*chosen_10019 + 45.5*chosen_1002 + 7.5*chosen_10020 + 31.5*chosen_10021 + 35.5*chosen_10022 + 21.5*chosen_10023 + 18.0*chosen_10024 + 37.5*chosen_10025 + 27.0*chosen_10026 + 45.0*chosen_10027 + 37.0*chosen_10028 + 43.0*chosen_10029 + 51.5*chosen_1003 + 34.0*chosen_10030 + 37.5*chosen_10031 + 21.0*chosen_10032 + 18.5*chosen_10033 + 18.5*chosen_10034 + 4.0*chosen_10035 + 4.0*chosen_10036 + 34.5*chosen_10037 + 34.5*chosen_10038 + 37.0*chosen_10039 + 59.0*chosen_1004 + 37.0*chosen_10040 + 42.5*c

In [190]:
# Exactly sample_num games must be selected.
selected_count = lpSum([chosen[gid] for gid in unranked_data["game_id"].tolist()])
prob += selected_count == sample_num

In [191]:
# game_id lists per predictive spread label (closing spread vs. target spread).
spread_label = unranked_data["predictive_spread_cover"]
predictive_spread_cover_push_games = unranked_data.loc[spread_label == "push", "game_id"].tolist()
predictive_home_covers = unranked_data.loc[spread_label == "home", "game_id"].tolist()
predictive_away_covers = unranked_data.loc[spread_label == "away", "game_id"].tolist()
print(
    *map(
        len,
        (predictive_spread_cover_push_games, predictive_home_covers, predictive_away_covers),
    )
)

29 12219 100


In [192]:
# game_id lists per predictive total label (closing total vs. target total).
total_label = unranked_data["predictive_total"]
predictive_total_result_push_games = unranked_data.loc[total_label == "push", "game_id"].tolist()
predictive_total_over_games = unranked_data.loc[total_label == "over", "game_id"].tolist()
predictive_total_under_games = unranked_data.loc[total_label == "under", "game_id"].tolist()
print(
    *map(
        len,
        (
            predictive_total_result_push_games,
            predictive_total_over_games,
            predictive_total_under_games,
        ),
    )
)

12 63 12273


In [193]:
# game_id lists per actual spread outcome against the target spread.
spread_outcome = unranked_data["spread_result"]
spread_result_push_games = unranked_data.loc[spread_outcome == "push", "game_id"].tolist()
spread_result_home_games = unranked_data.loc[spread_outcome == "home_cover", "game_id"].tolist()
spread_result_away_games = unranked_data.loc[spread_outcome == "away_cover", "game_id"].tolist()
print(
    *map(
        len,
        (spread_result_push_games, spread_result_home_games, spread_result_away_games),
    )
)

215 10496 1637


In [194]:
# game_id lists per actual total outcome against the target total.
total_outcome = unranked_data["total_result"]
total_result_push_games = unranked_data.loc[total_outcome == "push", "game_id"].tolist()
total_result_over_games = unranked_data.loc[total_outcome == "over", "game_id"].tolist()
total_result_under_games = unranked_data.loc[total_outcome == "under", "game_id"].tolist()
print(
    *map(
        len,
        (total_result_push_games, total_result_over_games, total_result_under_games),
    )
)

108 1121 11119


In [195]:
# Games whose points value equals the target total exactly.
equal_median_games = unranked_data.loc[
    unranked_data["points"] == true_target_values["total"], "game_id"
].tolist()

In [196]:
# Games whose points value lies strictly above / strictly below the target total.
points_scored = unranked_data["points"]
over_median_games = unranked_data.loc[
    points_scored > true_target_values["total"], "game_id"
].tolist()
under_median_games = unranked_data.loc[
    points_scored < true_target_values["total"], "game_id"
].tolist()

In [197]:
## PREDICTIVE LABEL BALANCE
# Among the selected games that are not a push, each side's count must sit within
# half a game of its target share. Constraints are added in the order
# home, away, over, under — each as a ">=" / "<=" pair.
home_share = true_target_values["home_win"]
over_share = true_target_values["over_prob"]

for side_share, push_ids, side_ids in (
    (home_share, predictive_spread_cover_push_games, predictive_home_covers),
    (1 - home_share, predictive_spread_cover_push_games, predictive_away_covers),
    (over_share, predictive_total_result_push_games, predictive_total_over_games),
    (1 - over_share, predictive_total_result_push_games, predictive_total_under_games),
):
    # Target count for this side = share * (selected games that are not a push).
    expected_count = side_share * (
        sample_num - lpSum([chosen[gid] for gid in push_ids])
    )
    actual_count = lpSum([chosen[gid] for gid in side_ids])

    prob += expected_count >= actual_count - 0.5
    prob += expected_count <= actual_count + 0.5

In [198]:
## ACTUAL RESULT BALANCE
# Among the selected games that are not a push, each actual outcome's count must
# sit within half a game of its target share. Constraints are added in the order
# over, under, home cover, away cover — each as a ">=" / "<=" pair.
over_share = true_target_values["over_prob"]
home_share = true_target_values["home_win"]

for side_share, push_ids, side_ids in (
    (over_share, total_result_push_games, total_result_over_games),
    (1 - over_share, total_result_push_games, total_result_under_games),
    (home_share, spread_result_push_games, spread_result_home_games),
    (1 - home_share, spread_result_push_games, spread_result_away_games),
):
    # Target count for this outcome = share * (selected games that are not a push).
    expected_count = side_share * (
        sample_num - lpSum([chosen[gid] for gid in push_ids])
    )
    actual_count = lpSum([chosen[gid] for gid in side_ids])

    prob += expected_count >= actual_count - 0.5
    prob += expected_count <= actual_count + 0.5

In [199]:

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     >= lpSum([chosen[game_id] for game_id in over_median_games]) - 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     <= lpSum([chosen[game_id] for game_id in over_median_games]) + 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     >= lpSum([chosen[game_id] for game_id in under_median_games]) - 4
#     )

# prob += (
#     (500 - lpSum([chosen[game_id] for game_id in equal_median_games]))/2.0
    
#     <= lpSum([chosen[game_id] for game_id in under_median_games]) + 4
#     )

In [200]:
# Echo the model again now that the constraints have been added.
prob

Pythag_error_minimize:
MINIMIZE
51.0*chosen_1 + 59.0*chosen_10 + 48.5*chosen_100 + 56.0*chosen_1000 + 20.5*chosen_10000 + 33.0*chosen_10001 + 27.5*chosen_10002 + 49.5*chosen_10003 + 31.5*chosen_10004 + 40.5*chosen_10005 + 25.0*chosen_10006 + 21.5*chosen_10007 + 16.0*chosen_10008 + 45.0*chosen_10009 + 56.0*chosen_1001 + 14.0*chosen_10010 + 9.0*chosen_10011 + 45.0*chosen_10012 + 22.0*chosen_10013 + 29.0*chosen_10014 + 28.5*chosen_10015 + 17.5*chosen_10016 + 32.5*chosen_10017 + 37.5*chosen_10018 + 27.0*chosen_10019 + 45.5*chosen_1002 + 7.5*chosen_10020 + 31.5*chosen_10021 + 35.5*chosen_10022 + 21.5*chosen_10023 + 18.0*chosen_10024 + 37.5*chosen_10025 + 27.0*chosen_10026 + 45.0*chosen_10027 + 37.0*chosen_10028 + 43.0*chosen_10029 + 51.5*chosen_1003 + 34.0*chosen_10030 + 37.5*chosen_10031 + 21.0*chosen_10032 + 18.5*chosen_10033 + 18.5*chosen_10034 + 4.0*chosen_10035 + 4.0*chosen_10036 + 34.5*chosen_10037 + 34.5*chosen_10038 + 37.0*chosen_10039 + 59.0*chosen_1004 + 37.0*chosen_10040 + 42.5*c

In [201]:
# Solve with CBC, capped at 120 seconds. PULP_CBC_CMD is referenced through the
# name the star-import binds directly; the notebook never runs `import pulp`, so
# going through `pulp.` relied on a name that is not explicitly imported.
solve_status = prob.solve(PULP_CBC_CMD(timeLimit=120, msg=True))

# Anything other than "Optimal" means the variable values read by the following
# cells may not satisfy the constraints (e.g. fewer than sample_num games selected),
# so make that impossible to miss.
if solve_status != LpStatusOptimal:
    print(
        f"WARNING: solver finished with status {solve_status} "
        f"({LpStatus[solve_status]}); the selection below is not a valid optimum."
    )

# Keep the numeric status as the cell's displayed value.
solve_status

-1

In [202]:
# Collect the game_ids whose decision variable is switched on.
chosen_games = []
for game_id, variable in chosen.items():
    selected_value = variable.varValue
    # varValue is None when the solver assigned no value. A binary may also come
    # back as a float that is not exactly 1 (e.g. 0.9999999), so compare against
    # the midpoint instead of testing equality with 1.
    if selected_value is not None and selected_value > 0.5:
        print(game_id, selected_value)
        chosen_games.append(game_id)

9497 1.0
9260 1.0
11235 1.0
11243 1.0
2641 1.0
11329 1.0
9897 1.0
11891 1.0
10981 1.0
3870 1.0
6375 1.0
8572 1.0
7937 1.0
5567 1.0
10558 1.0
6586 1.0
11314 1.0
9826 1.0
8957 1.0
4640 1.0
3364 1.0
9008 1.0
9561 1.0
10789 1.0
6758 1.0
8832 1.0
10156 1.0
11249 1.0
6006 1.0
9827 1.0
8789 1.0
9974 1.0
4781 1.0
7998 1.0
10692 1.0
9867 1.0
3581 1.0
11245 1.0
10484 1.0
10010 1.0
7324 1.0
7923 1.0
11239 1.0
3134 1.0
11905 1.0
4557 1.0
5945 1.0
11247 1.0
10255 1.0
9667 1.0
8877 1.0
5120 1.0
10383 1.0
3934 1.0
9939 1.0
2670 1.0
4627 1.0
8814 1.0
11192 1.0
6846 1.0
10085 1.0
11193 1.0
8523 1.0
10485 1.0
9853 1.0
11178 1.0
11197 1.0
10243 1.0
11130 1.0
10500 1.0
10956 1.0
4416 1.0
10093 1.0
3935 1.0
4646 1.0
10809 1.0
10109 1.0
12280 1.0
10047 1.0
9891 1.0
9912 1.0
9991 1.0
7829 1.0
10831 1.0
9383 1.0
9778 1.0
8653 1.0
10654 1.0
10338 1.0
9074 1.0
11069 1.0
1408 1.0
5121 1.0
9966 1.0
168 1.0
4192 1.0
11224 1.0
10495 1.0
5992 1.0
11127 1.0
10653 1.0
3858 1.0
9412 1.0
5540 1.0
8537 1.0
7828 1.0
12250

In [203]:
# Number of games flagged as selected; the model constrains this to sample_num,
# so any other value means the solution does not satisfy the model.
len(chosen_games)

113

In [204]:
# Full rows of the selected games, closest to the target lines first.
# (The variable name is historical; the row count is whatever the solver selected.)
is_selected = unranked_data["game_id"].isin(chosen_games)
games_500 = unranked_data.loc[is_selected].sort_values("pythag_error_rank")

In [205]:
# Re-display the target values for comparison with the sample summaries below.
true_target_values

{'total': 240,
 'home_spread': 12,
 'home_win': 0.5,
 'away_win': 0.5,
 'over_prob': 0.5,
 'under_prob': 0.5}

In [206]:
# Actual spread outcomes in the selected sample: counts, then shares among non-push games.
spread_outcomes = games_500["spread_result"]
home_spread_cover_count = int((spread_outcomes == "home_cover").sum())
away_spread_cover_count = int((spread_outcomes == "away_cover").sum())
push_spread_cover_count = int((spread_outcomes == "push").sum())
decided_spread_count = home_spread_cover_count + away_spread_cover_count

print(
    home_spread_cover_count,
    away_spread_cover_count,
    push_spread_cover_count,
    decided_spread_count + push_spread_cover_count,
)
print(
    home_spread_cover_count / decided_spread_count,
    away_spread_cover_count / decided_spread_count,
)

50 61 2 113
0.45045045045045046 0.5495495495495496


In [207]:
# Actual total outcomes in the selected sample: counts, then shares among non-push games.
total_outcomes = games_500["total_result"]
under_total_result_count = int((total_outcomes == "under").sum())
over_total_result_count = int((total_outcomes == "over").sum())
push_total_result_count = int((total_outcomes == "push").sum())
decided_total_count = under_total_result_count + over_total_result_count

print(
    over_total_result_count,
    under_total_result_count,
    push_total_result_count,
    decided_total_count + push_total_result_count,
)
print(
    over_total_result_count / decided_total_count,
    under_total_result_count / decided_total_count,
)

56 57 0 113
0.49557522123893805 0.504424778761062


In [208]:
# Predictive spread labels in the selected sample: counts, then shares among non-push games.
predictive_spread_labels = games_500["predictive_spread_cover"]
home_predictive_spread_cover_count = int((predictive_spread_labels == "home").sum())
away_predictive_spread_cover_count = int((predictive_spread_labels == "away").sum())
push_predictive_spread_cover_count = int((predictive_spread_labels == "push").sum())
decided_predictive_spread_count = (
    home_predictive_spread_cover_count + away_predictive_spread_cover_count
)

print(
    home_predictive_spread_cover_count,
    away_predictive_spread_cover_count,
    push_predictive_spread_cover_count,
    decided_predictive_spread_count + push_predictive_spread_cover_count,
)
print(
    home_predictive_spread_cover_count / decided_predictive_spread_count,
    away_predictive_spread_cover_count / decided_predictive_spread_count,
)

53 56 4 113
0.48623853211009177 0.5137614678899083


In [209]:
# Predictive total labels in the selected sample: counts, then shares among non-push games.
predictive_total_labels = games_500["predictive_total"]
over_predictive_total_count = int((predictive_total_labels == "over").sum())
under_predictive_total_count = int((predictive_total_labels == "under").sum())
push_predictive_total_count = int((predictive_total_labels == "push").sum())
decided_predictive_total_count = over_predictive_total_count + under_predictive_total_count

print(
    over_predictive_total_count,
    under_predictive_total_count,
    push_predictive_total_count,
    decided_predictive_total_count + push_predictive_total_count,
)
print(
    over_predictive_total_count / decided_predictive_total_count,
    under_predictive_total_count / decided_predictive_total_count,
)

51 58 4 113
0.46788990825688076 0.5321100917431193


In [210]:
# Re-derive the over / under / push split straight from the points column as a
# cross-check of the total_result labels. The "below the total" count is named
# `under_...`; it was previously called `away_...`, which described something else.
actual_points = games_500["points"]
target_total = true_target_values["total"]

over_actual_total_count = int((actual_points > target_total).sum())
under_actual_total_count = int((actual_points < target_total).sum())
push_actual_total_count = int((actual_points == target_total).sum())
decided_actual_total_count = over_actual_total_count + under_actual_total_count

print(
    over_actual_total_count,
    under_actual_total_count,
    push_actual_total_count,
    decided_actual_total_count + push_actual_total_count,
)
print(
    over_actual_total_count / decided_actual_total_count,
    under_actual_total_count / decided_actual_total_count,
)

56 57 0 113
0.49557522123893805 0.504424778761062


In [211]:
# Display the selected games, ordered by pythag_error_rank.
games_500

Unnamed: 0,game_id,game_date,home_team,away_team,home_score,away_score,spread,total,over_odds,under_odds,...,over_true_prob,home_margin,points,spread_home,pythag_error,total_result,spread_result,predictive_spread_cover,predictive_total,pythag_error_rank
9116,10831,2021-03-19,Washington Wizards,Utah Jazz,131,122,10.5,241.0,-111,-108,...,0.503270,9,253,10.5,2.5,over,home_cover,home,over,1.0
6106,6846,2017-11-30,Los Angeles Lakers,Golden State Warriors,123,127,13.0,242.0,-114,-114,...,0.500000,-4,250,13.0,3.0,over,home_cover,away,over,2.0
8153,10047,2020-02-25,Washington Wizards,Milwaukee Bucks,134,137,12.5,243.5,-109,-108,...,0.501105,-3,271,12.5,4.0,over,home_cover,away,over,5.0
1921,11314,2021-05-14,Indiana Pacers,Milwaukee Bucks,133,142,9.0,241.5,-108,-110,...,0.497805,-9,275,9.0,4.5,over,home_cover,home,over,7.0
2663,10789,2021-03-14,Washington Wizards,Milwaukee Bucks,119,125,13.0,236.0,-110,-108,...,0.502195,-6,244,13.0,5.0,over,home_cover,away,under,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5957,4627,2016-01-21,Brooklyn Nets,Cleveland Cavaliers,78,91,13.0,197.0,-106,-106,...,0.500000,-13,169,13.0,44.0,under,away_cover,away,under,6169.0
7670,4646,2016-01-23,Los Angeles Lakers,San Antonio Spurs,95,108,16.0,199.0,-106,-108,...,0.497742,-13,203,16.0,45.0,under,away_cover,away,under,6528.0
2382,3364,2015-01-09,New York Knicks,Houston Rockets,96,120,12.5,194.5,-112,-108,...,0.504330,-24,216,12.5,46.0,under,away_cover,away,under,6797.0
11875,3858,2015-03-26,New York Knicks,Los Angeles Clippers,80,111,15.0,197.0,-106,-109,...,0.496637,-31,191,15.0,46.0,under,away_cover,away,under,6939.0


In [212]:
# Persist the selected sample next to the notebook; the row index is not written.
games_500.to_excel("sampled_data_with_4_medians.xlsx",index=False)