In [2]:
import pandas as pd
import numpy as np
import math
from numba import njit, prange
import dask
import dask.array as da
import cupy as cp
import itertools
import logging
import numba
import numba.cuda
from dask import delayed
import dask.config

#from dask_cuda import LocalCUDACluster
#from dask.distributed import Client

In [3]:
def get_size(d,f,q,r,t,w):
    # use this to estimate how many iterations in the for loop
    num_teams = len(q)* ( len(r)* (len(r)-1) ) * ( len(w) * (len(w)-1) * (len(w)-2) ) * len(t) * len(f) * len(d)
    return num_teams 


In [4]:
def get_players_at_value(z,players):
    # input a pandas array corresponding to a position, as well as desired z-score
    # return those players at  z standard deviations >= to the mean, in a numpy array. 
    # let z = 0 to get players above >= mean
    # special case, let z<0 to return players who have rate >0 (not necessarily above mean)
    # the first column is the index. the second column is  Fppg. The third is Salary
    if z<0:
        players=players[players['Value']>0]
    if z>=0:
        players=players[players['Value']>= (players['Value'].mean()+z*players['Value'].std())]
    players = players.sort_values(by='Value', ascending=False)
    players = players.loc[:,['Fppg','Salary']]
    players = players.reset_index()
    players = players.to_numpy()

    return players
    

In [16]:
@njit
def inner_loop_dask(players_d, players_f, players_q, players_r, players_t, players_w, w1, w2, w3):
    d_len = len(players_d)
    f_len = len(players_f)
    q_len = len(players_q)
    r_len = len(players_r)
    t_len = len(players_t)
    best_EV = 0

    def test_no_repeats(arr):
        for x in range(len(arr)):
            for y in range(x+1,len(arr)):
                if arr[x]==arr[y]:
                    return False
        return True
    
    for d in range(d_len):
        for f in range(f_len):
            for q in range(q_len):
                for r1 in range(r_len):
                    for r2 in range(r1+1, r_len):
                        for t in range(t_len):
                            points_EV = (players_d[d, 1] + players_f[f, 1] +
                                            players_q[q, 1] + players_r[r1, 1] +
                                            players_r[r2, 1] + players_t[t, 1] +
                                            players_w[w1, 1] + players_w[w2, 1] +
                                            players_w[w3, 1])
                            team_price = (players_d[d, 2] + players_f[f, 2] + players_q[q, 2]
                                           +players_r[r1, 2] + players_r[r2, 2]
                                            + players_t[t, 2] +
                                            players_w[w1, 2] + players_w[w2, 2] +players_w[w3,2])
                            test_set = np.array([players_r[r1, 0], players_r[r2, 0],
                                                players_w[w1, 0], players_w[w2, 0],
                                                players_w[w3, 0]])
                            no_repeats = test_no_repeats(test_set)
                            
                            if ((points_EV > best_EV) and (team_price < 50000) and no_repeats):
                                best_EV = points_EV
                                                                
                                best_team = np.array([
                                     players_d[d, 0], players_f[f, 0], players_q[q, 0],
                                     players_r[r1, 0], players_r[r2, 0], players_t[t, 0],
                                     players_w[w1, 0], players_w[w2, 0], players_w[w3, 0]
                                ])            

    return best_team, best_EV

@dask.delayed
def make_team_dask(players_d, players_f, players_q, players_r, players_t, players_w):
    w = list(itertools.combinations(range(len(players_w)), 3))
    # best_team = []
    best_EV = 0
    for idx in range(len(w)):
        w1, w2, w3 = w[idx]
        team, EV = inner_loop_dask(players_d, players_f, players_q, players_r, players_t, players_w, w1, w2, w3)
        if EV >= best_EV:
            best_EV = EV
            best_team = team
    return best_team


In [19]:
@njit
def make_team_njit(dst,flex,qb,rb,te,wr):
    #M = get_size(select_q,select_r,select_w, select_t, select_f, my_dsts)
    #1-QB 2-RB 3-WR 1-TE 1-Flex 1-DST
    #1 column for each position = 9 columns
    #best_team = np.zeros((1,9))
    qbl = len(qb)
    rbl = len(rb)
    wrl = len(wr)
    fl = len(flex)
    dl = len(dst)
    tel = len(te)
    current_best_EV=0

    def test_no_repeats(arr):
        for x in range(len(arr)):
            for y in range(x+1,len(arr)):
                if arr[x]==arr[y]:
                    return False
        return True

    for qbX in range(0,qbl):
        for rbX in range(0,rbl):
            for rbY in range(rbX+1,rbl):
                for wrX in range(0,wrl):
                    for wrY in range(wrX+1,wrl):
                        for wrZ in range(wrY+1,wrl):
                            for teX in range(0,tel):
                                for flX in range(0,fl):
                                    for dstX in range(0,dl):
                                        points_EV = qb[qbX,1] + rb[rbX,1] + rb[rbY,1] + wr[wrX,1] + wr[wrY,1] +wr[wrZ,1] + te[teX,1] + flex[flX,1] + dst[dstX,1]
                                        team_price = qb[qbX,2] + rb[rbX,2] + rb[rbY,2] + wr[wrX,2] + wr[wrY,2] +wr[wrZ,2] + te[teX,2] + flex[flX,2] + dst[dstX,2] 
                                        test_set = np.array([wr[wrX, 0], wr[wrY, 0],
                                                wr[wrZ, 0], te[teX, 0],
                                                rb[rbX, 0], rb[rbY,0], flex[flX,0]])
                                        no_repeats = test_no_repeats(test_set)
                                        if ( (points_EV>current_best_EV) & (team_price<=50000) &(no_repeats) ):
                                            best_team=[
                                                    qb[qbX,0],
                                                    rb[rbX,0],
                                                    rb[rbY,0],
                                                    wr[wrX,0],
                                                    wr[wrY,0],
                                                    wr[wrZ,0],
                                                    te[teX,0],
                                                    flex[flX,0],
                                                    dst[dstX,0]]
                                            current_best_EV=points_EV 
    return best_team

In [9]:
## Import and process the data

db_all = pd.read_csv('Football_166972393_Classic.csv')
db_all.rename(columns={'AvgPointsPerGame':'Fppg'}, inplace=True)
db_all = db_all.loc[:,['Name','Position','Salary','Fppg']]
db_all['Value']=db_all['Fppg']/db_all['Salary']*10000

db_d = db_all[db_all['Position']=='DST']
db_f = db_all[(db_all['Position']=='RB') | (db_all['Position']=='WR') | (db_all['Position']=='TE') ]
db_q = db_all[db_all['Position']=='QB']
db_r = db_all[db_all['Position']=='RB']
db_t = db_all[db_all['Position']=='TE']
db_w = db_all[db_all['Position']=='WR']

great_rate = 2.7
poor_rate = -1



select_d = get_players_at_value(great_rate,db_d)
select_f = get_players_at_value(great_rate,db_f)
select_q = get_players_at_value(great_rate,db_q)
select_r = get_players_at_value(great_rate,db_r)
select_t = get_players_at_value(great_rate,db_t)
select_w = get_players_at_value(great_rate,db_w)

## sanity check : ##
#print(get_size(select_d, select_f, select_q, select_r, select_t, select_w)/10**9)
#( note, when get_size was 1.34, the time to run without parallelization was 35 seconds)

In [35]:
def make_team_vanilla(dst,flex,qb,rb,te,wr):
    #M = get_size(select_q,select_r,select_w, select_t, select_f, my_dsts)
    #1-QB 2-RB 3-WR 1-TE 1-Flex 1-DST
    #1 column for each position = 9 columns
    #best_team = np.zeros((1,9))
    qbl = len(qb)
    rbl = len(rb)
    wrl = len(wr)
    fl = len(flex)
    dl = len(dst)
    tel = len(te)

    def test_no_repeats(arr):
        for x in range(len(arr)):
            for y in range(x+1,len(arr)):
                if arr[x]==arr[y]:
                    return False
        return True

    current_best_EV=0
    for qbX in range(0,qbl):
        for rbX in range(0,rbl):
            for rbY in range(rbX+1,rbl):
                for wrX in range(0,wrl):
                    for wrY in range(wrX+1,wrl):
                        for wrZ in range(wrY+1,wrl):
                            for teX in range(0,tel):
                                for flX in range(0,fl):
                                    for dstX in range(0,dl):
                                        points_EV = qb[qbX,1] + rb[rbX,1] + rb[rbY,1] + wr[wrX,1] + wr[wrY,1] +wr[wrZ,1] + te[teX,1] + flex[flX,1] + dst[dstX,1]
                                        team_price = qb[qbX,2] + rb[rbX,2] + rb[rbY,2] + wr[wrX,2] + wr[wrY,2] +wr[wrZ,2] + te[teX,2] + flex[flX,2] + dst[dstX,2] 
                                        test_set = np.array([wr[wrX, 0], wr[wrY, 0],
                                                wr[wrZ, 0], te[teX, 0],
                                                rb[rbX, 0], rb[rbY,0], flex[flX,0]])
                                        no_repeats = test_no_repeats(test_set)
                                        if ( (points_EV>current_best_EV) & (team_price<=50000) &(no_repeats) ):
                                            best_team=[
                                                    qb[qbX,0],
                                                    rb[rbX,0],
                                                    rb[rbY,0],
                                                    wr[wrX,0],
                                                    wr[wrY,0],
                                                    wr[wrZ,0],
                                                    te[teX,0],
                                                    flex[flX,0],
                                                    dst[dstX,0]]
                                            current_best_EV=points_EV 
    return best_team

In [17]:
team_dask = make_team_dask(select_d,select_f,select_q,select_r,select_t,select_w)
team_dask_computed=team_dask.compute()
print('Salary is ' + str(db_all.iloc[team_dask_computed,:]['Salary'].sum()) + ' and points EV is ' + str(db_all.iloc[team_dask_computed,:]['Fppg'].sum()))
print(db_all.iloc[team_dask_computed,:])


Salary is 49100 and points EV is 278.96000000000004
               Name Position  Salary   Fppg       Rate
667          Bears       DST    2600  24.00  92.307692
65      Jayden Reed       WR    6000  36.10  60.166667
71   Baker Mayfield       QB    5900  29.66  50.271186
13   Saquon Barkley       RB    7500  36.20  48.266667
105    J.K. Dobbins       RB    5400  25.90  47.962963
162   Isaiah Likely       TE    4800  29.10  60.625000
431    Allen Lazard       WR    3300  26.90  81.515152
65      Jayden Reed       WR    6000  36.10  60.166667
11      Cooper Kupp       WR    7600  35.00  46.052632


In [None]:
team_166895484=team_dask_166895484.compute()
print('Salary is ' + db_all.iloc[team_166895484,:]['Salary'].sum() + ' and points EV is ' db_all.iloc[team_166895484,:]['Fppg'].sum())
print(db_all.iloc[team_166895484,:])

In [None]:

db_all.iloc[team_dask_computed,:]['Salary'].sum()
#2.3 gives 4.5s (567000000 size)
#2.2 gives 25.7s (3217294080 size, 5 fold increase)


In [20]:
#2.3 gives 14.8s  (567000000 size)
#2.2 gives 80.6   (3217294080 size, 5 fold increase)
team_njit = make_team_njit(select_d,select_f,select_q,select_r,select_t,select_w)

In [40]:
#2.3 gives 301 seconds. (567000000 size)
#2.2 gives 1753 seconds. 
team_vanilla = make_team_vanilla(select_d,select_f,select_q,select_r,select_t,select_w)