# Colin Bowen & Bhawramaett Punruckwong Broehm
# 520.637: Foundations of Reinforcement Learning
# Final Project Implementation Workflow
# December 9, 2020

- Additional code is available at: https://github.com/colinpbowen/fantasy-RL. This notebook includes our model design and implementation. Additional files in the GitHub Repo are primarily for cleaning/preparing data for input to the model.

## Section 1: Import Data, Functions, and Packages

In [None]:
import pandas as pd
import numpy as np
#from xgoals import * #use getPts and getXPts functions
import requests
import pickle
import time
from io import StringIO

def get_df(player):
    '''
    Download one player's 2018-19 gameweek history from the vaastav repository.

    Args: player (str) -- the player's directory name under data/2018-19/players/
    Returns: DataFrame parsed from the response body of <player>/gw.csv.
    The HTTP status is not checked, so whatever text the server returns is parsed as CSV.
    '''
    url = (
        "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2018-19/players/"
        + player
        + "/gw.csv"
    )
    response = requests.get(url)
    return pd.read_csv(StringIO(response.text))

In [None]:
# Load the optimal starting selection according to the Integer Program.
# The first CSV column becomes the index; the remaining column is a 0/1 flag per player.
selection = pd.read_csv("https://raw.githubusercontent.com/colinpbowen/fantasy-RL/main/optimal_starting_team_1819.csv",
                       index_col=0) #the vector of 1s and 0s corresponding to the starting team
# Read from the working directory; later cells use its full_names, element_type, team and cost columns.
player_initial_data = pd.read_csv('player_initial_values_1819.csv') #positional data
full_names = player_initial_data['full_names'] #get player names

## Section 2: Selecting the most-selected players (peak selection count above the mean)

In [None]:
# One-off rebuild of max_selected, disabled with `if False`; flip it to True to regenerate the pickle.
if False:
    max_selected = []
    for player in full_names:
        df = get_df(player)
        try:
            # highest 'selected' value in the player's gameweek history
            val = df['selected'].max()
            max_selected.append(val)
        except KeyError:
            # no 'selected' column in the downloaded table: record 0 for this player
            max_selected.append(0)

    # This takes a minute to run, so pickle max_selected to save time for future runs
    with open('max_selected.pkl', 'wb') as f:
        pickle.dump(max_selected, f)
# Load in max_selected (requires max_selected.pkl in the working directory).
# NOTE(review): pickle.load executes arbitrary code from the file; only load a file you created yourself.
with open('max_selected.pkl', 'rb') as f:  
    max_selected = pickle.load(f)

In [None]:
# Keep only players whose peak selection count is above the mean of all peak counts.
maximum_selected = pd.Series(max_selected)
idx = maximum_selected[maximum_selected > maximum_selected.mean()].index #152 players
# idx holds labels of maximum_selected (0..n-1); .loc below is label-based.
# NOTE(review): assumes selection, full_names and player_initial_data use that same 0..n-1 index in the same player order -- confirm.
selection = selection.loc[idx] #reduces 500 initialization vector to 152
selection = selection.to_numpy()[:,0] # first column as a 1-D numpy array
full_names = full_names.loc[idx]
player_initial_data = player_initial_data.loc[idx]

#pd.Series(idx).to_csv('idx.csv')
#idx = pd.read_csv('idx.csv',index_col=0)
#selection = selection.loc[idx] #reduces 500 initialization vector to 152
#selection = selection.to_numpy()[:,0]
#full_names = full_names.loc[idx]
#player_initial_data = player_initial_data.loc[idx]

In [None]:
# Display the filtered player table as the cell output.
player_initial_data

## Section 3: Weekly Updates of Expected Rewards

For each episode (one full pass through the season's data):
    
    For each week:

        1. Get integer program initialization for GW0.
        2. Start the most selected team. 
        3. Observe GW1. Get points data for week 1
        4. Update expected reward distribution.
        5. Sample rewards for week 2 to get expected points for team and player pool.
        6. Decide who to transfer for week 2.
        7. Start whomever has the highest expected reward. Make captain the highest expected reward, regardless of position.

### Functions to choose starters, make sure selection doesn't violate constraints (e.g., valid formation)

In [None]:
def pick_starters(curTeam, init=True):
    '''
    Pick the starting eleven from the squad in curTeam.

    The goalkeeper (position 1) with the highest criterion value starts. Outfield
    players are then added greedily in descending criterion order, skipping any
    player whose addition check_valid rejects, until ten outfield players are chosen.

    Args:
        curTeam: DataFrame with 'position' and the criterion column
            ('selected' if init else 'xP').
        init: if True rank by 'selected' (GW1 initialisation), otherwise by 'xP'.
    Returns:
        Boolean numpy array with one entry per row of curTeam, True for starters.
        (Previously a 0/1 float array on the early-return path; comparisons with
        == 1 and boolean indexing behave the same.)
    '''
    criterion = 'selected' if init else 'xP'
    positions = curTeam['position'].to_numpy()
    scores = curTeam[criterion].to_numpy(dtype=float)
    idx_vec = np.zeros(len(curTeam), dtype=bool)

    # Goalkeeper: first keeper with the maximal criterion value.
    keeper_rows = np.flatnonzero(positions == 1)
    if keeper_rows.size:
        idx_vec[keeper_rows[np.argmax(scores[keeper_rows])]] = True

    out_pos = [2, 3, 4]
    # Per-position tallies [defenders, midfielders, forwards]. These must be updated
    # by index: `list += ndarray` extends the list instead of adding element-wise,
    # which left the tallies at zero and the formation check without effect.
    counts = [0, 0, 0]
    outfield_rows = np.flatnonzero(positions != 1)
    # Stable descending sort: ties keep squad order, NaN scores go last.
    ranked = outfield_rows[np.argsort(-scores[outfield_rows], kind='stable')]
    for row in ranked:
        if sum(counts) == 10:
            break
        player_pos = positions[row]
        if check_valid(player_pos, counts):
            counts[out_pos.index(player_pos)] += 1
            idx_vec[row] = True

    return idx_vec

def check_valid(player_pos, counts):
    '''
    Check whether adding one outfield player keeps a valid formation reachable.

    Args:
        player_pos: 2 (defender), 3 (midfielder) or 4 (forward).
        counts: current [defenders, midfielders, forwards] already selected.
    Returns:
        True if the addition respects the per-position maxima (5, 5, 3) and still
        leaves enough of the ten outfield slots to reach the minima
        (3 defenders, 2 midfielders, 1 forward); False otherwise, including for
        any other player_pos.
    '''
    out_pos = [2, 3, 4]
    max_count = [5, 5, 3]
    min_count = [3, 2, 1]
    #valid formations are 3-4-3, 3-5-2, 4-4-2, 4-5-1, 4-3-3, 5-3-2, 5-4-1, 5-2-3
    if player_pos not in out_pos:
        return False
    slot = out_pos.index(player_pos)
    current = [int(c) for c in counts[:3]]
    if sum(current) >= 10 or current[slot] >= max_count[slot]:
        return False
    current[slot] += 1
    open_slots = 10 - sum(current)
    # Slots that must still go to positions that are below their minimum.
    still_required = sum(max(0, need - have) for need, have in zip(min_count, current))
    return still_required <= open_slots


### Initialize Budget, Current Team DataFrame

In [None]:
#get current budget
def initializeTeam():
    '''
    Build the initial squad table and the budget left after buying it.

    Uses the module-level selection, full_names and player_initial_data, and
    downloads each squad member's history to read the round-1 'value' and 'selected'.
    Returns: (curTeam, budget) -- curTeam has columns full_name, position, team,
    value, selected, isCaptain, isStarting; budget is 1000 minus the round-1 values.
    '''
    in_squad = selection.astype(bool)
    squad_names = full_names[in_squad]

    curTeam = pd.DataFrame()
    curTeam['full_name'] = squad_names
    curTeam['position'] = player_initial_data.element_type[in_squad]
    curTeam['team'] = player_initial_data.team[in_squad]
    curTeam['value'] = player_initial_data.cost[in_squad]

    budget = 1000
    init_selected = []
    for player in squad_names:
        history = get_df(player)
        first_round = history['round'] == 1
        # spend the player's round-1 price and remember how many managers picked them
        budget -= history.value[first_round].values[0]
        init_selected.append(history.selected[first_round].values[0])
    curTeam['selected'] = init_selected

    # captain flag: 1 for every row that matches the highest selection count
    top_selected = curTeam.selected.max()
    curTeam['isCaptain'] = [int(count == top_selected) for count in curTeam['selected']]
    curTeam['isStarting'] = pick_starters(curTeam, init=True)

    return curTeam, budget


In [None]:
# Build the starting squad; curTeam and budget become module-level globals that later cells read.
curTeam, budget = initializeTeam()
print("Budget remaining: {}".format(budget))
# Display the squad table as the cell output.
curTeam

### Helper Functions
#### These functions help us update distributions for each player statistic so that we can get an expectation of points
#### They also help us determine which players should be transferred and let us update our squad each week

In [None]:
from numpy.random import choice as ch
from statistics import median

# Number of samples getXpts draws per statistic with np.random.choice before taking the median.
numChoices = 500 # INPUT A VALUE HERE

def getXpts(df, week, position):
    """
    Simulate a player's points for a week from their empirical statistic distributions.

    For each statistic, numChoices values are drawn from the matching x*probs
    distribution and the median draw is used; the medians are scored by getPts.
    Args: df (player's gw DataFrame), week (int), position (1-4 position code)
    """
    samplers = (
        ('goals_scored', xGprobs),
        ('assists', xAprobs),
        ('bonus', xBprobs),
        ('clean_sheets', xCSprobs),
        ('goals_conceded', xGCprobs),
        ('minutes', xMprobs),
        ('own_goals', xOGprobs),
        ('penalties_conceded', xPCprobs),
        ('penalties_missed', xPMprobs),
        ('penalties_saved', xPSprobs),
        ('red_cards', xRCprobs),
        ('saves', xSprobs),
        ('yellow_cards', xYCprobs),
    )
    # Build every distribution first, then sample in the same order.
    distributions = [(stat, probs(df, week)) for stat, probs in samplers]

    data = {}
    for stat, pmf in distributions:
        draws = ch(a=np.arange(len(pmf)), p=pmf, size=numChoices)
        data[stat] = median(draws)
    return getPts(data, position)






def xGprobs(df, week):
    """
    Calculate distribution of expected goals for a player.
    Counts the values 0..4 in the first week+1 rows of df['goals_scored'] and
    normalises them; observations outside 0..4 are ignored.
    Args: df (player's gw DataFrame) ,week (int)
    Returns: list of 5 probabilities indexed by number of goals.
    """
    support = np.arange(4 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['goals_scored'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0, 0, 0, 0]  # Some prior we can change later. This assumes p(0 goals) = 1
    return (counts / total).tolist()

# ## Expected Assists
def xAprobs(df, week):
    """
    Calculate distribution of expected assists for a player.
    Counts the values 0..3 in the first week+1 rows of df['assists'] and
    normalises them; observations outside 0..3 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list of 4 probabilities indexed by number of assists.
    """
    support = np.arange(3 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['assists'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0, 0, 0]  # Some prior we can change later. This assumes p(0 assists) = 1
    return (counts / total).tolist()

# ## Expected Bonus
def xBprobs(df, week):
    """
    Calculate distribution of expected bonus for a player.
    Counts the values 0..3 in the first week+1 rows of df['bonus'] and
    normalises them; observations outside 0..3 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list of 4 probabilities indexed by bonus points.
    """
    support = np.arange(3 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['bonus'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0, 0, 0]  # Some prior we can change later. This assumes p(0 bonus) = 1
    return (counts / total).tolist()

# ## Expected Clean Sheets

def xCSprobs(df, week):
    """
    Calculate distribution of expected clean sheets for a player.
    Counts the values 0 and 1 in the first week+1 rows of df['clean_sheets']
    and normalises them; other values are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list [p(no clean sheet), p(clean sheet)].
    """
    support = np.arange(1 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['clean_sheets'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0]  # Some prior we can change later. This assumes p(0 CS) = 1
    return (counts / total).tolist()


# ## Expected Goals Conceded
def xGCprobs(df, week):
    """
    Calculate distribution of expected goals conceded for a player.
    Counts the values 0..9 in the first week+1 rows of df['goals_conceded']
    and normalises them; observations outside 0..9 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list of 10 probabilities indexed by goals conceded.
    """
    support = np.arange(9 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['goals_conceded'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0., 0., 0., 0., 0., 0., 0., 0., 0.]  # Some prior we can change later. This assumes p(0 goals conceded) = 1
    return (counts / total).tolist()


# ## Expected Minutes


def xMprobs(df, week):
    """
    Calculate distribution of expected minutes for a player.
    Counts the values 0..90 in the first week+1 rows of df['minutes'] and
    normalises them; observations outside 0..90 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: 91 probabilities indexed by minutes played (a list, or the numpy
    prior array when there is no usable data).
    """
    p = np.zeros(91)
    p[0] = 1.0
    support = np.arange(90 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['minutes'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return p  # Some prior we can change later. This assumes p(0 minutes) = 1
    return (counts / total).tolist()


# ## Expected Own Goals


def xOGprobs(df, week):
    """
    Calculate distribution of expected own goals for a player.
    Counts the values 0 and 1 in the first week+1 rows of df['own_goals'] and
    normalises them; other values are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list [p(0 own goals), p(1 own goal)].
    """
    support = np.arange(1 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['own_goals'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0]  # Some prior we can change later. This assumes p(0 own goals) = 1
    return (counts / total).tolist()



# ## Expected Penalties Conceded


def xPCprobs(df, week):
    """
    Calculate distribution of expected penalties conceded for a player.
    Counts the values 0..2 in the first week+1 rows of df['penalties_conceded']
    and normalises them; observations outside 0..2 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list of 3 probabilities indexed by penalties conceded.
    """
    support = np.arange(2 + 1)
    try:
        observed = df['penalties_conceded'][0:week+1].value_counts()
    except KeyError: # this column is missing in 2019/2020 data for some players
        return [1,0,0]
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = observed.reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0, 0]  # Some prior we can change later. This assumes p(0 penalties conceded) = 1
    return (counts / total).tolist()


# ## Expected Penalties Missed

def xPMprobs(df, week):
    """
    Calculate distribution of expected penalties missed for a player.
    Counts the values 0 and 1 in the first week+1 rows of df['penalties_missed']
    and normalises them; other values are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list [p(0 missed), p(1 missed)].
    """
    support = np.arange(1 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['penalties_missed'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0]  # Some prior we can change later. This assumes p(0 penalties missed) = 1
    return (counts / total).tolist()

# ## Expected Penalties Saved

def xPSprobs(df, week):
    """
    Calculate distribution of expected penalties saved for a player.
    Counts the values 0..2 in the first week+1 rows of df['penalties_saved']
    and normalises them; observations outside 0..2 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list of 3 probabilities indexed by penalties saved.
    """
    support = np.arange(2 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['penalties_saved'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0, 0]  # Some prior we can change later. This assumes p(0 penalties saved) = 1
    return (counts / total).tolist()




# ## Expected Red Cards

def xRCprobs(df, week):
    """
    Calculate distribution of expected red cards for a player.
    Counts the values 0 and 1 in the first week+1 rows of df['red_cards'] and
    normalises them; other values are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list [p(no red card), p(red card)].
    """
    support = np.arange(1 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['red_cards'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0]  # Some prior we can change later. This assumes p(0 red cards) = 1
    return (counts / total).tolist()



# ## Expected Saves

def xSprobs(df, week):
    """
    Calculate distribution of expected saves for a player.
    Counts the values 0..14 in the first week+1 rows of df['saves'] and
    normalises them; observations outside 0..14 are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: 15 probabilities indexed by number of saves (a list, or the numpy
    prior array when there is no usable data).
    """
    p = np.zeros(15)
    p[0] = 1.0
    support = np.arange(14 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['saves'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return p  # Some prior we can change later. This assumes p(0 saves) = 1
    return (counts / total).tolist()


# ## Expected Yellow Cards


def xYCprobs(df, week):
    """
    Calculate distribution of expected yellow cards for a player.
    Counts the values 0 and 1 in the first week+1 rows of df['yellow_cards']
    and normalises them; other values are ignored.
    Args: df (player's gw DataFrame), week (int)
    Returns: list [p(no yellow card), p(yellow card)].
    """
    support = np.arange(1 + 1)
    # reindex on the support so the result does not depend on the name
    # value_counts gives its output (it changed in pandas 2.0)
    counts = df['yellow_cards'][0:week+1].value_counts().reindex(support, fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY (USABLE) DATA
        return [1, 0]  # Some prior we can change later. This assumes p(0 yellow cards) = 1
    return (counts / total).tolist()
        
#generate realization
#no need to use this for accessing historical data because it's already stored? 
#need to add penalties conceded
def getPts(x, position):
    """
    Score one set of match statistics for a player in the given position.
    Use with getXpts for simulating reward.

    Args: x (mapping of statistic name -> value), position (1 GK, 2 DEF, 3 MID, other = striker)
    Returns: bonus + appearance points + position-specific points.
    """
    minutes = x['minutes']
    if minutes >= 60:
        appearance = 2
    elif minutes > 0:
        appearance = 1
    else:
        appearance = 0
    pts = x['bonus'] + appearance

    # parts shared by every position
    assist_pts = x['assists']*3
    deductions = x['yellow_cards'] + 3*x['red_cards'] + 2*x['own_goals']

    if position == 1.0:  # Goalie
        role_pts = (6*x['goals_scored'] + 4*x['clean_sheets'] + int(x['saves']//3)
                    + 5*x['penalties_saved'] - x['goals_conceded']//2)
    elif position == 2.0:  # Defender
        role_pts = (6*x['goals_scored'] + 4*x['clean_sheets']
                    - 2*x['penalties_missed'] - x['goals_conceded']//2)
    elif position == 3.0:  # Mid
        role_pts = 5*x['goals_scored'] + 1*x['clean_sheets'] - 2*x['penalties_missed']
    else:  # Striker
        role_pts = 4*x['goals_scored'] - 2*x['penalties_missed']
    return pts + role_pts + assist_pts - deductions
    
def ilocfromloc(player):
    """
    Return the position of a player within the module-level full_names series.
    Looks up the index label of the first entry equal to player and converts it
    with Index.get_loc. Raises IndexError when the player is not in full_names.
    """
    matches = (full_names == player).to_numpy()
    first_label = full_names.index[matches][0]
    return full_names.index.get_loc(first_label)

def getPlayerPool(team=None):
    """
    Return the players available for transfer: everyone in full_names who is not in team.

    Args: team (squad DataFrame with a full_name column). Defaults to the module-level
        curTeam looked up at call time; a `team=curTeam` default would freeze the
        squad object that existed when the function was defined.
    Returns: new DataFrame with columns full_names, element_type, team and xP (all 0).
    """
    if team is None:
        team = curTeam
    player_pool = set(full_names) - set(team.full_name)
    in_pool = player_initial_data["full_names"].isin(player_pool)
    # copy so adding the xP column does not write into a slice of player_initial_data
    pp = player_initial_data.loc[in_pool, ["full_names", "element_type", 'team']].copy()
    pp['xP'] = 0
    return pp

def updateXP(week,team=None):
    """
    Fill the 'xP' column of team with a simulated points value for every player.

    Args: week (int, passed to getXpts), team (squad DataFrame, modified in place).
        Defaults to the module-level curTeam looked up at call time; a
        `team=curTeam` default would freeze the squad object that existed when
        the function was defined, so a call without `team` would return that old
        squad.
    Returns: the same team object.
    """
    if team is None:
        team = curTeam
    for player, position in zip(team.full_name.values, team.position.values):
        team.loc[team.full_name == player, 'xP'] = getXpts(get_df(player), week, position)
    return team

def selectCaptain(team=None):
    """
    Mark exactly one captain: the player with the highest xP.

    On ties the first tied player who is starting (when an 'isStarting' column
    exists) is chosen, otherwise the first tied player. Flagging every tied
    player would double the points of several players.

    Args: team (squad DataFrame with an 'xP' column, modified in place). Defaults
        to the module-level curTeam looked up at call time rather than the squad
        object that existed when the function was defined.
    Returns: the same team object with an integer 0/1 'isCaptain' column.
    """
    if team is None:
        team = curTeam
    flags = np.zeros(len(team), dtype=int)
    xp = team['xP'].to_numpy(dtype=float)
    if len(xp) and not np.all(np.isnan(xp)):
        candidates = np.flatnonzero(xp == np.nanmax(xp))
        if 'isStarting' in team.columns:
            starting = team['isStarting'].to_numpy()[candidates].astype(bool)
            if starting.any():
                candidates = candidates[starting]
        flags[candidates[0]] = 1
    team['isCaptain'] = flags
    return team

def getTopN(df, position, week,N, max_budget):
    """
    Return the N affordable players with the highest xP at a position.

    Side effect: for every player of that position, df's 'xP' and 'value' columns
    are overwritten in place. A player with no row for `week` gets the value
    max_budget + 1, which the affordability filter then removes.

    Args: df (player pool with full_names / element_type columns), position (int),
        week (int), N (int), max_budget (maximum affordable value).
    Returns: up to N rows sorted by xP descending, with full_names / element_type
        renamed to full_name / position.
    """
    for player in df.full_names.loc[df.element_type==position]:
        history = get_df(player)  # download once per player and reuse it below
        is_player = df.full_names == player
        df.loc[is_player, "xP"] = getXpts(history,week,position)
        week_values = history.loc[history['round'] == week]['value']
        if len(week_values):
            df.loc[is_player, "value"] = week_values.iloc[0]
        else: # If this player wasn't available in the week, we'll be able to exclude them using the following
            df.loc[is_player, "value"] = max_budget + 1
    temp = df.loc[df.element_type==position].sort_values(by='xP',ascending=False)
    temp = temp[temp.value <= max_budget]
    return temp[0:N].rename(columns={"full_names":'full_name', "element_type":"position"})

def worstAtPos(pos, team=None, bud=None):
    """
    Input your current team, the current budget, and the position to replace.
    Returns the player who has the lowest expected points per value at the specified position and the available budget if
    that player were replaced (bud + that player's value).

    team and bud default to the module-level curTeam and budget looked up at call
    time; `team=curTeam, bud=budget` defaults would freeze the values that existed
    when the function was defined.
    """
    if team is None:
        team = curTeam
    if bud is None:
        bud = budget
    options = team.loc[team.position == pos] # players at the requested position
    points_per_value = (options['xP'] / options['value']).to_numpy(dtype=float)
    worst = options.iloc[int(np.argmin(points_per_value))]
    return worst.full_name, bud+worst.value

def swapPlayer(team, playerOut, choices, idx, curBudget):
    """
    Replace playerOut in team with row idx of choices and update the budget.

    Args: team (squad DataFrame), playerOut (full_name of the player leaving),
        choices (candidate DataFrame), idx (row position within choices),
        curBudget (budget before the swap).
    Returns: (new team DataFrame, curBudget + value of playerOut - value of the newcomer).
    The input team is not modified.
    """
    leaving = team.full_name == playerOut
    incoming = choices.iloc[[idx]]
    refund = team.loc[leaving]['value'].values[0]
    cost = incoming['value'].values[0]
    roster = pd.concat([team.loc[~leaving], incoming])
    return roster, curBudget + refund - cost

def updateSquad(action, week, team=None, bud=None, PAs=3):
    """
    Apply a transfer action to the squad.

    Action encoding: 0 = do nothing; actions 1..4*PAs are grouped in blocks of
    PAs per position (GK, DEF, MID, FWD in that order). Within a block, the k-th
    action swaps the worst player at that position (see worstAtPos) for the k-th
    best affordable candidate returned by getTopN. For PAs=3 this matches the
    earlier `action < PAs+4` / `action < PAs+7` thresholds; it is now also
    correct for other PAs values.

    Reads the module-level player pool `pp`, which must be set before calling.

    Args: action (int), week (int), team (squad DataFrame), bud (current budget),
        PAs (number of candidate replacements per position). team and bud default
        to the module-level curTeam and budget looked up at call time.
    Returns: (team, budget) after the action. The inputs are returned unchanged
        for action 0, or when fewer affordable candidates exist than the action asks for.
    Raises: ValueError if action is outside 0..4*PAs.
    """
    if team is None:
        team = curTeam
    if bud is None:
        bud = budget
    if action == 0: # Do Nothing
        return team, bud
    slot, choice = divmod(int(action) - 1, PAs)
    if not 0 <= slot <= 3:
        raise ValueError("action {} is outside the range 0..{} for PAs={}".format(action, 4*PAs, PAs))
    position = slot + 1 # 1 GK, 2 DEF, 3 MID, 4 FWD
    worstPlayer, potential_budget = worstAtPos(pos=position, team=team, bud=bud)
    topN = getTopN(df=pp, position=position, week=week, N=PAs, max_budget=potential_budget)
    if choice >= len(topN):
        # not enough affordable candidates for this rank: make no transfer
        return team, bud
    return swapPlayer(team=team, playerOut=worstPlayer, choices=topN, idx=choice, curBudget=bud)

def get_weekly_pts_data(week):
    """
    Total the points scored in `week` by the starters of the module-level curTeam.

    Each starter's history is downloaded and the first 'total_points' entry for
    the round is used; the captain's points count double.
    """
    starters = curTeam.loc[curTeam.isStarting == 1].full_name
    total = 0
    for player in list(starters):
        history = get_df(player)
        in_week = history['round'] == week
        scores = history['total_points'][in_week]
        if len(scores) == 0:
            points = 2  # If our starter doesn't play this week, assume their replacement got 2 pts (played 60+ min)
        else:
            points = scores.iloc[0]
        is_captain = curTeam.loc[curTeam['full_name'] == player, 'isCaptain'].values[0] == 1
        if is_captain:
            points *= 2
        total += points
    return total

## Define the state-action space

- Possible Actions:
    - Do Nothing
    - Replace GK with top 5 GKs
    - Replace Def/Mid/Attack with top 10 at that position
    - 10 + 10 + 10 + 5 + 1 = 36
- Possible States:
    - Budget value: 0, 1, ..., B

**Need to encode each action with a number (0-35)**
    - 0: Do Nothing
    - 1-5: Swap Worst GK with top 1-5 GK
    - 6-15: Swap Worst Def with top 1-10 Def
    - 16-25: Swap Worst Mid with top 1-10 Mid
    - 26-35: Swap Worst Forward with top 1-10 Forward
 
**Ultimately reduced to the top 3 at each position because exploration was insufficient in 100 episodes when |A(s)| = 36**


## SARSA Implementation

In [None]:
###########################
########## SARSA ##########
###########################

# Initialize Algorithm Parameters
alpha = 0.5 # step size of the Q update
epsilon = 0.1 # threshold compared with a uniform draw in get_action
gamma = 1 # discount factor
# gamma = 0.5 # Suggested by Mathews 
ssize = 2000 # number of Q-table rows; the training loop below indexes states by the integer budget
# NOTE(review): updateSquad's default PAs=3 encodes only actions 0..12, while asize = 36
# lets get_action return up to 35 -- confirm which encoding this run is meant to use.
asize = 36 # number of possible actions in each state
Qsarsa = np.zeros((ssize,asize)) # Q-table, one row per state and one column per action
numEpisodes = 100
weeks = list(range(2,39)) # gameweeks 2..38; gameweek 1 is scored before the weekly loop
sarsaRewards = np.zeros((numEpisodes,)) # season total per episode
WEEKLY_SCORES = np.zeros((numEpisodes,1,38+1)) # per-episode weekly points; slot 0 holds gameweek 1, slots 2..38 the later weeks
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Epsilon-greedy action selection.

    Args: state (row index into Q), Q (2-D array of state-action values), epsilon (float)
    Returns: a column index of Q. When a uniform draw is <= epsilon the action is
    drawn uniformly from all columns; otherwise it is drawn uniformly among the
    columns with the maximal value in that row.
    The number of actions is taken from Q itself rather than the global `asize`,
    so it cannot get out of step with the table.
    """
    Q_s = Q[state,:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        return np.random.choice(len(Q_s)) # Explore: uniformly random action
    # Exploit: greedy action, ties broken at random
    return np.random.choice(np.flatnonzero(Q_s == Q_s.max()))

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    SARSA update: move Q[S, A] towards R + gamma * Q[Sprime, Aprime] by step alpha.
    Modifies Q in place and returns it.
    """
    td_error = R + gamma*Q[Sprime, Aprime] - Q[S, A]
    Q[S, A] += alpha*td_error
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """
    Q-learning update: move Q[S, A] towards R + gamma * max_a Q[Sprime, a] by step alpha.
    Modifies Q in place and returns it.
    """
    best_next = Q[Sprime, :].max()
    Q[S, A] += alpha*(R + gamma*best_next - Q[S, A])
    return Q

ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = int(budget)  # initialize S
    week1_pts = get_weekly_pts_data(1) # fetch once and reuse for both accumulators
    sarsaRewards[i] += week1_pts
    WEEKLY_SCORES[i,0,0] += week1_pts
    # Choose A from S once per episode. Inside the loop A is carried over from A',
    # so the action evaluated in the update is the action actually taken next (SARSA).
    current_action = int(get_action(state=current_state, Q=Qsarsa, epsilon=epsilon))
    for curWeek in weeks: # For each step in the episode
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # Pass the squad explicitly: the helpers' default `team` is the squad object
        # that existed when they were defined, not this episode's squad.
        curTeam = updateXP(week=curWeek, team=curTeam) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain(team=curTeam)  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek) # Take action A, observe R (fetched once)
        sarsaRewards[i] += week_pts
        WEEKLY_SCORES[i,0,curWeek] += week_pts # Realize GW results, update your Rewards
        next_action = get_action(int(next_state), Qsarsa, epsilon)  # Choose A' from S' using policy derived from Q
        Qsarsa = SARSAupdate_Q(Q=Qsarsa, S=int(current_state), Sprime=int(next_state), A=int(current_action), Aprime=int(next_action), alpha=alpha, R=WEEKLY_SCORES[i,0,curWeek], gamma=gamma)
        current_state = int(next_state) # S <- S'
        current_action = int(next_action) # A <- A'
    episodeEND = time.time()
    # elapsed = end - start (the operands were reversed, printing a negative duration)
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, sarsaRewards[i]))
    
END = time.time()

print(END-ST)

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): the 'seaborn' style name was deprecated in Matplotlib 3.6 in favour of
# 'seaborn-v0_8' -- confirm against the installed Matplotlib version.
plt.style.use('seaborn')
# Season total per episode for the SARSA run above.
plt.plot(sarsaRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")

#### 100 episodes doesn't look like enough for sufficient training. Reducing the number of "promising actions" to top 3 at each position.

In [None]:
###########################
########## SARSA ##########
###########################

# Initialize Algorithm Parameters
alpha = 0.5 # step size of the Q update
epsilon = 0.1 # threshold compared with a uniform draw in get_action
gamma = 1 # discount factor
# gamma = 0.5 # Suggested by Mathews 
ssize = 2000 # number of Q-table rows; the training loop below indexes states by the integer budget
asize = 13 # number of possible actions in each state: 13 now because do nothing + 3 PAs for each of 4 positions
Qsarsa = np.zeros((ssize,asize)) # Q-table, one row per state and one column per action
numEpisodes = 100
weeks = list(range(2,39)) # gameweeks 2..38; gameweek 1 is scored before the weekly loop
sarsaRewards = np.zeros((numEpisodes,)) # season total per episode
WEEKLY_SCORES = np.zeros((numEpisodes,1,38+1)) # per-episode weekly points; slot 0 holds gameweek 1, slots 2..38 the later weeks
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Epsilon-greedy action selection.

    Args: state (row index into Q), Q (2-D array of state-action values), epsilon (float)
    Returns: a column index of Q. When a uniform draw is <= epsilon the action is
    drawn uniformly from all columns; otherwise it is drawn uniformly among the
    columns with the maximal value in that row.
    The number of actions is taken from Q itself rather than the global `asize`,
    so it cannot get out of step with the table.
    """
    Q_s = Q[state,:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        return np.random.choice(len(Q_s)) # Explore: uniformly random action
    # Exploit: greedy action, ties broken at random
    return np.random.choice(np.flatnonzero(Q_s == Q_s.max()))

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    SARSA update: move Q[S, A] towards R + gamma * Q[Sprime, Aprime] by step alpha.
    Modifies Q in place and returns it.
    """
    td_error = R + gamma*Q[Sprime, Aprime] - Q[S, A]
    Q[S, A] += alpha*td_error
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """
    Q-learning update: move Q[S, A] towards R + gamma * max_a Q[Sprime, a] by step alpha.
    Modifies Q in place and returns it.
    """
    best_next = Q[Sprime, :].max()
    Q[S, A] += alpha*(R + gamma*best_next - Q[S, A])
    return Q

ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = int(budget)  # initialize S
    week1_pts = get_weekly_pts_data(1) # fetch once and reuse for both accumulators
    sarsaRewards[i] += week1_pts
    WEEKLY_SCORES[i,0,0] += week1_pts
    # Choose A from S once per episode. Inside the loop A is carried over from A',
    # so the action evaluated in the update is the action actually taken next (SARSA).
    current_action = int(get_action(state=current_state, Q=Qsarsa, epsilon=epsilon))
    for curWeek in weeks: # For each step in the episode
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # Pass the squad explicitly: the helpers' default `team` is the squad object
        # that existed when they were defined, not this episode's squad.
        curTeam = updateXP(week=curWeek, team=curTeam) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain(team=curTeam)  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek) # Take action A, observe R (fetched once)
        sarsaRewards[i] += week_pts
        WEEKLY_SCORES[i,0,curWeek] += week_pts # Realize GW results, update your Rewards
        next_action = get_action(int(next_state), Qsarsa, epsilon)  # Choose A' from S' using policy derived from Q
        Qsarsa = SARSAupdate_Q(Q=Qsarsa, S=int(current_state), Sprime=int(next_state), A=int(current_action), Aprime=int(next_action), alpha=alpha, R=WEEKLY_SCORES[i,0,curWeek], gamma=gamma)
        current_state = int(next_state) # S <- S'
        current_action = int(next_action) # A <- A'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, sarsaRewards[i]))
    
END = time.time()

print(END-ST)

### PICKLE THE DATA

In [None]:
# Write the SARSA results to SARSA1.pkl in the working directory (overwrites an existing file).
with open('SARSA1.pkl', 'wb') as f:  
    pickle.dump([Qsarsa, sarsaRewards, WEEKLY_SCORES], f)  # SAVE THE Trained Q function, Episodic Rewards, and Weekly Rewards

# Read the file back; obj0, obj1, obj2 are the Q-table, episode rewards and weekly scores in the order dumped above.
# NOTE(review): pickle.load executes arbitrary code from the file; only load a file you created yourself.
with open('SARSA1.pkl', 'rb') as f:  
    obj0, obj1, obj2 = pickle.load(f)

In [None]:
# Plot the 10-episode rolling mean of the season totals (the first 9 points are NaN).
plt.plot(pd.DataFrame(sarsaRewards).rolling(window=10).mean())
# Cell output: mean season total over the last 20 episodes.
pd.DataFrame(sarsaRewards).rolling(window=20).mean().iloc[-1]

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): the 'seaborn' style name was deprecated in Matplotlib 3.6 in favour of
# 'seaborn-v0_8' -- confirm against the installed Matplotlib version.
plt.style.use('seaborn')
# Season total per episode for the SARSA run.
plt.plot(sarsaRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
# NOTE(review): the title and file name say gamma = 0.5, but the SARSA parameter cell in this
# file assigns gamma = 1 (the 0.5 line is commented out) -- confirm which run produced this plot.
plt.title("SARSA, γ=0.5")
plt.savefig("SARSA_Training_gamma5.png")

# Q-Learning

### Let gamma = 1

In [None]:
################################
########## Q-Learning ##########
################################

# Initialize Algorithm Parameters
alpha = 0.5 # step size of the Q update
epsilon = 0.1 # threshold compared with a uniform draw in get_action
gamma = 1 # discount factor
# gamma = 0.5 # Suggested by Mathews 
ssize = 2000 # number of possible states ==> set to something sufficiently large so that we don't get IndexError
asize = 13 # number of possible actions in each state: 13 now because do nothing + 3 PAs for each of 4 positions
Qlearning = np.zeros((ssize,asize)) # Q-table, one row per state and one column per action
numEpisodes = 100
weeks = list(range(2,39)) # gameweeks 2..38; gameweek 1 is scored before the weekly loop
QLRewards = np.zeros((numEpisodes,)) # season total per episode
QL_WEEKLY_SCORES = np.zeros((numEpisodes,1,38+1)) # per-episode weekly points; slot 0 holds gameweek 1, slots 2..38 the later weeks
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Epsilon-greedy action selection.

    Args: state (row index into Q), Q (2-D array of state-action values), epsilon (float)
    Returns: a column index of Q. When a uniform draw is <= epsilon the action is
    drawn uniformly from all columns; otherwise it is drawn uniformly among the
    columns with the maximal value in that row.
    The number of actions is taken from Q itself rather than the global `asize`,
    so it cannot get out of step with the table.
    """
    Q_s = Q[state,:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        return np.random.choice(len(Q_s)) # Explore: uniformly random action
    # Exploit: greedy action, ties broken at random
    return np.random.choice(np.flatnonzero(Q_s == Q_s.max()))

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    SARSA update: move Q[S, A] towards R + gamma * Q[Sprime, Aprime] by step alpha.
    Modifies Q in place and returns it.
    """
    td_error = R + gamma*Q[Sprime, Aprime] - Q[S, A]
    Q[S, A] += alpha*td_error
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """
    Apply one Q-learning step to Q[S, A], bootstrapping from the best action value in Sprime.

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, :].max()
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = int(budget)  # initialize S; cast so it can index the Q table
    # Score gameweek 1 once and reuse it (every scoring call re-downloads each starter's data)
    week1_pts = get_weekly_pts_data(1)
    QLRewards[i] += week1_pts
    QL_WEEKLY_SCORES[i,0,0] += week1_pts
    for curWeek in weeks: # For each step in the episode
        current_action = get_action(state=current_state, Q=Qlearning, epsilon=epsilon) # Choose A from S using policy derived from Q
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # NOTE(review): updateXP() and selectCaptain() are called without team=. If their `team`
        # default is bound when the helper is defined, they act on that older squad object
        # rather than curTeam -- confirm against the helper definitions.
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        reward = get_weekly_pts_data(curWeek) # Take action A, observe R (realized gameweek points), fetched once
        QLRewards[i] += reward
        QL_WEEKLY_SCORES[i,0,curWeek] += reward
        Qlearning = QLupdateQ(Q=Qlearning, S=current_state, Sprime=int(next_state), A=int(current_action), alpha=alpha, R=reward, gamma=gamma)
        current_state = int(next_state) # S <- S'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, QLRewards[i]))
    
END = time.time()

print(END-ST)

In [None]:
import matplotlib.pyplot as plt
# The 'seaborn' style name was deprecated in Matplotlib 3.6 and later removed;
# fall back to its renamed equivalent when the old name is unavailable.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
# Season reward per training episode.
plt.plot(QLRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Q-Learning, γ=1")
plt.savefig("QL_Training_gamma1.png")

### Let gamma = 0.5

In [None]:
################################
########## Q-Learning ##########
################################

# Initialize Algorithm Parameters
alpha = 0.5    # step size of the Q update
epsilon = 0.1  # probability of a random (exploratory) action
# gamma = 1
gamma = 0.5 # Suggested by Mathews 

# Q-table dimensions
ssize = 2000 # number of possible states ==> set to something sufficiently large so that we don't get IndexError
asize = 13 # number of possible actions in each state: 13 now because do nothing + 3 PAs for each of 4 positions

# Episode layout: gameweek 1 is scored before the weekly loop, which covers gameweeks 2..38
numEpisodes = 100
weeks = list(range(2, 38 + 1))

# Learning state and bookkeeping (reset so this run starts from an all-zero Q table)
Qlearning = np.zeros((ssize, asize))
QLRewards = np.zeros(numEpisodes)  # total points per episode
# Slot 0 holds gameweek 1 and slot w holds gameweek w (w >= 2); slot 1 is never written.
QL_WEEKLY_SCORES = np.zeros((numEpisodes, 1, 38 + 1))
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Pick an action for `state` with an epsilon-greedy policy derived from Q.

    Args:
        state: row of Q to act from (cast to int so a float-valued budget can index the table)
        Q: 2-D array of action values, one row per state and one column per action
        epsilon: probability of taking a uniformly random action instead of a greedy one

    Returns:
        Index of the chosen action (a column of Q).
    """
    Q_s = Q[int(state),:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        # Explore with pr epsilon; the action count comes from Q itself, not a global
        action = np.random.choice(Q_s.shape[0])
    else: # Choose greedy action with pr 1-epsilon, breaking ties uniformly at random
        action = np.random.choice(np.flatnonzero(Q_s == Q_s.max()))
    return action

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    Apply one SARSA step to Q[S, A], bootstrapping from Q[Sprime, Aprime].

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, Aprime]
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """
    Apply one Q-learning step to Q[S, A], bootstrapping from the best action value in Sprime.

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, :].max()
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = int(budget)  # initialize S; cast so it can index the Q table
    # Score gameweek 1 once and reuse it (every scoring call re-downloads each starter's data)
    week1_pts = get_weekly_pts_data(1)
    QLRewards[i] += week1_pts
    QL_WEEKLY_SCORES[i,0,0] += week1_pts
    for curWeek in weeks: # For each step in the episode
        current_action = get_action(state=current_state, Q=Qlearning, epsilon=epsilon) # Choose A from S using policy derived from Q
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # NOTE(review): updateXP() and selectCaptain() are called without team=. If their `team`
        # default is bound when the helper is defined, they act on that older squad object
        # rather than curTeam -- confirm against the helper definitions.
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        reward = get_weekly_pts_data(curWeek) # Take action A, observe R (realized gameweek points), fetched once
        QLRewards[i] += reward
        QL_WEEKLY_SCORES[i,0,curWeek] += reward
        Qlearning = QLupdateQ(Q=Qlearning, S=current_state, Sprime=int(next_state), A=int(current_action), alpha=alpha, R=reward, gamma=gamma)
        current_state = int(next_state) # S <- S'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, QLRewards[i]))
    
END = time.time()

print("It took {:.2f} minutes to train 100 episodes.".format((END-ST)/60))

In [None]:
import matplotlib.pyplot as plt
# The 'seaborn' style name was deprecated in Matplotlib 3.6 and later removed;
# fall back to its renamed equivalent when the old name is unavailable.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
# Season reward per training episode.
plt.plot(QLRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Q-Learning, γ=0.5")
plt.savefig("QL_Training_gamma5.png")

## Choosing Actions Randomly

In [None]:
# Baseline: play whole seasons choosing one of the 13 actions uniformly at random each week.
numSims = 100
weeks = list(range(2,39))
# weeks = list(range(2,27))
startTime = time.time()
CURRENT_SCORE = np.zeros((numSims,))  # season total per simulation
WEEKLY_SCORE = []                     # gameweek-1 score, then every simulated weekly score in order
curTeam, budget = initializeTeam()
# Gameweek 1 is scored once with the freshly initialised team and credited to every simulation.
week1_pts = get_weekly_pts_data(1)
WEEKLY_SCORE.append(week1_pts)
CURRENT_SCORE += week1_pts
action_list = []  # Keep track of actions
for i in range(numSims):
    print("starting simulation {}".format(i))
    # Reset the squad and budget once per simulated season. Resetting inside the weekly loop
    # threw away every transfer (and the budget it left) before the next gameweek.
    curTeam, budget = initializeTeam()
    for curWeek in weeks:
        action = np.random.choice(a=list(range(13)))
        action_list.append(action)
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # NOTE(review): updateXP() and selectCaptain() are called without team=. If their `team`
        # default is bound when the helper is defined, they act on that older squad object
        # rather than curTeam -- confirm against the helper definitions.
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, budget = updateSquad(action, week=curWeek, team=curTeam, bud=budget)  # Update squad based on action, state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek)  # Realize GW results once, then update both records
        WEEKLY_SCORE.append(week_pts)
        CURRENT_SCORE[i] += week_pts
endTime = time.time()
print("{:.2f} minutes".format((endTime-startTime)/60))
print("End of Season Score: {}".format(CURRENT_SCORE[i]))  # score of the last simulation only

# Persist the random-baseline results as a two-item list.
with open('RandomAction.pkl', 'wb') as f:  
    pickle.dump([CURRENT_SCORE, WEEKLY_SCORE], f)  # Save the per-simulation season totals and the list of weekly scores (no Q function here)

# Reload what was just written; obj0 and obj1 receive the two saved items in order.
with open('RandomAction.pkl', 'rb') as f:  
    obj0, obj1 = pickle.load(f)


In [None]:
# Season total of each random-action simulation (one point per simulation).
plt.plot(CURRENT_SCORE)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Random Action Selection")
# NOTE(review): the file name says "10Sims" while the simulation cell sets numSims = 100 -- confirm.
plt.savefig("RandomActionSelection10Sims.png")

# Test on 2019-2020 Season

#### Update the `get_df` function to pull data from the 2019-2020 season

In [None]:
import pandas as pd
import numpy as np
#from xgoals import * #use getPts and getXPts functions
import requests
import pickle
import time
from io import StringIO

# Raw CSV text keyed by URL. The simulation asks for the same player's file many times
# per gameweek, so each file is downloaded only once per session.
_GW_CSV_CACHE = {}

def get_df(player):
    '''
    Access vaastav gw csvs for the 2019-20 season.

    Args: player (str), the player's directory name in the vaastav repository
    Returns: a freshly parsed DataFrame of that player's gw.csv (callers may modify it
    without affecting later calls, since only the text is cached)
    '''
    base_str = "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2019-20/players/"
    url = base_str + player +"/gw.csv"
    if url not in _GW_CSV_CACHE:
        # The timeout keeps a stalled connection from hanging a multi-hour run indefinitely.
        _GW_CSV_CACHE[url] = requests.get(url, timeout=30).text
    df = pd.read_csv(StringIO(_GW_CSV_CACHE[url]))
    return df

In [None]:
# load in the optimal starting selection according to the Integer Program (2019-20 version)
selection = pd.read_csv("https://raw.githubusercontent.com/colinpbowen/fantasy-RL/main/optimal_starting_team_1920.csv",
                       index_col=0) #the vector of 1s and 0s corresponding to the starting team
# Read from the working directory, unlike the selection file above which is fetched from GitHub.
player_initial_data = pd.read_csv('player_initial_values_1920.csv') #positional data
full_names = player_initial_data['full_names'] #get player names

In [None]:
# Flip this guard to True to rebuild the cached list; it downloads one CSV per player.
if False:
    max_selected = []
    for player in full_names:
        df = get_df(player)
        try:
            # Highest value of the 'selected' column across the player's gameweek rows
            val = df['selected'].max()
            max_selected.append(val)
        except KeyError:
            # No 'selected' column in the parsed data: record 0 so the list stays aligned with full_names
            max_selected.append(0)

    # This takes a minute to run, so pickle max_selected to save time for future runs
    with open('max_selected_1920.pkl', 'wb') as f:
        pickle.dump(max_selected, f)
# Load in max_selected:
with open('max_selected_1920.pkl', 'rb') as f:  
    max_selected = pickle.load(f)

In [None]:
# Keep only the players whose peak 'selected' count is above the mean peak across all players.
maximum_selected = pd.Series(max_selected)
idx = maximum_selected[maximum_selected > maximum_selected.mean()].index # NOTE(review): the original "152 players" note matches the 2018-19 cell; count for 2019-20 not verified
selection = selection.loc[idx] #reduces the initialization vector to the retained players
selection = selection.to_numpy()[:,0]
full_names = full_names.loc[idx]
player_initial_data = player_initial_data.loc[idx]
player_initial_data
# Alternative kept for reference: persist/reload idx from CSV instead of recomputing it.
#pd.Series(idx).to_csv('idx.csv')
#idx = pd.read_csv('idx.csv',index_col=0)
#selection = selection.loc[idx] #reduces 500 initialization vector to 152
#selection = selection.to_numpy()[:,0]
#full_names = full_names.loc[idx]
#player_initial_data = player_initial_data.loc[idx]

In [None]:
# Build the starting squad for 2019-20. These module-level curTeam and budget objects are
# also the ones captured by the helper-function default arguments defined in the next cell.
curTeam, budget = initializeTeam()
print("Budget remaining: {}".format(budget))
curTeam

### Update Helper functions for 2019-2020
- We run into a few issues due to differences in the data across seasons. For example, some players are missing the `penalties_conceded` column in the 2019-2020 data.

In [None]:
from numpy.random import choice as ch
from statistics import median

# Number of samples np.random.choice draws per statistic in getXpts; the median of the
# draws is used as that statistic's simulated value.
numChoices = 500 # INPUT A VALUE HERE

def getXpts(df, week, position):
    """
    Simulate a player's points for a gameweek from the empirical distribution of each statistic.

    For every statistic, the matching x*probs function supplies a probability vector,
    numChoices values are sampled from it, and the median sample is taken as the outcome.
    The outcomes are then scored with getPts.

    Args: df (player's gw DataFrame), week (int), position (1-4, as used by getPts)
    Returns: the simulated points total
    """
    # Statistic name -> function producing its distribution. Order matters: it fixes the
    # order in which random samples are drawn.
    stat_models = (
        ('goals_scored', xGprobs),
        ('assists', xAprobs),
        ('bonus', xBprobs),
        ('clean_sheets', xCSprobs),
        ('goals_conceded', xGCprobs),
        ('minutes', xMprobs),
        ('own_goals', xOGprobs),
        ('penalties_conceded', xPCprobs),
        ('penalties_missed', xPMprobs),
        ('penalties_saved', xPSprobs),
        ('red_cards', xRCprobs),
        ('saves', xSprobs),
        ('yellow_cards', xYCprobs),
    )
    # Build every distribution first, then sample, mirroring the two-phase original.
    distributions = [(stat, model(df, week)) for stat, model in stat_models]
    data = {
        stat: median(ch(a=np.arange(len(probs)), p=probs, size=numChoices))
        for stat, probs in distributions
    }
    return getPts(data, position)






def xGprobs(df, week):
    """
    Calculate distribution of expected goals for a player. 
    Args: df (player's gw DataFrame) ,week (int); the first week+1 rows of df are used
    Returns: list of 5 probabilities, entry i being the observed share of rows with i goals
    """
    prior = [1, 0, 0, 0, 0]  # Some prior we can change later. This assumes p(0 Goals) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['goals_scored'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-4)
        return prior
    return (counts / total).tolist()

# ## Expected Assists
def xAprobs(df, week):
    """
    Calculate distribution of expected assists for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list of 4 probabilities, entry i being the observed share of rows with i assists
    """
    prior = [1, 0, 0, 0]  # Some prior we can change later. This assumes p(0 assists) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['assists'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-3)
        return prior
    return (counts / total).tolist()

# ## Expected Bonus
def xBprobs(df, week):
    """
    Calculate distribution of expected bonus for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list of 4 probabilities, entry i being the observed share of rows with i bonus points
    """
    prior = [1, 0, 0, 0]  # Some prior we can change later. This assumes p(0 bonus) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['bonus'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-3)
        return prior
    return (counts / total).tolist()

# ## Expected Clean Sheets

def xCSprobs(df, week):
    """
    Calculate distribution of expected clean sheets for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list [p(no clean sheet), p(clean sheet)] from the observed rows
    """
    prior = [1, 0]  # Some prior we can change later. This assumes p(0 CS) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['clean_sheets'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-1)
        return prior
    return (counts / total).tolist()


# ## Expected Goals Conceded
def xGCprobs(df, week):
    """
    Calculate distribution of expected goals conceded for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list of 10 probabilities, entry i being the observed share of rows with i goals conceded
    """
    prior = [1, 0., 0., 0., 0., 0., 0., 0., 0., 0.]  # Some prior we can change later. This assumes p(0 GC) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['goals_conceded'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-9)
        return prior
    return (counts / total).tolist()


# ## Expected Minutes


def xMprobs(df, week):
    """
    Calculate distribution of expected minutes for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: 91 probabilities, entry i being the observed share of rows with i minutes
             (a list when data exists, the numpy prior otherwise)
    """
    p = np.zeros(91)
    p[0] = 1.0  # Some prior we can change later. This assumes p(0 minutes) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['minutes'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(p)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-90)
        return p
    return (counts / total).tolist()


# ## Expected Own Goals


def xOGprobs(df, week):
    """
    Calculate distribution of expected own goals for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list [p(0 own goals), p(1 own goal)] from the observed rows
    """
    prior = [1, 0]  # Some prior we can change later. This assumes p(0 OG) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['own_goals'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-1)
        return prior
    return (counts / total).tolist()



# ## Expected Penalties Conceded


def xPCprobs(df, week):
    """
    Calculate distribution of expected penalties conceded for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list of 3 probabilities, entry i being the observed share of rows with i penalties conceded
    """
    prior = [1, 0, 0]  # Some prior we can change later. This assumes p(0 PC) = 1
    try:
        observed = df['penalties_conceded'][0:week+1]
    except KeyError: # this column is missing in 2019/2020 data for some players
        return prior
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = observed.value_counts().reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-2)
        return prior
    return (counts / total).tolist()


# ## Expected Penalties Missed

def xPMprobs(df, week):
    """
    Calculate distribution of expected penalties missed for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list [p(0 penalties missed), p(1 penalty missed)] from the observed rows
    """
    prior = [1, 0]  # Some prior we can change later. This assumes p(0 PM) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['penalties_missed'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-1)
        return prior
    return (counts / total).tolist()

# ## Expected Penalties Saved

def xPSprobs(df, week):
    """
    Calculate distribution of expected penalties saved for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list of 3 probabilities, entry i being the observed share of rows with i penalties saved
    """
    prior = [1, 0, 0]  # Some prior we can change later. This assumes p(0 PS) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['penalties_saved'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-2)
        return prior
    return (counts / total).tolist()




# ## Expected Red Cards

def xRCprobs(df, week):
    """
    Calculate distribution of expected red cards for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list [p(no red card), p(red card)] from the observed rows
    """
    prior = [1, 0]  # Some prior we can change later. This assumes p(0 RC) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['red_cards'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-1)
        return prior
    return (counts / total).tolist()



# ## Expected Saves

def xSprobs(df, week):
    """
    Calculate distribution of expected saves for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: 15 probabilities, entry i being the observed share of rows with i saves
             (a list when data exists, the numpy prior otherwise)
    """
    p = np.zeros(15)
    p[0] = 1.0  # Some prior we can change later. This assumes p(0 saves) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['saves'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(p)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-14)
        return p
    return (counts / total).tolist()


# ## Expected Yellow Cards


def xYCprobs(df, week):
    """
    Calculate distribution of expected yellow cards for a player. 
    Args: df (player's gw DataFrame), week (int); the first week+1 rows of df are used
    Returns: list [p(no yellow card), p(yellow card)] from the observed rows
    """
    prior = [1, 0]  # Some prior we can change later. This assumes p(0 YC) = 1
    # Reindex the counts onto the support directly. Joining on the column name stopped
    # working in pandas 2.0, where value_counts() names its result 'count'.
    counts = df['yellow_cards'][0:week+1].value_counts()
    counts = counts.reindex(np.arange(len(prior)), fill_value=0)
    total = counts.sum()
    if total == 0:  ## WHEN WE DON'T HAVE ANY DATA (or every observation lies outside 0-1)
        return prior
    return (counts / total).tolist()
        
#generate realization
#no need to use this for accessing historical data because it's already stored? 
#need to add penalties conceded
def getPts(x, position):
    """ 
    Score one gameweek's statistics for a player; use with getXpts for simulating reward.

    Args:
        x: mapping from statistic name ('goals_scored', 'minutes', ...) to its value
        position: 1.0 goalkeeper, 2.0 defender, 3.0 midfielder, anything else striker
    Returns: the points total
    """
    # Appearance points: 2 for 60+ minutes, 1 for any shorter appearance, 0 otherwise
    minutes = x['minutes']
    if minutes >= 60:
        appearance = 2
    elif minutes > 0:
        appearance = 1
    else:
        appearance = 0

    # Terms every position shares
    assist_pts = 3*x['assists']
    deductions = x['yellow_cards'] + 3*x['red_cards'] + 2*x['own_goals']

    if position == 1.0:  # Goalie
        role_pts = (6*x['goals_scored'] + 4*x['clean_sheets'] + int(x['saves']//3)
                    + 5*x['penalties_saved'] - x['goals_conceded']//2)
    elif position == 2.0:  # Defender
        role_pts = (6*x['goals_scored'] + 4*x['clean_sheets'] - 2*x['penalties_missed']
                    - x['goals_conceded']//2)
    elif position == 3.0: # Mid
        role_pts = 5*x['goals_scored'] + 1*x['clean_sheets'] - 2*x['penalties_missed']
    else:  # Striker
        role_pts = 4*x['goals_scored'] - 2*x['penalties_missed']

    return x['bonus'] + appearance + role_pts + assist_pts - deductions
    
def ilocfromloc(player):
    """
    Return the positional (0-based) location of `player` within the full_names series.
    Helpful because other objects such as xPs use the same ordering as full_names.
    """
    is_player = (full_names == player).to_numpy()
    first_label = full_names.index[is_player][0]  # IndexError if the player is not listed
    return full_names.index.get_loc(first_label)

def getPlayerPool(team=None):
    """
    Return the players that are not in `team`, i.e. the candidates for a transfer.

    Args:
        team: squad DataFrame with a 'full_name' column. Defaults to the module-level curTeam,
              looked up when the function is called. A `team=curTeam` default would freeze
              whichever squad existed when the function was defined.
    Returns: DataFrame with columns full_names, element_type, team and an xP column set to 0
    """
    if team is None:
        team = curTeam
    player_pool = set(full_names) - set(team.full_name)
    in_pool = player_initial_data["full_names"].isin(player_pool)
    # Copy so adding the xP column never writes through to player_initial_data
    pp = player_initial_data.loc[in_pool, ["full_names", "element_type", 'team']].copy()
    pp['xP'] = 0
    return pp

def updateXP(week,team=None):
    """
    Refresh the 'xP' (expected points) column of every player in `team` for `week`.

    Args:
        week: gameweek to simulate
        team: squad DataFrame with 'full_name' and 'position' columns. Defaults to the
              module-level curTeam, looked up when the function is called. A `team=curTeam`
              default would keep pointing at the squad that existed at definition time, so
              calling without `team` silently dropped any transfer made since.
    Returns: the same DataFrame, updated in place
    """
    if team is None:
        team = curTeam
    for player in team.full_name.values:
        is_player = team.full_name == player
        position = team.loc[is_player, 'position'].values[0]
        team.loc[is_player, 'xP'] = getXpts(get_df(player), week, position)
    return team

def selectCaptain(team=None):
    """
    Mark the player with the highest xP as captain.

    Args:
        team: squad DataFrame with an 'xP' column. Defaults to the module-level curTeam,
              looked up when the function is called rather than frozen at definition time.
    Returns: the same DataFrame with an 'isCaptain' column holding 1 for the captain, 0 otherwise

    Exactly one player is flagged: when several share the top xP the first of them is chosen,
    so a tie can no longer double the points of more than one player.
    """
    if team is None:
        team = curTeam
    flags = np.zeros(len(team), dtype=int)
    xp = team['xP'].to_numpy(dtype=float)
    if len(xp) and not np.isnan(xp).all():
        flags[int(np.nanargmax(xp))] = 1
    team['isCaptain'] = flags
    return team

def getTopN(df, position, week,N, max_budget):
    """
    Rank the affordable players at `position` by simulated expected points.

    Args:
        df: player-pool DataFrame with 'full_names' and 'element_type' columns; its 'xP' and
            'value' columns are written in place for the players at `position`
        position: element_type to consider
        week: gameweek used for the simulation and for the price lookup
        N: number of players to return
        max_budget: highest price that can be afforded
    Returns: up to N rows sorted by xP (descending), with 'full_names'/'element_type'
             renamed to 'full_name'/'position'
    """
    for player in df.full_names.loc[df.element_type==position]: 
        gw = get_df(player)  # one download per player, reused for both xP and price
        is_player = df.full_names == player
        df.loc[is_player, "xP"] = getXpts(gw,week,position)
        try:
            df.loc[is_player, "value"] = gw.loc[gw['round'] == week]['value'].iloc[0]
        except IndexError: # If this player wasn't available in the week, we'll be able to exclude them using the following
            df.loc[is_player, "value"] = max_budget + 1 
    temp = df.loc[df.element_type==position].sort_values(by='xP',ascending=False)
    temp = temp[temp.value <= max_budget]
    return temp[0:N].rename(columns={"full_names":'full_name', "element_type":"position"})

def worstAtPos(pos, team=None, bud=None):
    """
    Find the squad member at `pos` with the lowest expected points per unit of value.

    Args:
        pos: position to replace
        team: squad DataFrame with 'full_name', 'position', 'xP' and 'value' columns
        bud: current budget
        `team` and `bud` default to the module-level curTeam and budget, looked up when the
        function is called rather than frozen at definition time.
    Returns: (name of that player, budget that would be available if the player were sold)
    """
    if team is None:
        team = curTeam
    if bud is None:
        bud = budget
    options = team.loc[team.position == pos] # players at the requested position
    ratio = options['xP'].to_numpy(dtype=float) / options['value'].to_numpy(dtype=float)
    worst = options.iloc[int(np.argmin(ratio))]
    return worst.full_name, bud+worst.value

def swapPlayer(team, playerOut, choices, idx, curBudget):
    """
    Replace `playerOut` in `team` with row `idx` of `choices` and adjust the budget.

    Returns (new team, new budget). If `choices` has no row `idx`, a message is printed and
    the team and budget are returned unchanged.
    """
    try:
        staying = team.loc[team.full_name!=playerOut]
        incoming = choices.iloc[[idx]]
        newTeam = pd.concat([staying, incoming])
    except IndexError:  # Occurs when there's no one to swap
        print("Cannot Swap Player this round using this action.")
        return team, curBudget
    sale_price = team.loc[team.full_name==playerOut]['value'].values[0]
    purchase_price = incoming['value'].values[0]
    newBudget = curBudget + sale_price - purchase_price
    return newTeam, newBudget

def updateSquad(action, week, team=None, bud=None, PAs=3):
    """
    Apply a transfer action to the squad.

    Action 0 does nothing. Actions 1..4*PAs are laid out as PAs consecutive actions per
    position (goalkeeper, defender, midfielder, forward): the worst player at that position
    is swapped for the k-th best affordable candidate, k being the action's offset within
    its group. Candidates come from the module-level player pool `pp`.

    Args:
        action: integer action, expected in 0..4*PAs
        week: gameweek used to rank candidates
        team, bud: squad DataFrame and budget. They default to the module-level curTeam and
                   budget, looked up when the function is called rather than at definition.
        PAs: number of candidate replacements per position
    Returns: (team, budget) after the action
    """
    if team is None:
        team = curTeam
    if bud is None:
        bud = budget
    if action == 0: # Do Nothing
        return team, bud
    # Derive position and candidate rank from the action so that any PAs works; the fixed
    # PAs+4 / PAs+7 thresholds were only right for PAs == 3.
    slot = int(action) - 1
    position = min(slot // PAs, 3) + 1  # 1 GK, 2 DEF, 3 MID, 4 FWD (overflow stays on FWD)
    choice_idx = slot - (position - 1) * PAs
    worstPlayer, potential_budget = worstAtPos(pos=position, team=team, bud=bud)
    topN = getTopN(df=pp, position=position, week=week, N=PAs, max_budget=potential_budget)
    team, new_budget = swapPlayer(team=team, playerOut=worstPlayer, choices=topN, idx=choice_idx, curBudget=bud)
    return team, new_budget

def get_weekly_pts_data(week):
    """
    Return the realized points of the current team's starters for gameweek `week`.

    Reads the module-level curTeam (rows with isStarting == 1) and downloads each starter's
    gameweek data. The captain's points count double.
    """
    total = 0
    starters = curTeam.loc[curTeam.isStarting == 1].full_name
    for name in list(starters):
        gw = get_df(name)
        scored = gw['total_points'][gw['round'] == week]
        try:
            # NOTE(review): only the first row for the round is used; if a round can contain
            # two fixtures for a player, the second is ignored -- confirm against the data.
            score = scored.iloc[0]
        except IndexError:
            score = 2  # If our starter doesn't play this week, assume their replacement got 2 pts (played 60+ min)
        is_captain = curTeam.loc[curTeam['full_name'] == name, 'isCaptain'].values[0] == 1
        if is_captain:
            score *= 2
        total += score
    return total

# Use our Q function trained on 100 episodes of the 2018-2019 season using SARSA and a $\gamma$ of 1

In [None]:
###########################
########## SARSA ##########
###########################
# Load in trained Q Function: the first item of the list saved in SARSA1.pkl.
# The other two saved items (episode rewards and weekly scores) are discarded.
with open('SARSA1.pkl', 'rb') as f:  
    Qsarsa, _, _ = pickle.load(f)
# Initialize Algorithm Parameters
# (alpha and gamma only matter to the Q update, which is commented out in this cell's loop)
alpha = 0.5
epsilon = 0.1  # probability of a random (exploratory) action
gamma = 1
# gamma = 0.5 # Suggested by Mathews 

# Q-table dimensions
ssize = 2000 # number of possible states ==> for each possible budget and team configuration
asize = 13 # number of possible actions in each state: 13 now because do nothing + 3 PAs for each of 4 positions

# Episode layout: gameweek 1 is scored before the weekly loop, which covers gameweeks 2..38
numEpisodes = 100
weeks = list(range(2, 38 + 1))

# Bookkeeping
sarsaRewards = np.zeros(numEpisodes)  # total points per episode
# Slot 0 holds gameweek 1 and slot w holds gameweek w (w >= 2); slot 1 is never written.
WEEKLY_SCORES = np.zeros((numEpisodes, 1, 38 + 1))
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Pick an action for `state` with an epsilon-greedy policy derived from Q.

    Args:
        state: row of Q to act from (cast to int so a float-valued budget can index the table)
        Q: 2-D array of action values, one row per state and one column per action
        epsilon: probability of taking a uniformly random action instead of a greedy one

    Returns:
        Index of the chosen action (a column of Q).
    """
    Q_s = Q[int(state),:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        # Explore with pr epsilon; the action count comes from Q itself, not a global
        action = np.random.choice(Q_s.shape[0])
    else: # Choose greedy action with pr 1-epsilon, breaking ties uniformly at random
        action = np.random.choice(np.flatnonzero(Q_s == Q_s.max()))
    return action

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    Apply one SARSA step to Q[S, A], bootstrapping from Q[Sprime, Aprime].

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, Aprime]
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """
    Apply one Q-learning step to Q[S, A], bootstrapping from the best action value in Sprime.

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, :].max()
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

ST = time.time()

# Evaluate the loaded Q function on 2019-20 WITHOUT learning: Qsarsa is never updated here.
for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = int(budget)  # initialize S; cast so it can index the Q table
    # Score gameweek 1 once and reuse it (every scoring call re-downloads each starter's data)
    week1_pts = get_weekly_pts_data(1)
    sarsaRewards[i] += week1_pts
    WEEKLY_SCORES[i,0,0] += week1_pts
    for curWeek in weeks: # For each step in the episode
        current_action = get_action(state=current_state, Q=Qsarsa, epsilon=epsilon) # Choose A from S using policy derived from Q
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        # Pass the squad explicitly: the helpers' `team=curTeam` defaults were bound when the
        # helpers were defined, so omitting it reverted to that older squad every week.
        curTeam = updateXP(week=curWeek, team=curTeam) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain(team=curTeam)  # Choose the captain once you have the starters
        reward = get_weekly_pts_data(curWeek) # Realize GW results (fetched once), update your Rewards
        sarsaRewards[i] += reward
        WEEKLY_SCORES[i,0,curWeek] += reward
        # No A' is sampled here: with the Q update disabled it would be discarded anyway.
        current_state = int(next_state) # S <- S'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, sarsaRewards[i]))
    
END = time.time()

print(END-ST)

# Persist the evaluation results (Q function as used, per-episode rewards, weekly scores).
with open('SARSA1_1920_noUpdate.pkl', 'wb') as f:  
    pickle.dump([Qsarsa, sarsaRewards, WEEKLY_SCORES], f)  # SAVE THE Tested Q function, Episodic Rewards, and Weekly Rewards

import matplotlib.pyplot as plt
# The 'seaborn' style name was deprecated in Matplotlib 3.6 and later removed;
# fall back to its renamed equivalent when the old name is unavailable.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
# Season reward per evaluation episode (Q function not updated during the run).
plt.plot(sarsaRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("SARSA, γ=1.0")
plt.savefig("SARSA_Testing1920_gamma1_NoUpdate.png")

In [None]:
import matplotlib.pyplot as plt
# The 'seaborn' style name was deprecated in Matplotlib 3.6 and later removed;
# fall back to its renamed equivalent when the old name is unavailable.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
# NOTE(review): this plots whatever sarsaRewards currently holds; run right after the cell
# above it reproduces the "NoUpdate" curve under a different file name -- confirm intent.
plt.plot(sarsaRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("SARSA, γ=1.0")
plt.savefig("SARSA_Testing1920_gamma1.png")

# Use our Q function trained on 100 episodes of the 2018-2019 season using SARSA and a $\gamma$ of 0.5

In [None]:
###########################
########## SARSA ##########
###########################
# Load in trained Q Function: the first item of the list saved in SARSA5.pkl.
# The other two saved items are discarded.
# NOTE(review): no visible cell writes 'SARSA5.pkl' -- confirm where it is produced.
with open('SARSA5.pkl', 'rb') as f:  
    Qsarsa, _, _ = pickle.load(f)
# Initialize Algorithm Parameters
alpha = 0.5    # step size of the Q update
epsilon = 0.1  # probability of a random (exploratory) action
gamma = 0.5 # Suggested by Mathews 

# Q-table dimensions
ssize = 2000 # number of possible states ==> for each possible budget and team configuration
asize = 13 # number of possible actions in each state: 13 now because do nothing + 3 PAs for each of 4 positions

# Episode layout: gameweek 1 is scored before the weekly loop, which covers gameweeks 2..38
numEpisodes = 100
weeks = list(range(2, 38 + 1))

# Bookkeeping
sarsaRewards = np.zeros(numEpisodes)  # total points per episode
# Slot 0 holds gameweek 1 and slot w holds gameweek w (w >= 2); slot 1 is never written.
WEEKLY_SCORES = np.zeros((numEpisodes, 1, 38 + 1))
# Algorithm Functions
def get_action(state, Q, epsilon):
    """
    Pick an action for `state` with an epsilon-greedy policy derived from Q.

    Args:
        state: row of Q to act from (cast to int so a float-valued budget can index the table)
        Q: 2-D array of action values, one row per state and one column per action
        epsilon: probability of taking a uniformly random action instead of a greedy one

    Returns:
        Index of the chosen action (a column of Q).
    """
    Q_s = Q[int(state),:] # Get state-action values corresponding to the current state
    if (np.random.uniform() <= epsilon):
        # Explore with pr epsilon; the action count comes from Q itself, not a global
        action = np.random.choice(Q_s.shape[0])
    else: # Choose greedy action with pr 1-epsilon, breaking ties uniformly at random
        action = np.random.choice(np.flatnonzero(Q_s == Q_s.max()))
    return action

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """
    Apply one SARSA step to Q[S, A], bootstrapping from Q[Sprime, Aprime].

    Q is modified in place and also returned.
    """
    td_target = R + gamma * Q[Sprime, Aprime]
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """Apply one Q-Learning update to ``Q[S, A]`` in place and return ``Q``.

    The target is ``R + gamma * max_a Q[Sprime, a]``; ``alpha`` scales the step
    taken from the current estimate towards that target.
    """
    best_next = Q[Sprime, :].max()
    td_target = R + gamma * best_next
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

# Run numEpisodes seasons with SARSA (gamma = 0.5), updating Qsarsa as we go,
# then save the results and plot the per-episode season totals.
ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = budget  # initialize S
    # Look the gameweek-1 score up once so the season total and the weekly record agree
    gw1_pts = get_weekly_pts_data(1)
    sarsaRewards[i] += gw1_pts
    WEEKLY_SCORES[i,0,0] += gw1_pts
    # Choose the first A from S using the policy derived from Q. Afterwards the action
    # is carried over from A' (see the end of the loop body): SARSA is on-policy, so the
    # action bootstrapped from in the update must be the action actually taken next.
    # Re-sampling at the top of every week would silently discard A'.
    current_action = get_action(state=current_state, Q=Qsarsa, epsilon=epsilon)
    for curWeek in weeks: # For each step in the episode
        # pp is not referenced again in this cell; kept because the helpers may read it
        # as a global -- TODO confirm
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek) # Take Action a, observe the gameweek result
        sarsaRewards[i] += week_pts
        WEEKLY_SCORES[i,0,curWeek] += week_pts
        next_action = get_action(int(next_state), Qsarsa, epsilon)  # Choose A' from S' using policy derived from Q
        Qsarsa = SARSAupdate_Q(Q=Qsarsa, S=int(current_state), Sprime=int(next_state), A=int(current_action), Aprime=int(next_action), alpha=alpha, R=WEEKLY_SCORES[i,0,curWeek], gamma=gamma)
        current_state = int(next_state) # S <- S'
        current_action = int(next_action) # A <- A'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, sarsaRewards[i]))

END = time.time()

print(END-ST)  # total wall-clock seconds

with open('SARSA5_1920.pkl', 'wb') as f:  
    pickle.dump([Qsarsa, sarsaRewards, WEEKLY_SCORES], f)  # SAVE THE Tested Q function, Episodic Rewards, and Weekly Rewards

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and 3.8 removed
# the old name, so fall back to the new name when the old one is not available.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
plt.plot(sarsaRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("SARSA, γ=0.5")  # this cell runs with gamma = 0.5 (the title previously said 1.0)
plt.savefig("SARSA_Testing1920_gamma5.png")

# Use our Q function trained on 100 episodes of the 2018-2019 season using Q-Learning and a $\gamma$ of 1

In [None]:
# ------------------------------------------------------------------
# Q-Learning test run, gamma = 1: restore the Q table and set parameters
# ------------------------------------------------------------------
# QL1.pkl holds a 3-item object; only the first item (the Q table) is kept.
with open('QL1.pkl', 'rb') as f:
    Qlearning, _, _ = pickle.load(f)

# Learning hyper-parameters
alpha, epsilon = 0.5, 0.1
gamma = 1  # the alternative value 0.5 (suggested by Mathews) is used in a separate cell

# Problem dimensions
ssize = 2000  # number of possible states ==> set to something sufficiently large so that we don't get IndexError
asize = 13    # do nothing + 3 PAs for each of 4 positions

# Episode layout: gameweek 1 is scored before the loop, gameweeks 2..38 are stepped through
numEpisodes = 100
weeks = list(range(2, 39))

# Result buffers. The last axis of QL_WEEKLY_SCORES has 39 slots: slot 0 receives the
# gameweek-1 score and slot k the gameweek-k score for k >= 2, so slot 1 stays at zero.
QLRewards = np.zeros(numEpisodes)
QL_WEEKLY_SCORES = np.zeros((numEpisodes, 1, 39))
# Algorithm Functions
def get_action(state, Q, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy policy over ``Q``.

    Parameters
    ----------
    state : int
        Row of ``Q`` to act from.
    Q : array indexed as ``Q[state, action]``
        Current state-action value estimates.
    epsilon : float
        Probability of exploring (picking uniformly at random).

    Returns
    -------
    Integer index of the chosen action (a column of ``Q``).
    """
    Q_s = Q[state,:] # State-action values for the current state
    if (np.random.uniform() <= epsilon):
        # Explore with probability epsilon. The action count is taken from the Q row
        # itself so it always matches the columns the greedy branch can return.
        action = np.random.choice(len(Q_s))
    else:
        # Exploit with probability 1-epsilon; ties for the maximum are broken at random
        action = np.random.choice(np.flatnonzero(Q_s == Q_s.max()))
    return action

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """Apply one SARSA update to ``Q[S, A]`` in place and return ``Q``.

    The target is ``R + gamma * Q[Sprime, Aprime]``; ``alpha`` scales the step
    taken from the current estimate towards that target.
    """
    td_target = R + gamma * Q[Sprime, Aprime]
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """Apply one Q-Learning update to ``Q[S, A]`` in place and return ``Q``.

    The target is ``R + gamma * max_a Q[Sprime, a]``; ``alpha`` scales the step
    taken from the current estimate towards that target.
    """
    best_next = Q[Sprime, :].max()
    td_target = R + gamma * best_next
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

# Run numEpisodes seasons with Q-Learning (gamma = 1), updating Qlearning as we go,
# then save the results and plot the per-episode season totals.
ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = budget  # initialize S
    # Look the gameweek-1 score up once so the season total and the weekly record agree
    gw1_pts = get_weekly_pts_data(1)
    QLRewards[i] += gw1_pts
    QL_WEEKLY_SCORES[i,0,0] += gw1_pts
    for curWeek in weeks: # For each step in the episode
        current_action = get_action(state=current_state, Q=Qlearning, epsilon=epsilon) # Choose A from S using policy derived from Q
        # pp is not referenced again in this cell; kept because the helpers may read it
        # as a global -- TODO confirm
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek) # Take Action a, observe the gameweek result
        QLRewards[i] += week_pts
        QL_WEEKLY_SCORES[i,0,curWeek] += week_pts
        Qlearning = QLupdateQ(Q=Qlearning, S=int(current_state), Sprime=int(next_state), A=int(current_action), alpha=alpha, R=QL_WEEKLY_SCORES[i,0,curWeek], gamma=gamma)
        current_state = int(next_state) # S <- S'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, QLRewards[i]))

END = time.time()

print(END-ST)  # total wall-clock seconds

with open('QL1_1920.pkl', 'wb') as f:  
    pickle.dump([Qlearning, QLRewards, QL_WEEKLY_SCORES], f)  # SAVE THE Tested Q function, Episodic Rewards, and Weekly Rewards

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and 3.8 removed
# the old name, so fall back to the new name when the old one is not available.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
plt.plot(QLRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Q-Learning, γ=1.0")
plt.savefig("QL_Testing1920_gamma1.png")

# Use our Q function trained on 100 episodes of the 2018-2019 season using Q-Learning and a $\gamma$ of 0.5

In [None]:
# ------------------------------------------------------------------
# Q-Learning test run, gamma = 0.5: restore the Q table and set parameters
# ------------------------------------------------------------------
# QL5.pkl holds a 3-item object; only the first item (the Q table) is kept.
with open('QL5.pkl', 'rb') as f:
    Qlearning, _, _ = pickle.load(f)

# Learning hyper-parameters
alpha, epsilon = 0.5, 0.1
gamma = 0.5  # Suggested by Mathews (the alternative value 1 is used in a separate cell)

# Problem dimensions
ssize = 2000  # number of possible states ==> set to something sufficiently large so that we don't get IndexError
asize = 13    # do nothing + 3 PAs for each of 4 positions

# Episode layout: gameweek 1 is scored before the loop, gameweeks 2..38 are stepped through
numEpisodes = 100
weeks = list(range(2, 39))

# Result buffers. The last axis of QL_WEEKLY_SCORES has 39 slots: slot 0 receives the
# gameweek-1 score and slot k the gameweek-k score for k >= 2, so slot 1 stays at zero.
QLRewards = np.zeros(numEpisodes)
QL_WEEKLY_SCORES = np.zeros((numEpisodes, 1, 39))
# Algorithm Functions
def get_action(state, Q, epsilon):
    """Choose an action for ``state`` with an epsilon-greedy policy over ``Q``.

    Parameters
    ----------
    state : int
        Row of ``Q`` to act from.
    Q : array indexed as ``Q[state, action]``
        Current state-action value estimates.
    epsilon : float
        Probability of exploring (picking uniformly at random).

    Returns
    -------
    Integer index of the chosen action (a column of ``Q``).
    """
    Q_s = Q[state,:] # State-action values for the current state
    if (np.random.uniform() <= epsilon):
        # Explore with probability epsilon. The action count is taken from the Q row
        # itself so it always matches the columns the greedy branch can return.
        action = np.random.choice(len(Q_s))
    else:
        # Exploit with probability 1-epsilon; ties for the maximum are broken at random
        action = np.random.choice(np.flatnonzero(Q_s == Q_s.max()))
    return action

def SARSAupdate_Q(Q, S, Sprime, A, Aprime, alpha, R, gamma):
    """Apply one SARSA update to ``Q[S, A]`` in place and return ``Q``.

    The target is ``R + gamma * Q[Sprime, Aprime]``; ``alpha`` scales the step
    taken from the current estimate towards that target.
    """
    td_target = R + gamma * Q[Sprime, Aprime]
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

def QLupdateQ(Q, S, Sprime, A, alpha, R, gamma):
    """Apply one Q-Learning update to ``Q[S, A]`` in place and return ``Q``.

    The target is ``R + gamma * max_a Q[Sprime, a]``; ``alpha`` scales the step
    taken from the current estimate towards that target.
    """
    best_next = Q[Sprime, :].max()
    td_target = R + gamma * best_next
    Q[S, A] += alpha * (td_target - Q[S, A])
    return Q

# Run numEpisodes seasons with Q-Learning (gamma = 0.5), updating Qlearning as we go,
# then save the results and plot the per-episode season totals.
ST = time.time()

for i in range(numEpisodes):
    episodeST = time.time()
    print("starting episode {}".format(i))
    curTeam, budget = initializeTeam() # initialize the team and budget (state)
    current_state = budget  # initialize S
    # Look the gameweek-1 score up once so the season total and the weekly record agree
    gw1_pts = get_weekly_pts_data(1)
    QLRewards[i] += gw1_pts
    QL_WEEKLY_SCORES[i,0,0] += gw1_pts
    for curWeek in weeks: # For each step in the episode
        current_action = get_action(state=current_state, Q=Qlearning, epsilon=epsilon) # Choose A from S using policy derived from Q
        # pp is not referenced again in this cell; kept because the helpers may read it
        # as a global -- TODO confirm
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, next_state = updateSquad(current_action, week=curWeek, team=curTeam, bud=current_state)  # Update squad based on action, transition to new state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek) # Take Action a, observe the gameweek result
        QLRewards[i] += week_pts
        QL_WEEKLY_SCORES[i,0,curWeek] += week_pts
        Qlearning = QLupdateQ(Q=Qlearning, S=int(current_state), Sprime=int(next_state), A=int(current_action), alpha=alpha, R=QL_WEEKLY_SCORES[i,0,curWeek], gamma=gamma)
        current_state = int(next_state) # S <- S'
    episodeEND = time.time()
    print("Episode {} took {:.2f} minutes to run. EPISODE PTS = {}".format(i, (episodeEND-episodeST)/60, QLRewards[i]))

END = time.time()

print(END-ST)  # total wall-clock seconds

with open('QL5_1920.pkl', 'wb') as f:  
    pickle.dump([Qlearning, QLRewards, QL_WEEKLY_SCORES], f)  # SAVE THE Tested Q function, Episodic Rewards, and Weekly Rewards

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and 3.8 removed
# the old name, so fall back to the new name when the old one is not available.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
plt.plot(QLRewards)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Q-Learning, γ=0.5")
plt.savefig("QL_Testing1920_gamma5.png")

# Choose actions randomly

In [None]:
# Baseline: play numSims seasons choosing one of the 13 actions uniformly at random
# every gameweek, then save and plot the season totals.
numSims = 100
weeks = list(range(2,39))
# weeks = list(range(2,27))  # shorter alternative left by the authors
startTime = time.time()
CURRENT_SCORE = np.zeros((numSims,))
WEEKLY_SCORE = []
curTeam, budget = initializeTeam()
# Gameweek 1 is scored once with the initial team and credited to every simulation
gw1_pts = get_weekly_pts_data(1)
WEEKLY_SCORE.append(gw1_pts)
CURRENT_SCORE += gw1_pts
action_list = []  # Keep track of actions
for i in range(numSims):
    print("starting simulation {}".format(i))
    # Reset the squad and budget once per simulated season. Resetting inside the week
    # loop would discard every transfer before the following gameweek, so each week
    # would be played by the initial team plus a single random action.
    curTeam, budget = initializeTeam()
    for curWeek in weeks:
        action = np.random.choice(a=list(range(13)))
        action_list.append(action)
        # pp is not referenced again in this cell; kept because the helpers may read it
        # as a global -- TODO confirm
        pp = getPlayerPool(team=curTeam) # Start by getting the available player pool
        curTeam = updateXP(week=curWeek) # Get expectations for your current team's points
        curTeam, budget = updateSquad(action, week=curWeek, team=curTeam, bud=budget)  # Update squad based on action, state (budget)
        curTeam['isStarting'] = pick_starters(curTeam, init=False) # Choose your starters once you've updated the squad
        curTeam = selectCaptain()  # Choose the captain once you have the starters
        week_pts = get_weekly_pts_data(curWeek)  # Realize GW results once, then record them in both places
        WEEKLY_SCORE.append(week_pts)
        CURRENT_SCORE[i] += week_pts
endTime = time.time()
print("{:.2f} minutes".format((endTime-startTime)/60))
print("End of Season Score: {}".format(CURRENT_SCORE[i]))  # score of the last simulation only

with open('RandomAction1920.pkl', 'wb') as f:  
    pickle.dump([CURRENT_SCORE, WEEKLY_SCORE], f)  # SAVE the season totals and the flat list of weekly scores

# Read the file straight back (round-trip check; obj0/obj1 are not used below)
with open('RandomAction1920.pkl', 'rb') as f:  
    obj0, obj1 = pickle.load(f)

plt.plot(CURRENT_SCORE)
plt.xlabel('Episode')
plt.ylabel("Season Rewards")
plt.title("Random Action Selection: 2019-2020 Season")
plt.savefig("RandomActionSelection100Sims.png")

# Comparison of Results

## Testing Results

In [None]:
def _episode_rewards(path):
    """Return the middle entry of the 3-item object pickled at ``path``."""
    with open(path, 'rb') as f:
        _, rewards, _ = pickle.load(f)
    return rewards


# Per-episode rewards of the four "noUpdate" test runs
QL5 = _episode_rewards("QL5_1920_noUpdate.pkl")        # Q-Learning, gamma=0.5
QL1 = _episode_rewards("QL1_1920_noUpdate.pkl")        # Q-Learning, gamma=1
sarsa1 = _episode_rewards("SARSA1_1920_noUpdate.pkl")  # SARSA, gamma=1
sarsa5 = _episode_rewards("SARSA5_1920_noUpdate.pkl")  # SARSA, gamma=0.5

# The random-action file holds two items; keep the first one
with open("RandomAction1920.pkl", 'rb') as f:
    randomAction, _ = pickle.load(f)

In [None]:
# Overlay the reward distributions of every strategy on one figure.
# Semi-transparent bars plus a legend keep the five overlapping histograms distinguishable.
plt.figure()
for rewards, series_name in (
    (sarsa1, "SARSA, γ=1.0"),
    (sarsa5, "SARSA, γ=0.5"),
    (QL1, "Q-Learning, γ=1.0"),
    (QL5, "Q-Learning, γ=0.5"),
    (randomAction, "Random Action"),
):
    plt.hist(rewards, alpha=0.5, label=series_name)
plt.xlabel("Season Rewards")
plt.ylabel("Number of Episodes")
plt.legend(loc='best')

In [None]:
# Get Descriptive Statistics
def get_stats(alg, gamma, data):
    """Print max, min, mean and variance of ``data`` under an algorithm header.

    Parameters
    ----------
    alg : str
        Algorithm name shown in the header.
    gamma : float or str
        Discount factor shown in the header (any printable value, e.g. 'N/A').
    data : array-like of numbers
        Values to summarize; lists and tuples are accepted as well as arrays.

    Returns
    -------
    None. All output goes to stdout, rounded to whole numbers. The variance is
    ``np.var`` with its default arguments.
    """
    data = np.asarray(data)  # lets plain Python sequences use the array reductions below
    print("Algorithm: {}".format(alg))
    print("γ: {}".format(gamma))
    print("Max: {:.0f}".format(data.max()))
    print("Minimum: {:.0f}".format(data.min()))
    print("Mean: {:.0f}".format(data.mean()))
    print("Variance: {:.0f}".format(np.var(data)))
    print()
    return

In [None]:
# Print the summary block for every strategy, in a fixed order
for alg_name, discount, rewards in (
    ("SARSA", 1.0, sarsa1),
    ("SARSA", 0.5, sarsa5),
    ("Q-Learning", 1.0, QL1),
    ("Q-Learning", 0.5, QL5),
    ("Random Actions", 'N/A', randomAction),
):
    get_stats(alg_name, discount, rewards)

In [None]:
import matplotlib.pyplot as plt
# Per-episode rewards of the four learned agents on one set of axes
plt.style.use('fivethirtyeight')
plt.plot(QL5, label="Q-Learning, γ=0.5",linewidth=2)
plt.plot(QL1, label="Q-Learning, γ=1",linewidth=2)
plt.plot(sarsa5, label="SARSA, γ=0.5",linewidth=2)
plt.plot(sarsa1, label="SARSA, γ=1",linewidth=2)  # sarsa1 is the γ=1 run; it was mislabeled γ=0.5
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.legend(loc='best')

In [None]:
plt.style.use('fivethirtyeight')
# Only the random-action baseline is drawn here, smoothed with a 10-episode rolling mean.
# The learned agents can be added the same way by uncommenting the lines below:
#plt.plot(pd.DataFrame(QL5).rolling(10).mean(), label="Q-Learning, γ=0.5",linewidth=1.5)
#plt.plot(pd.DataFrame(QL1).rolling(10).mean(), label="Q-Learning, γ=1",linewidth=1.5)
#plt.plot(pd.DataFrame(sarsa5).rolling(10).mean(), label="SARSA, γ=0.5",linewidth=1.5)
#plt.plot(pd.DataFrame(sarsa1).rolling(10).mean(), label="SARSA, γ=1",linewidth=1.5)
random_smoothed = pd.DataFrame(randomAction).rolling(10).mean()
plt.plot(random_smoothed, label="Random Action", linewidth=1.5)

plt.ylabel("Reward")
plt.xlabel("Episode")
plt.legend(loc='best')

## Training Results

In [None]:
def _episode_rewards(path):
    """Return the middle entry of the 3-item object pickled at ``path``."""
    with open(path, 'rb') as f:
        _, rewards, _ = pickle.load(f)
    return rewards


# Per-episode rewards stored alongside the trained Q tables
QL5 = _episode_rewards("QL5.pkl")        # Q-Learning, gamma=0.5
QL1 = _episode_rewards("QL1.pkl")        # Q-Learning, gamma=1
sarsa1 = _episode_rewards("SARSA1.pkl")  # SARSA, gamma=1
sarsa5 = _episode_rewards("SARSA5.pkl")  # SARSA, gamma=0.5

# The random-action file holds two items; keep the first one
with open("RandomAction.pkl", 'rb') as f:
    randomAction, _ = pickle.load(f)

In [None]:
# Print the summary block for every strategy, in a fixed order
for alg_name, discount, rewards in (
    ("SARSA", 1.0, sarsa1),
    ("SARSA", 0.5, sarsa5),
    ("Q-Learning", 1.0, QL1),
    ("Q-Learning", 0.5, QL5),
    ("Random Actions", 'N/A', randomAction),
):
    get_stats(alg_name, discount, rewards)

In [None]:
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
# Optional tweaks left by the authors:
#plt.figure()
#plt.ylim((1900,1945))
plt.tight_layout()
plt.gcf().subplots_adjust(bottom=0.15, left=0.15)  # margins for the axis labels
plt.ticklabel_format(useOffset=False) #useMathText=False,style='plain'
# Each series is smoothed with a 10-episode rolling mean before plotting.
# The Q-Learning curves can be drawn the same way:
# plt.plot(pd.DataFrame(QL5).rolling(10).mean(), label="Q-Learning, γ=0.5",linewidth=2)
# plt.plot(pd.DataFrame(QL1).rolling(10).mean(), label="Q-Learning, γ=1",linewidth=2)
for rewards, series_name, extra_style in (
    (sarsa1, "SARSA, γ=1.0", {}),
    (sarsa5, "SARSA, γ=0.5", {}),
    (randomAction, "Random Action", {'color': 'black'}),
):
    smoothed = pd.DataFrame(rewards).rolling(10).mean()
    plt.plot(smoothed, label=series_name, linewidth=2, **extra_style)
#plt.title("Comparison of Algorithm Performance during Training")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.legend(loc='best')
#plt.savefig("FORPAPER_SARSATestingPerformanceWithRandomAction.png")
#plt.savefig("FORPAPER_QLearningTestingPerformance.png")