# Fantasy NFL Lineup Optimizer
First attempt at getting a working model. 

In [1]:
using POMDPs
using POMDPModelTools
using Random
using Distributions
using POMDPSimulators
using POMDPPolicies
using MCTS
using SARSOP
using Printf
using CSV 
using Plots 
using DataFrames
using LinearAlgebra

# Notebook-wide handle to Julia's global random number generator.
rng = Random.GLOBAL_RNG; 
# Select the PyPlot backend for Plots.
pyplot(); 

# CSV file names; the export cell at the end of the notebook passes these to
# export_results for the random-policy and MCTS-policy histories respectively.
HIST_RAND_FILENAME = "rand_results.csv"; 
HIST_MCTS_FILENAME = "mcts_results.csv";

## Global Model Parameters 

First, try to get an extremely basic version of the model running. See SIMPLE MODEL DATA PARAMS for details 

### Real Model Data
TODO: write a function that reads in the arrays of all the input data (load/solar, occupancy, etc.) from CSV files.

## States, Actions, and MDP Definition
- Data containers representing the state and actions of the FantasyGame
- MDP data container holds all the information needed to define MDP tuple (S,A,T,R)


In [None]:
# State of the fantasy game for a single week.
# NOTE(review): the array fields look like parallel per-player arrays (the reward
# model dots `proj` and `sal` with the lineup mask) — confirm against the data loader.
struct FantasyGameState
    # Projected points; dotted with the action's lineup mask to form the raw reward.
    proj::Array{Float64}
    # Salaries; dotted with the lineup mask and compared against the salary cap.
    sal::Array{Float64}
    # Position data handed to the count_te/count_qb/count_rb/count_wr helpers.
    # NOTE(review): stored as Float64 — the encoding is not visible here; confirm.
    pos::Array{Float64} 
    # Team labels; carried through the transition unchanged.
    team::Array{String} 
    # Injury status labels.
    inj::Array{String} # Should convert the transition function to sampling from Dirichlet
    # Week counter; incremented by one on every transition.
    week::Int64
end

# Action of the fantasy game: a chosen lineup.
struct FantasyGameAction
    # Boolean selection mask; the reward model dots it with the state's `proj` and
    # `sal` arrays, so it is presumably indexed like them — TODO confirm.
    lineup::Array{Bool}
end

"""
    FantasyGameMDP <: MDP{FantasyGameState, FantasyGameAction}

Daily-fantasy lineup problem. The fields hold the roster constraints that the
reward model checks, plus the reward adjustment applied for each violated one.

# Constructors
- `FantasyGameMDP(rb_max, wr_max, qb_max, te_max, sal_max, lineup_penalty)`
- `FantasyGameMDP(rb_max, wr_max, qb_max, te_max, sal_max)` uses `LINEUP_PENALTY`
- `FantasyGameMDP()` uses the default constants defined below
"""
struct FantasyGameMDP <: MDP{FantasyGameState, FantasyGameAction}
    # Roster constraint params
    rb_max::Int64
    wr_max::Int64
    qb_max::Int64
    te_max::Int64
    sal_max::Int64

    # Added to the reward once per violated constraint (the reward model reads
    # `m.lineup_penalty`), so it should be negative.
    lineup_penalty::Float64
end

RB_MAX = 2;
WR_MAX = 2;
QB_MAX = 1;
TE_MAX = 1;
SAL_MAX = 60000; # $60K
# Provisional value: chosen only to be large relative to a lineup's projected
# score so that infeasible lineups are never preferred. Tune as the model matures.
LINEUP_PENALTY = -1000.0;

# Keeps the original five-argument positional form working after the new field.
function FantasyGameMDP(rb_max, wr_max, qb_max, te_max, sal_max)
    return FantasyGameMDP(rb_max, wr_max, qb_max, te_max, sal_max, LINEUP_PENALTY)
end

FantasyGameMDP() = FantasyGameMDP(RB_MAX, WR_MAX, QB_MAX, TE_MAX, SAL_MAX, LINEUP_PENALTY)

## Define gen 
Implement the complete generative models for both the FantasyGameMDP and FantasyGamePOMDP
- State transition model 
- Observation model 
- Reward model 

LOTS of work still needed to refine these... 

In [None]:
# In theory, none of these state-transition functions should affect the policy:
# the agent's action has no influence on the next state, so the reward for a
# lineup is essentially independent of how the state evolves.
"""
    update_proj(proj, rng=Random.GLOBAL_RNG)

Return next week's projections. Each entry takes an independent zero-mean Gaussian
step whose standard deviation is 5% of that entry, and the result is floored at
zero. (This random walk shouldn't impact the policy.)

Works elementwise, so `proj` may be a scalar or an array of any shape; the input
is not mutated. Pass `rng` for reproducible draws.
"""
function update_proj(proj, rng::AbstractRNG=Random.GLOBAL_RNG)
    # A N(0, σ) draw is σ * randn(); applying it per entry avoids building a
    # distribution with an array-valued σ, which is not supported.
    function jitter(p)
        stepped = p + 0.05 * p * randn(rng)
        return max(stepped, zero(stepped))
    end
    return jitter.(proj)
end

"""
    update_sal(sal, rng=Random.GLOBAL_RNG)

Return next week's salaries. Each entry takes an independent zero-mean Gaussian
step whose standard deviation is 5% of that entry, and the result is floored at
zero. (This random walk shouldn't impact the policy.)

Works elementwise, so `sal` may be a scalar or an array of any shape; the input
is not mutated. Pass `rng` for reproducible draws.
"""
function update_sal(sal, rng::AbstractRNG=Random.GLOBAL_RNG)
    # A N(0, σ) draw is σ * randn(); applying it per entry avoids building a
    # distribution with an array-valued σ, which is not supported.
    function jitter(x)
        stepped = x + 0.05 * x * randn(rng)
        return max(stepped, zero(stepped))
    end
    return jitter.(sal)
end

"""
    update_week(week)

Return the week that follows `week`.
"""
update_week(week) = week + 1

# TODO: make the update injury function work with arrayed input
"""
    update_inj(inj)

Return `inj` unchanged. The heal/injure draft below stays switched off until it is
rewritten to handle array input.
"""
function update_inj(inj)
    draft_enabled = false  # masks the draft logic until it supports arrays
    if !draft_enabled
        return inj
    end

    heal_prob = 0.3
    inj_prob = 0.03
    currently_injured = inj != 0
    if currently_injured
        if rand(Binomial(1, heal_prob), 1) == 1
            inj = 0
        end
    elseif rand(Binomial(1, inj_prob), 1) == 1
        inj = "Q"
    end
    return inj
end

"""
    update_team(team)

Return `team` as-is; the transition model leaves team data untouched.
"""
update_team(team) = team

"""
    update_pos(pos)

Return `pos` as-is; the transition model leaves position data untouched.
"""
update_pos(pos) = pos

"""
    POMDPs.gen(m::FantasyGameMDP, s::FantasyGameState, a::FantasyGameAction, rng)

MDP generative model. Returns the NamedTuple `(sp=..., r=...)`.

- `sp`: next state, built by advancing every field of `s` with its `update_*`
  helper. The action does not enter the transition.
- `r`: projected score of the chosen lineup (`dot(a.lineup, s.proj)`) plus
  `m.lineup_penalty` for each violated roster limit or salary cap.

`rng` is accepted for the POMDPs.jl interface; it is not forwarded to the
`update_*` helpers here.
"""
function POMDPs.gen(m::FantasyGameMDP, s::FantasyGameState, a::FantasyGameAction, rng)
    # Transition Model
    week = update_week(s.week)
    proj = update_proj(s.proj)
    sal = update_sal(s.sal)
    inj = update_inj(s.inj)  # injuries use their own update, not the salary random walk
    team = update_team(s.team)
    pos = update_pos(s.pos)

    sp = FantasyGameState(proj, sal, pos, team, inj, week)

    # Observation Model
    # N/A

    # Reward Model
    r = dot(a.lineup, s.proj) # Raw Projected Score
    # Each exceeded limit adds the penalty once, regardless of by how much.
    r += count_te(a.lineup, s.pos) > m.te_max ? m.lineup_penalty : 0
    r += count_qb(a.lineup, s.pos) > m.qb_max ? m.lineup_penalty : 0
    r += count_rb(a.lineup, s.pos) > m.rb_max ? m.lineup_penalty : 0
    r += count_wr(a.lineup, s.pos) > m.wr_max ? m.lineup_penalty : 0
    r += dot(a.lineup, s.sal) > m.sal_max ? m.lineup_penalty : 0

    # create and return a NamedTuple
    return (sp=sp, r=r)
end

In [1]:
# Instantiate the MDP with the default roster constraints.
# NOTE(review): the cells below refer to a model named `sh`, not `fg` — confirm
# which binding they are meant to use.
fg = FantasyGameMDP(); 

LoadError: UndefVarError: FantasyGameMDP not defined

## Step Through Random Policy 

In [None]:

# Fixed starting state; neither `m` nor `rng` is consulted.
function POMDPs.initialstate(m::SmartHomeMDP, rng::MersenneTwister)
    return SmartHomeState(5, 5, 5, 5, true, 4, 6, 1, 5, 2, 1)
end
# Two-point initial-state distribution: the candidates differ only in their first
# constructor argument (5 vs 4) and carry probabilities 0.4 and 0.6.
function POMDPs.initialstate_distribution(m::SmartHomeMDP)
    candidates = [
        SmartHomeState(5, 5, 5, 5, true, 4, 6, 1, 5, 2, 1),
        SmartHomeState(4, 5, 5, 5, true, 4, 6, 1, 5, 2, 1),
    ]
    weights = [0.4, 0.6]
    return SparseCat(candidates, weights)
end

# TODO: Enumerate more actions
# Action set: every combination of three values for the first SmartHomeAction
# argument (-C_RATE_MAX, 0, C_RATE_MAX) with five (second, third) argument pairs:
# no adjustment, or +/- SP_ADJ_SIZE applied to exactly one of the two.
# Building the grid programmatically keeps each row consistent; the hand-written
# list had a stray `1` as the second argument of its final entry.
function POMDPs.actions(m::SmartHomeMDP)
    first_arg_values = (-C_RATE_MAX, 0, C_RATE_MAX)
    adjustment_pairs = (
        (0, 0),
        (SP_ADJ_SIZE, 0),
        (0, SP_ADJ_SIZE),
        (-SP_ADJ_SIZE, 0),
        (0, -SP_ADJ_SIZE),
    )
    # Outer loop over the pairs, inner loop over the first argument: same ordering
    # as the original row-by-row listing.
    return [SmartHomeAction(c, d2, d3)
            for (d2, d3) in adjustment_pairs for c in first_arg_values]
end

# Discount factor for the problem, taken from the global DISCOUNT.
function POMDPs.discount(m::SmartHomeMDP)
    return DISCOUNT
end



In [None]:
# Roll out a random policy for up to 100 steps and print the state at each step
# while the step number is below SIM_DURATION.
rand_policy = RandomPolicy(sh)
# `enumerate` supplies the step number, so the loop body never rebinds a global
# counter (which is scope-sensitive when this code runs outside a notebook).
for (iter, (s, a, r)) in enumerate(stepthrough(sh, rand_policy, "s,a,r", max_steps=100))
    if iter < SIM_DURATION
        println("TOD: ", s.tod, ", SOC: ", s.soc, ", OCC: ", s.occ, ", ODT: ", s.odt,
                ", HSP/CSP: ", s.hsp, "/", s.csp)
    end
end

## Solve MDP 

Solve the MDP using Monte Carlo Tree Search (MCTS).

In [None]:
# Report which POMDPs.jl interface functions the MCTS solver requires for this model.
@requirements_info MCTSSolver() SmartHomeMDP()
# Solver settings: iterations per action query, search depth, and the
# exploration constant passed to the solver.
n_iter = 100000
depth = TOD_RESOLUTION #* 2
ec = 10.0

# enable_tree_vis=true is set so the tree can be rendered in the D3Tree cell below.
solver = MCTSSolver(n_iterations=n_iter,
    depth=depth,
    exploration_constant=ec,
    enable_tree_vis=true
)


# Build the planner for model `sh` and draw the start state with a fixed seed (4).
policy = solve(solver, sh)
state = initialstate(sh, Random.MersenneTwister(4))

# Query the planner once for the action at the start state.
a = action(policy, state)



In [None]:
using D3Trees
# Render the search tree rooted at `state` for the planner built above.
# NOTE(review): init_expand=2 presumably pre-expands the first two levels — confirm.
D3Tree(policy, state, init_expand=2)  # click on the node to expand it

## Evaluate Model 

In [None]:
# Simulate the random policy from `state` for at most SIM_DURATION steps, keeping
# the recorded history in `hist_rand`.
hist_rand = simulate(HistoryRecorder(max_steps=SIM_DURATION), sh, rand_policy, state)

println("Random Policy Total discounted reward: ", discounted_reward(hist_rand))

In [None]:
# Simulate the MCTS policy from `state` for at most SIM_DURATION steps, keeping
# the recorded history in `hist_mcts`.
hist_mcts = simulate(HistoryRecorder(max_steps=SIM_DURATION), sh, policy, state)

println("Monte Carlo Policy Total discounted reward: ", discounted_reward(hist_mcts))

## Export Simulation Results 

In [None]:
"""
    export_results(hist, filename)

Write the simulation history `hist` to `filename` as CSV: one header line, then
one row per recorded step holding fields of the state, the action, and the reward.
Any existing file is overwritten.
"""
function export_results(hist, filename)
    open(filename, "w") do io
        print(io, "t,TOD,c,dhsp,dcsp,d_hv,d_,soc,rmt,occ,hsp,csp,odt,tou,r\n")
        for (s, a, r, sp) in eachstep(hist, "(s, a, r, sp)")
            # Column order must match the header line above.
            row = (s.t, s.tod, a.c, a.dhsp, a.dcsp, s.d_hv, s.d_, s.soc,
                   s.rmt, s.occ, s.hsp, s.csp, s.odt, s.tou, r)
            join(io, row, ",")
            print(io, "\n")
        end
    end
end

In [None]:
# Write each recorded history to its CSV file.
for (history, path) in ((hist_rand, HIST_RAND_FILENAME), (hist_mcts, HIST_MCTS_FILENAME))
    export_results(history, path)
end