# Initialization

In [10]:
println("Loading dependencies")
using StatsBase;
using JSON;
using YAML;
using BenchmarkTools;
using ProgressMeter;
using SQLite;
using DataFrames;
using ThreadsX;
using Flux;
include("game.jl");

Loading dependencies


In [2]:
println("Initializing training with $(Threads.nthreads()) threads")

Initializing training with 8 threads


In [3]:
println("Loading training config from config.yaml")
config = YAML.load_file("config.yaml");
println("Loading model parameters from model.yaml")
model = YAML.load_file("model.yaml");

Loading training config from config.yaml
Loading model parameters from model.yaml


# State Space

In [4]:
if config["statespace"]["generate"]
    println("Generating state space")
    
    println("Saving to $(config["statespace"]["filepath"])")
    
else
    println("Loading state space from $(config["statespace"]["filepath"])")
    STATE_SPACE = Int64[JSON.parsefile(config["statespace"]["filepath"],use_mmap=false)...];
end;

Loading state space from statespace.json


# ETL

In [7]:
training_games_query = """
SELECT 
    games.game_id, winners.username as winner_username, losers.username as loser_username,
    winners.is_bot as winner_is_bot, losers.is_bot as loser_is_bot, COUNT(*)/2 as duration
FROM games
LEFT JOIN players winners
    on games.winner_id = winners.player_id
LEFT JOIN players losers
    on games.loser_id = losers.player_id
RIGHT JOIN breaths 
    on games.game_id = breaths.game_id
WHERE
    (games.game_id>=$(model["data"]["epoch-start"] == -Inf ? 0 : model["data"]["epoch-start"]))
AND
(games.game_id<=$(model["data"]["epoch-end"] == Inf ? time() : model["data"]["epoch-end"]))
AND 
    ((winner_is_bot=0) OR (loser_is_bot=0))
GROUP BY games.game_id
"""
#Replace winner/loser IDs with usernames, and indicate whether each is a bot.
#Right join the breaths table in order to count the duration of each game.
#Games in the training set must have at least one human player, and must occur
#before the epoch time cutoff.

training_breaths_query = """
SELECT
    training_games.game_id, breaths.state, breaths.action, breaths.is_winner,
    (CASE breaths.is_winner
        WHEN 1 THEN training_games.winner_is_bot
        ELSE training_games.loser_is_bot
    END) as is_bot
FROM ($(training_games_query)) training_games
RIGHT JOIN breaths
    on training_games.game_id = breaths.game_id
WHERE (is_bot=0)
"""
#Use a right join on the `training_games` table to ensure we are only using breaths
#from the training set.
;

In [12]:
println("Retrieving gameplay data")

db = SQLite.DB("archived/data/punish_data.db")
#Initialize connection to database.

training_breaths = DBInterface.execute(db,training_breaths_query) |> DataFrame
println("""Data contain $(nrow(training_breaths)) breaths.""")

n_starting_states = sum(1 for state in unique(training_breaths[:,"state"]) if state in STARTING_STATES)
println("""$(n_starting_states) of $(length(STARTING_STATES)) possible starting states ($(
    round(100*n_starting_states/length(STARTING_STATES),digits=2))%).""")

println("""$(length(unique(training_breaths[:,"state"]))) distinct states of $(
    length(STATE_SPACE)) possible states visited ($(
    round(100*length(unique(training_breaths[:,"state"]))/length(STATE_SPACE),digits=2)
    )% of state space).""")

DBInterface.close!(db)

Retrieving gameplay data
Data contain 7226 breaths.


LoadError: UndefVarError: STARTING_STATES not defined

In [31]:
training_state_matrix = vcat([
    transpose(state_int2vector(state)) for state in unique(training_breaths[:,"state"])]...
    #Reverse the digits so the first component is breath number; transpose observations 
    #into row-vectors. Multiply by 1.0 so vectors are float-valued.
)
size(training_states)

(5361, 18)

# PARLESS

In [22]:
parless_strategies = Dict{Int64,Dict{Int64,Float64}}()

if model["parless"]["enabled"]
    println("Performing PARLESS reweighting")  
    
        @showprogress for state in unique(training_breaths[:,"state"])
            action_counts = countmap(
                training_breaths[training_breaths[:,"state"].==state,"action"]
            )
            actions = possible_actions(state)
            dirichlet_posterior = Dict(
                action => get(action_counts,action,0)+model["parless"]["prior-pseudocounts"]/
                    length(actions) for action in actions
            )
            sum_pseudocounts = sum(values(dirichlet_posterior))
            categorical_posterior = Dict(
                action => pseudocounts/sum_pseudocounts 
                for (action,pseudocounts) in dirichlet_posterior
            )
            parless_strategies[state] = categorical_posterior
        end
end

Performing PARLESS reweighting

[32mProgress:   0%|█                                        |  ETA: 0:10:32[39m




[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


In [29]:
training_strategy_matrix = vcat([
    transpose(strategy_dict2vector(parless_strategies[state])) 
    for state in unique(training_breaths[:,"state"])]...
    #Match strategy vectors to their corresponding states.
);

# PAWNN

In [50]:
println("Initializing PAWNN network")

Random.seed!(config["pawnn"]["random-seed"])
pawnn_network = Chain(
    (eval(Meta.parse(layer)) for layer in model["pawnn"]["network-structure"])...
)
loss(x,y) = eval(Meta.parse(model["pawnn"]["loss-metric"]))(pawnn_network(x), y)
optimizer = eval(Meta.parse(model["pawnn"]["optimizer"]));
#Initialize the model.

Initializing PAWNN network


In [53]:
pawnn_network

Chain(
  Dense(18 => 36, relu),                [90m# 684 parameters[39m
  Dropout(0.5),
  Dense(36 => 24, relu),                [90m# 888 parameters[39m
  Dense(24 => 16, relu),                [90m# 400 parameters[39m
  Dense(16 => 11),                      [90m# 187 parameters[39m
  NNlib.softmax,
) [90m                  # Total: 8 arrays, [39m2_159 parameters, 8.965 KiB.

In [51]:
println("Training PAWNN network")

@showprogress for epoch in 1:model["pawnn"]["n-epochs"]
    Flux.train!(
        loss,
        Flux.params(pawnn),
        zip(
            eachrow(training_state_matrix),
            eachrow(training_strategy_matrix)
            #The `eachrow` calls are essentiall; otherwise, `zip` doesn't know
            #what to pair up.
        ), 
        optimizer
    )
end
#Train the model.

Training PAWNN network


LoadError: Can't differentiate foreigncall expression $(Expr(:foreigncall, :(:jl_toplevel_eval_in), Any, svec(Any, Any), 0, :(:ccall), %2, %3)).
You might want to check the Zygote limitations documentation.
https://fluxml.ai/Zygote.jl/latest/limitations


In [27]:
    
    open(CONFIG["neural-networks"]["general-pawnn"]["filepath"],"w") do f
        JSON.print(f,[layer_params for layer_params in Flux.params(general_pawnn_network)])
        #Parameters are formatted as a vector of matrices. Each matrix is serialized as a list
        #of column vectors. 
    end
    save_run_times()
    #Save results.

println("""Run time: $(round(run_times["neural-net-training"]["general-final-model"]/60,digits=2)) min""")
println("""MSE: $(mean(loss.(eachrow(general_training_states),eachrow(general_training_strategies))) )""")

Training PAWNN network


LoadError: UndefVarError: training_strategies not defined

# Transition Mapping

In [173]:
println("Calculating state-action transition probabilities: ");

progress = Progress(length(STATE_SPACE))

transitions = ThreadsX.mapreduce(
    state -> begin next!(progress); return Dict(state=>transitionmap(state)) end,
    merge,
    STATE_SPACE;
    init = Dict{Int64, Dict{Int64, Dict{Int64, Float64}}}()
    #Must specify initial value as an empty dict.
);

Calculating state-action transition probabilities: 


[30m 100%|███████████████████████████████████████████████████| Time: 0:38:35[39m                                                                            08[39m[30m  23%|████████████                                       |  ETA: 0:14:55[39m


# Value Iteration