In [1]:
using Knet: Knet, dir, accuracy, progress, sgd, load, save, gc, Param, KnetArray, Data, minibatch, nll, relu, training, dropout
using Statistics
using CUDA
using Random
using LinearAlgebra
using Knet
using Plots; default(fmt=:png,ls=:auto)
using ProgressBars
using NBInclude

In [2]:
# Pick the Float32 array type for this session: `KnetArray` when
# `CUDA.functional()` reports a usable GPU, otherwise a plain `Array`.
atype = CUDA.functional() ? KnetArray{Float32} : Array{Float32}

KnetArray{Float32,N} where N

# Random Deals and Hand Features
Hands are represented as 52-bit vectors, and the bidding history as a 36-bit vector.

In [3]:
@nbinclude("deal_features.ipynb")

human_readable (generic function with 1 method)

# Deep Q-network Model

In [4]:
@nbinclude("model.ipynb")


In [5]:
# Build the list of per-turn Q-networks.
#
# Keyword arguments:
#   turn_depth - total number of networks to create; the first network is always
#                created, so values below 2 still yield one network.
#   feature    - selects the width of every network's output layer:
#                "lowest" => 56, "median" => 41, "highest" => 45, nothing => 36.
#                Any other value raises a KeyError from the lookup.
#
# Returns a Vector{Any} of `Chain`s. The first takes a 52-element input (the hand
# encoding); the remaining ones take 52+36 inputs (hand plus bidding history).
# All share the same layout: three 128-unit Dense layers and an identity output layer.
function initialize_model(;turn_depth=6, feature=nothing)
    width_by_feature = Dict("lowest" => 56, "median" => 41, "highest" => 45, nothing => 36)
    n_out = width_by_feature[feature]
    hidden = 128
    # Same layer stack for every turn; only the input width differs.
    build_net(n_in) = Chain(Dense(n_in, hidden), Dense(hidden, hidden),
                            Dense(hidden, hidden), Dense(hidden, n_out, identity))
    networks = Any[build_net(52)]
    for _ in 2:turn_depth
        push!(networks, build_net(52 + 36))
    end
    return networks
end

initialize_model (generic function with 1 method)

# Hand Reading From txt


Generate data instances from a PBN-formatted text file.

In [6]:
# Execute hand_reading.ipynb in this session (`read_deals`, used below, is
# presumably defined there — confirm).
@nbinclude("hand_reading.ipynb")
# Read the same deal file three times: restricted to hand_type "double pass",
# restricted to hand_type "slam", and with no hand_type given.
# NOTE(review): the exact filtering semantics of `hand_type` live in read_deals — verify there.
deal_data_dpass= read_deals(["deals_new.txt"],hand_type="double pass");
deal_data_slam= read_deals(["deals_new.txt"],hand_type="slam");
deal_data_all= read_deals(["deals_new.txt"]);

In [None]:
# Print the number of deals in each collection, one count per line
# (double pass, slam, all).
for deals in (deal_data_dpass, deal_data_slam, deal_data_all)
    println(length(deals))
end


In [12]:
# Load the entry stored under "deal_data_all" from deals.jld2.
deal_data=load("deals.jld2","deal_data_all");
training_size=100000
test_size=80000;
# The first `training_size` entries form the training set and the next
# `test_size` entries form the test set, so the two index ranges do not overlap.
# NOTE(review): assuming deal_data is an ordinary indexable array, these slices
# throw a BoundsError when it holds fewer than training_size+test_size entries.
training_set=deal_data[1:training_size];
test_set=deal_data[training_size+1:training_size+test_size];

# Main Learning Algorithm

In [6]:
@nbinclude("target_determine.ipynb")
@nbinclude("explore.ipynb")
@nbinclude("bid_functions.ipynb")

model_accuracy (generic function with 1 method)

In [14]:
# Train the per-turn Q-networks in `Q` in place and save the results to `file`.
#
# Each epoch performs, in this order:
#   1. Experience collection: fresh state/target databases are filled by calling
#      `bid!` on every training deal (`all=true`) or on `iteration_size` deals
#      drawn at random with replacement (`all=false`).
#   2. Evaluation with `model_accuracy` on training deals (the whole set when
#      `all=true`, otherwise a random subset of at most `iteration_size` deals
#      drawn without replacement) and on the whole `test_set`.
#   3. Snapshot: an independent copy of `Q` is appended to the model history, so
#      snapshot i is exactly the model that produced the metrics at index i.
#   4. Experience replay: `Knet.rmsprop!` is run on shuffled minibatches of the
#      collected data for Q[2], ..., Q[turn_depth].
#
# Arguments:
#   Q            - indexable collection of networks, updated in place.
#   training_set - indexable collection of deals used for collection/evaluation.
#   test_set     - deals used for the per-epoch test evaluation.
#   file         - path handed to `Knet.save`.
#
# Keyword arguments:
#   epochs         - number of epochs; must be at least 1.
#   iteration_size - deals sampled per epoch when `all` is false.
#   progress       - wrap the epoch loop in a `ProgressBar` when true.
#   epsilon_greedy, epsilon_human, feature, turn_depth - forwarded to `bid!`
#                    (and, except epsilon_greedy, to `model_accuracy`).
#   batch_size     - minibatch size used for experience replay.
#   all            - use the whole training set instead of random samples.
#
# Returns (accuracies, ave_imp_loss, accuracies_test, ave_imp_loss_test,
# best_model_index, model_history), where best_model_index is the epoch whose
# second `model_accuracy` result on the test set is lowest.
#
# Throws ArgumentError when `epochs < 1`.
function my_train!(Q,training_set,test_set,file;epochs=10,iteration_size=10000,progress=true,epsilon_greedy=0.05, 
        batch_size=50,turn_depth=6,epsilon_human=0,feature=nothing,all=false)
    # Fail early: with no epochs there is no best model to select or save.
    epochs >= 1 || throw(ArgumentError("epochs must be at least 1, got $epochs"))

    accuracies=[]
    ave_imp_loss=[]
    accuracies_test=[]
    ave_imp_loss_test=[]
    model_history=[]
    iterator = progress ? ProgressBar(1:epochs) : 1:epochs

    for i in iterator
        database_target_training=database(turn_depth)
        database_state_training=database(turn_depth)

        # 1. Experience collection.
        if all
            for deal in training_set
                bid!(Q,deal,database_state_training,database_target_training,
                    epsilon_greedy,turn_depth=turn_depth,epsilon_human=epsilon_human,feature=feature);
            end
        else
            for j in 1:iteration_size
                bid!(Q,training_set[rand(1:end)],database_state_training,database_target_training,
                    epsilon_greedy,turn_depth=turn_depth,epsilon_human=epsilon_human,feature=feature);
            end
        end

        # 2. Evaluation on training deals. When sampling, draw the subset from the
        # whole training set (not only from its first `iteration_size` entries) and
        # cap its size so a small training set cannot cause a BoundsError.
        if all
            eval_set = training_set
        else
            n_eval = min(iteration_size, length(training_set))
            eval_set = training_set[randperm(length(training_set))[1:n_eval]]
        end
        acc_train,imp_train=model_accuracy([Q],eval_set,turn_depth=turn_depth,
            epsilon_human=epsilon_human,feature=feature);
        push!(accuracies,acc_train);
        push!(ave_imp_loss,imp_train);

        acc_test,imp_test=model_accuracy([Q],test_set,turn_depth=turn_depth,epsilon_human=epsilon_human,feature=feature);
        push!(accuracies_test,acc_test);
        push!(ave_imp_loss_test,imp_test);

        # 3. Snapshot. `Q` is updated in place below, so a copy is required;
        # pushing `Q` itself would make every history entry alias the final model.
        push!(model_history,deepcopy(Q));

        # 4. Experience replay.
        # NOTE(review): the loop starts at 2, so Q[1] is never updated here —
        # confirm against `bid!`/`database` whether that is intended.
        for t in 2:turn_depth
            if length(database_state_training.data[t])==0
                continue
            end
            data=minibatch(hcat(database_state_training.data[t]...),hcat(database_target_training.data[t]...),batch_size,shuffle=true);
            # Too few samples to fill a single batch: nothing to train on.
            if length(data)==0
                continue
            end
            Knet.rmsprop!(Q[t],data);
        end

    end

    best_model_index=findmin(ave_imp_loss_test)[2];
    best_model=model_history[best_model_index];
    Knet.save(file,"q_networks",Q, "accuracies",(accuracies,accuracies_test),"ave_imp_loss",(ave_imp_loss,ave_imp_loss_test),
     "best_model_index",best_model_index,"best_model",best_model);
    return accuracies, ave_imp_loss, accuracies_test,ave_imp_loss_test,best_model_index,model_history
end

    

my_train! (generic function with 1 method)

In [None]:
# Build a fresh set of Q-networks with the default settings and train them for
# 200 epochs; results are written to model_eh1_eg01.jld2 by my_train!.
Q = initialize_model();
my_train!(Q, training_set, test_set, "model_eh1_eg01.jld2";
    epochs=200, epsilon_greedy=0.01, epsilon_human=1, progress=true);

0.0%┣                                         ┫ 0/200 [00:00<00:-10, -20.0 it/s]0.0119
9.507743999999619
0.0103625
9.525996000004783
0.5%┣▏                                         ┫ 1/200 [01:10<Inf:Inf, 0.0 it/s]0.2594
3.81062399999985
0.2552375
3.8064120000004165
1.0%┣▍                                        ┫ 2/200 [01:46<05:49:17, 0.0 it/s]0.2864
3.5305919999998596
0.2840625
3.5445960000006975
1.5%┣▋                                        ┫ 3/200 [02:20<03:50:14, 0.0 it/s]0.3165
3.1485119999998448
0.3191
3.1258320000005964


In [11]:
# Load the networks stored under "q_networks" in model_slam.jld2 and evaluate
# them on the test set with a turn depth of 12.
Q = load("model_slam.jld2", "q_networks")
model_accuracy([Q], test_set; turn_depth=12, epsilon_human=1, progress=true)

100.0%┣█████████████████████████████████┫ 30000/30000 [00:07<00:00, 4126.0 it/s]
0.4660666666666667
3.305024000000082


(0.4660666666666667, 3.305024000000082)