In [None]:
using Knet: Knet, dir, accuracy, progress, sgd, load, save, gc, Param, KnetArray, Data, minibatch, nll, relu, training, dropout
using Statistics
using CUDA
using Random
using LinearAlgebra
using Knet
using Plots; default(fmt=:png,ls=:auto)
using ProgressBars
using NBInclude
using BSON: @save, @load

In [None]:
# Array type used for network inputs: a Knet GPU array when CUDA is usable,
# otherwise a plain CPU array. Both hold Float32 elements.
atype = CUDA.functional() ? KnetArray{Float32} : Array{Float32}

# Random Deals and Hand Features
Hands are represented with 52-bit vectors and bidding history with 36-bit vectors.

In [None]:
@nbinclude("deal_features.ipynb")

# Deep Q-network Model

In [None]:
@nbinclude("model.ipynb")


In [None]:
# Build the six freshly initialised Q-networks, one per bidding turn.
#
# The first network takes only the 52-bit hand vector as input; networks 2-6
# take the hand vector concatenated with the 36-bit bidding history (88 inputs).
# Every network has three 128-unit Dense layers followed by a 36-output Dense
# layer created with `identity`.
#
# Returns the networks as a vector ordered by turn.
function init()
    hand_bits, bid_bits, width = 52, 36, 128
    # One network with `inputs` input units; layers are created front to back.
    build(inputs) = Chain(
        Dense(inputs, width),
        Dense(width, width),
        Dense(width, width),
        Dense(width, bid_bits, identity),
    )
    return [build(turn == 1 ? hand_bits : hand_bits + bid_bits) for turn in 1:6]
end

In [None]:
# Hyperparameters, defined as notebook-level globals.
# NOTE(review): train! declares its own keyword arguments named epsilon_greedy,
# learning_rate and turn_depth, with the same default values as below, so
# changing these globals alone does not change those keyword defaults.
# batch_size is read directly by the minibatch calls in the loss-evaluation cell.
# instance_size is not referenced in the notebook cells visible here.
epsilon_greedy=0.05;
turn_depth=6;
learning_rate=0.1;
batch_size=50;
instance_size=1000;

# Hand Reading From txt


Parse each line of a PBN-formatted text file into a data instance: the north hand, the south hand, and the costs.

In [None]:
@nbinclude("hand_reading.ipynb")

# Load every deal from deals.txt. The path form of `readlines` opens the file,
# reads all lines and closes it again, so no file handle is left open.
lines = readlines("deals.txt")

# Each entry is a (north, south, costs) tuple parsed from fixed column ranges
# of one line; the converters come from hand_reading.ipynb.
deal_data = []
for deal in lines
    # Blank lines carry no deal and would fail on the fixed-column slices below.
    isempty(strip(deal)) && continue
    north = PBN_to_vector(deal[18:33])
    south = PBN_to_vector(deal[52:67])
    costs = PBN_to_cost(deal[69:end])
    push!(deal_data, (north, south, costs))
end

# NOTE(review): all deals are currently used for training. The former held-out
# split is disabled, so `test_set` is NOT defined by this cell; `l` is kept
# only as the split point of that disabled line:
#test_set=deal_data[l+1:end];
l = 20000
training_set = deal_data[1:end];


# Main Learning Algorithm

In [None]:
@nbinclude("target_determine.ipynb")
@nbinclude("explore.ipynb")
@nbinclude("bid_functions.ipynb")

In [None]:
# Trim every replay buffer in `buffers` in place: once a buffer holds more than
# `max_size` entries, replace it with its final `keep_back + 1` entries.
function _trim_replay!(buffers; max_size=2000, keep_back=1000)
    for t in eachindex(buffers)
        buffer = buffers[t]
        if length(buffer) > max_size
            buffers[t] = buffer[end-keep_back:end]
        end
    end
    return buffers
end

"""
    train!(Q, training_set; iteration_size=1000, progress=true, epsilon_greedy=0.05,
           batchsize=50, learning_rate=0.1, periods=10, turn_depth=6)

Train the networks in `Q` in place and return `nothing`.

Each iteration calls `bid!` on one randomly chosen element of `training_set`,
passing the global replay buffers `database_state_training` and
`database_target_training`. It then trims those buffers and, for every turn
`1:turn_depth`, runs `Knet.rmsprop!` on `Q[t]` over shuffled minibatches built
from the buffered states and targets of that turn.

A deep copy of `Q` is pushed onto the global `history_of_Q` before the first
iteration and after every `periods`-th iteration (the iteration number is
printed at the same time).

# Keywords
- `iteration_size`: number of training iterations.
- `progress`: wrap the iteration range in a `ProgressBar` when `true`.
- `epsilon_greedy`: forwarded unchanged to `bid!`.
- `batchsize`: minibatch size; turns whose buffer cannot fill one minibatch
  are skipped for that iteration.
- `learning_rate`: accepted for interface compatibility but not used.
  NOTE(review): `Knet.rmsprop!` runs with its own default step size — confirm
  whether this keyword was meant to be forwarded to it.
- `periods`: snapshot interval, in iterations.
- `turn_depth`: number of networks (turns) trained per iteration.
"""
function train!(Q,training_set;iteration_size=1000,progress=true,epsilon_greedy=0.05, batchsize=50,learning_rate=0.1,periods=10,turn_depth=6)
    push!(history_of_Q, deepcopy(Q))
    iterator = progress ? ProgressBar(1:iteration_size) : (1:iteration_size)
    for i in iterator
        bid!(Q, training_set[rand(1:end)], database_state_training,
             database_target_training, epsilon_greedy)

        # Experience replay: bound the buffers without deep-copying them.
        _trim_replay!(database_state_training)
        _trim_replay!(database_target_training)

        for t in 1:turn_depth
            states = database_state_training[t]
            targets = database_target_training[t]
            # hcat allocates fresh matrices, so the buffers themselves are untouched.
            data = minibatch(hcat(states...), hcat(targets...), batchsize, shuffle=true)
            # Zero minibatches means there is nothing to train on for this turn yet.
            if length(data) > 0
                Knet.rmsprop!(Q[t], data)
            end
        end

        if i % periods == 0
            println(i)
            # Snapshots must be deep copies: Q keeps being updated in place.
            push!(history_of_Q, deepcopy(Q))
        end
    end
    return nothing
end

    

In [None]:
# Restore the six saved Q-networks from models.jld2, in turn order
# (stored under the names q_network1 ... q_network6).
Q = []
for turn in 1:6
    push!(Q, Knet.load("models.jld2", "q_network$(turn)"))
end
Q

In [None]:
# Start from empty replay buffers (six each for targets and states) and an
# empty snapshot history, train the loaded networks, then write all six
# networks back to models.jld2.
database_target_training = [[] for _ in 1:6];
database_state_training = [[] for _ in 1:6];
history_of_Q = []
train!(Q, training_set; iteration_size=50000, progress=false, periods=100);
Knet.save(
    "models.jld2",
    "q_network1", Q[1],
    "q_network2", Q[2],
    "q_network3", Q[3],
    "q_network4", Q[4],
    "q_network5", Q[5],
    "q_network6", Q[6],
)

13600


In [None]:
# Run `bid` in debug mode on one randomly chosen training deal, print both
# hands, then print the name of every bid whose slot in `bidding` equals 1.
n, s, bidding, q_values, target = bid(Q, training_set[rand(1:end)]; debug=true)
println(human_readable(n))
println(human_readable(s))
for slot in 1:36
    bidding[slot] == 1 && println(bids[slot])
end


In [None]:
# For turn `t` of the deal bid above, plot one minus the network's outputs
# together with the target values of that turn on the same axes.
t = 3
plot(Array(-(q_values[t] .- 1)); label="1-Q value", xlabel="Bid")
plot!(Array(target[t]); label="Cost", xlabel="Bid")

# Calculate and Plot the Average Losses of every Epoch

In [None]:
# One list of losses per network, with one value per snapshot in history_of_Q.
test_losses = [[] for _ in 1:6];
train_losses = [[] for _ in 1:6];

# NOTE(review): `test_set` is not assigned in the notebook cells visible here
# (the assignment in the data-loading cell is commented out) — confirm it is
# defined before running this cell.
for snapshot in ProgressBar(history_of_Q)
    # Collect states and targets by bidding with this snapshot on each data set.
    database_state, database_target = bid_to_the_end(snapshot, training_set, 100)
    database_state_test, database_target_test = bid_to_the_end(snapshot, test_set, 100)
    for turn in 1:6
        # Only the first shuffled minibatch of each set is evaluated.
        data = minibatch(
            hcat(database_state[turn]...), hcat(database_target[turn]...), batch_size;
            shuffle=true, partial=true,
        )
        push!(train_losses[turn], snapshot[turn](first(data)))
        data = minibatch(
            hcat(database_state_test[turn]...), hcat(database_target_test[turn]...), batch_size;
            shuffle=true, partial=true,
        )
        push!(test_losses[turn], snapshot[turn](first(data)))
    end
end

In [None]:
# Display the collected test losses: one list per network, with one value per
# snapshot in history_of_Q.
test_losses

In [None]:
# Overlay the test and train loss curves of all six networks on one figure,
# in the order Test 1, Train 1, Test 2, Train 2, ... The y-axis is clamped
# to (0, 0.5) by the final call, which also returns the figure for display.
plot(test_losses[1]; label="Test 1", xlabel="Epochs", ylabel="Loss")
plot!(train_losses[1]; label="Train 1", xlabel="Epochs", ylabel="Loss")
for turn in 2:6
    plot!(test_losses[turn]; label="Test $(turn)", xlabel="Epochs", ylabel="Loss")
    plot!(train_losses[turn]; label="Train $(turn)", xlabel="Epochs", ylabel="Loss")
end
plot!(; ylims=(0, 0.5))

# Opening bid with a random hand

In [None]:
# Sample `sample_size` random deals and record the first network's action on
# the north hand. Each record is (north hand, action, south hand, costs).
example_bids = []
sample_size = 1000
for _ in 1:sample_size
    state = deal_data[rand(1:end)]
    current_bid = 1
    # NOTE(review): the final argument 0 is presumably the exploration rate
    # (i.e. a greedy choice) — confirm against explore.ipynb.
    action = explore(Q[1], atype(state[1]), current_bid, 0)
    push!(example_bids, (state[1], action, state[2], state[3]))
end

In [None]:
# Inspect one random record from example_bids: print both hands, the first
# column of its cost matrix, the bid chosen by the network, and the bid at the
# highest index whose cost in that column is zero.
instance = example_bids[rand(1:end)]
println(human_readable(instance[1]))
println(human_readable(instance[3]))
let cost_column = instance[4][:, 1]
    println(cost_column)
    println(bids[instance[2]])
    println(bids[maximum(findall(==(0), cost_column))])
end