In [92]:
using Knet: Knet, dir, accuracy, progress, sgd, load, save, gc, Param, KnetArray, Data, minibatch, nll, relu, training, dropout
using Statistics
using Plots
using CUDA
using Random
using LinearAlgebra
using Knet

# Array type alias: Knet's KnetArray with Float32 elements.
# NOTE(review): `atype` is not referenced in any other visible cell, and the hands/costs
# built below are plain Float64 arrays — presumably inputs should be converted with it
# before being fed to the networks; confirm.
atype = KnetArray{Float32};

In [93]:
# Bid labels in index order: "PA" is index 1, followed by the 35 level/denomination
# labels from "1C" up to "7N" (36 entries in total).
bids=["PA",
  "1C", "1D", "1H", "1S", "1N",
  "2C", "2D", "2H", "2S", "2N",
  "3C", "3D", "3H", "3S", "3N",
  "4C", "4D", "4H", "4S", "4N",
  "5C", "5D", "5H", "5S", "5N",
  "6C", "6D", "6H", "6S", "6N",
  "7C", "7D", "7H", "7S", "7N"];

# Two 14-entry integer score tables.
# NOTE(review): these look like doubled undertrick penalties for 0..13 undertricks
# (first row not vulnerable, second row vulnerable) — confirm. The table is not
# referenced in any other visible cell.
doubled_score=
[[0   ,  100,  300,  500,  800, 1100, 1400, 1700,
2000, 2300, 2600, 2900, 3200, 3500],
  [0   ,  200,  500,  800, 1100, 1400, 1700, 2000,
    2300, 2600, 2900, 3200, 3500, 3800]];


# Random Deals and Hand Features
Each hand is represented as a 52-element 0/1 vector (one entry per card), and the bidding history as a 36-element 0/1 vector (one entry per bid in `bids`).

In [94]:
## entries 1..13 are the spades, entries 14..26 the next suit, and so on (suit order S, H, D, C)
## within each suit the 13 positions (1 2 3 4 5 6 7 8 9 10 11 12 13)
## correspond to the ranks          (2 3 4 5 6 7 8 9 T  J  Q  K  A)

"""
    random_hand(; remaining=ones(52,1))

Draw one random 13-card hand from the cards that are still available.

# Keywords
- `remaining`: 52-entry 0/1 indicator (vector or 52×1 matrix, indexed linearly); an
  entry equal to 1 means that card has not been dealt yet.

# Returns
A 52-element `Vector{Int}` holding exactly thirteen 1s, all at positions where
`remaining` is 1.

# Throws
- `ArgumentError` if fewer than 13 cards are still available.
"""
function random_hand(;remaining=ones(52,1))
    cards_in_deal = [j for j in 1:52 if remaining[j] == 1]
    # Fail with a clear message instead of an out-of-bounds slice further down.
    length(cards_in_deal) >= 13 || throw(ArgumentError(
        "need at least 13 remaining cards to deal a hand, got $(length(cards_in_deal))"))
    shuffle!(cards_in_deal)
    # Mark the first 13 shuffled cards directly rather than testing membership 52 times.
    hand = zeros(Int, 52)
    hand[cards_in_deal[1:13]] .= 1
    return hand
end

"""
    random_deal(hands=[])

Build a complete four-hand deal, keeping any pre-dealt hands and drawing the rest at
random from the cards those hands leave over.

# Arguments
- `hands`: the pre-dealt hands, either a collection of 52-entry 0/1 vectors or a 52×k
  matrix with one hand per column (at most 4 hands).

# Returns
A 52×4 `Float64` matrix; each column is one hand, in the order N, S, E, W. The
pre-dealt hands occupy the first columns in the order given.

# Throws
- `ArgumentError` if more than four hands are supplied.
"""
function random_deal(hands=[])
    length(hands) <= 4 || throw(ArgumentError(
        "a deal has only 4 hands, got $(length(hands)) pre-dealt hands"))
    remaining = ones(52, 1)
    all_hands = zeros(52, 4)
    for (i, hand) in enumerate(hands)        # write the given pre-dealt hands
        remaining = remaining - hand
        all_hands[:, i] = hand
    end
    for i in (length(hands) + 1):4           # generate the remaining hands
        hand = random_hand(remaining=remaining)
        remaining = remaining - hand
        all_hands[:, i] = hand
    end
    return all_hands
end

# Matrix input: treat every column as one pre-dealt hand.
random_deal(hands::AbstractMatrix) = random_deal([hands[:, k] for k in axes(hands, 2)])



#features

"""
    hcp_of(hand; suit_seperated=false)

Compute the high-card points of a 52-entry hand vector. The last four positions of
each 13-card suit block (J, Q, K, A) are weighted 1, 2, 3 and 4.

# Keywords
- `suit_seperated`: when `true`, return a 4-element vector with the points of each
  suit block instead of the total.
"""
function hcp_of(hand; suit_seperated=false)
    weights = [1, 2, 3, 4]
    honour_slots = (10:13, 23:26, 36:39, 49:52)
    if !suit_seperated
        # Pool the honour indicators of all four suits, then weight them once.
        pooled = foldl(+, (hand[slot] for slot in honour_slots))
        return dot(pooled, weights)
    end
    return [dot(weights, hand[slot]) for slot in honour_slots]
end

"""
    distribution_of(hand)

Return the number of cards held in each suit, in the order S, H, D, C, by summing each
consecutive 13-entry block of the 52-entry hand vector.
"""
function distribution_of(hand)
    return [sum(hand[first_card:first_card + 12]) for first_card in 1:13:52]
end

#tools

"""
    human_readable(hand)

Render a 52-entry 0/1 hand vector as four strings, one per 13-card suit block in
storage order, each listing the held ranks from highest (A) to lowest (2). A suit in
which no card is held yields an empty string.

# Returns
A `Vector{String}` of length 4.
"""
function human_readable(hand)
    to_cards = ["2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K", "A"]
    suits = String[]   # typed accumulator instead of an untyped `[]`
    for i in 1:4
        suit = hand[13 * (i - 1) + 1:13 * i]
        push!(suits, join(to_cards[j] for j in 13:-1:1 if suit[j] == 1))
    end
    return suits
end


human_readable (generic function with 1 method)

# Deep Q-network Model

In [96]:
"""
    Chain(layers...)

A sequence of callable layers applied one after another. The layers are stored as a
tuple whose concrete type is a type parameter, so the field is not boxed as `Any`.
"""
struct Chain{T<:Tuple}
    layers::T
    Chain(layers...) = new{typeof(layers)}(layers)
end

# Forward pass: feed `x` through every layer in order and return the final output.
# An empty chain returns `x` unchanged.
function (c::Chain)(x)
    for layer in c.layers
        x = layer(x)
    end
    return x
end
# Two-argument call: loss of the chain's output for input `x` against target `y`.
function (c::Chain)(x, y)
    return loss(c(x), y)
end

# Dataset call: average of the two-argument loss over every (x, y) pair yielded by `d`.
function (c::Chain)(d::Data)
    return mean(c(x, y) for (x, y) in d)
end

"""
    Dense(w, b, f)
    Dense(i::Int, o::Int, f=relu)

Fully connected layer computing `f.(w * x .+ b)`. The second form creates an `o`×`i`
weight with `param(o, i)` and a length-`o` bias with `param0(o)`.
"""
struct Dense{W,B,F}
    w::W
    b::B
    f::F
end

Dense(i::Int, o::Int, f=relu) = Dense(param(o, i), param0(o), f)

# Forward pass. The activation `f` is applied elementwise to the affine output; without
# it, stacked layers collapse into a single linear map and the `f` field is ignored.
(l::Dense)(x) = l.f.(l.w * x .+ l.b)

# Network taking 52 inputs (one hand) and producing 36 outputs.
dnn_init = Chain(Dense(52, 128), Dense(128, 128), Dense(128, 128), Dense(128, 36, identity));
# Network taking 52 + 36 inputs (a hand plus the bidding history) and producing 36 outputs.
dnn = Chain(Dense(52 + 36, 128), Dense(128, 128), Dense(128, 128), Dense(128, 36, identity));
# Print the (weight, bias) summaries of every layer of `dnn_init`, one layer per line.
foreach(l -> println(summary.((l.w, l.b))), dnn_init.layers);


("128×52 Param{KnetArray{Float32,2}}", "128-element Param{KnetArray{Float32,1}}")
("128×128 Param{KnetArray{Float32,2}}", "128-element Param{KnetArray{Float32,1}}")
("128×128 Param{KnetArray{Float32,2}}", "128-element Param{KnetArray{Float32,1}}")
("36×128 Param{KnetArray{Float32,2}}", "36-element Param{KnetArray{Float32,1}}")


In [97]:
"""
    loss(Q_value, cost)

Half of the mean squared residual, where the residual is `(Q_value + cost) .- 1`.
`Q_value` and `cost` are added without broadcasting.
"""
function loss(Q_value, cost)
    residual = (Q_value + cost) .- 1
    return mean(residual .^ 2) / 2
end


# Loss of this network's output for `state` measured against the target `cost`.
(c::Chain)(state, cost) = loss(c(state), cost)

# Average loss over a dataset: evaluate the two-argument call on every (x, y) pair
# yielded by `d` and return the mean. Iterates the argument `d` itself; the earlier
# version looped over an unrelated name `data`.
function (c::Chain)(d::Data)
    return mean(c(x, y) for (x, y) in d)
end
    

In [98]:
"""
    sgdupdate!(func, args; lr=0.1)

Evaluate `func(args...)` under `@diff`, then subtract `lr` times its gradient from
every parameter returned by `params` for that result, in place.

# Returns
`value` of the differentiated result, i.e. the value computed before the update.
"""
function sgdupdate!(func, args; lr=0.1)
    tape = @diff func(args...)
    for w in params(tape)
        g = grad(tape, w)
        w .-= lr * g
    end
    return value(tape)
end

sgdupdate! (generic function with 1 method)

In [None]:
# First network: 52 inputs (a hand only), 36 outputs.
Q1 = Chain(Dense(52, 128), Dense(128, 128), Dense(128, 128), Dense(128, 36, identity));
# Remaining five networks: 52 + 36 inputs (a hand plus the bidding history), 36 outputs.
# Each one is built independently, in the order Q2 through Q6.
Q2, Q3, Q4, Q5, Q6 = [
    Chain(Dense(52 + 36, 128), Dense(128, 128), Dense(128, 128), Dense(128, 36, identity))
    for _ in 1:5
];
# All six networks collected in index order.
Q = [Q1, Q2, Q3, Q4, Q5, Q6];

# Cost Array Calculation and Double Dummy Analysis (will be added)

In [99]:
# Normalise pre-dealt hands to a list of hand vectors: a matrix is split into its
# columns, anything else is passed through unchanged.
_hand_list(hands::AbstractMatrix) = [hands[:, k] for k in axes(hands, 2)]
_hand_list(hands) = hands

"""
    generate_cost(hands_NS, random_trial=5; random_cost=false)

Estimate a cost array for the given pre-dealt hands by averaging over `random_trial`
random completions of the deal.

# Arguments
- `hands_NS`: the known hands, as a 52×k matrix (one hand per column) or a list of
  52-entry vectors.
- `random_trial`: number of random completions to average over.

# Keywords
- `random_cost`: when `true`, skip the simulation and return uniform random costs.

# Returns
A 52×1 `Float64` matrix. The per-deal cost computation is still a placeholder, so the
simulated path currently returns all zeros.
"""
function generate_cost(hands_NS,random_trial=5;random_cost=false)
    if random_cost
        return rand(52,1)
    end
    known_hands = _hand_list(hands_NS)
    costs = zeros(52,1)
    for _ in 1:random_trial
        deal = random_deal(known_hands)
        # TODO: compute this deal's cost vector from `deal` (double dummy analysis).
        deal_cost = zeros(52,1)
        costs += deal_cost
    end
    return costs ./ random_trial
end

generate_cost (generic function with 2 methods)

In [None]:
"""
    generate_random_instances(size)

Create `size` random training instances in a 3×52×`size` `Float64` array. For instance
`k`, row 1 is the first hand of a random deal, row 2 the second hand, and row 3 the
cost array that `generate_cost` returns for those two hands with `random_cost=true`.
"""
function generate_random_instances(size)
    data = Array{Float64,3}(undef, 3, 52, size)
    for k in 1:size
        deal = random_deal()
        first_two = deal[:, 1:2]
        # The two hands are columns of the deal; store them as rows 1 and 2.
        data[1:2, :, k] = permutedims(first_two)
        data[3, :, k] = generate_cost(first_two, random_cost=true)
    end
    return data
end
    

In [228]:
# Hyperparameters shared by the baseline and learning cells below.
epsilon_greedy=0.05;   # compared against rand() in the learning loop to pick the exploration branch
turn_depth=5;          # upper bound on the number of bidding turns simulated per deal
learning_rate=0.1;     # passed as `lr` to sgdupdate!
batch_size=50;         # batch size handed to minibatch
instance_size=1000;    # number of random deals produced by generate_random_instances

# Baseline Algorithm

In [249]:
# Baseline: for each iteration pick a random stored deal, run an auction that bids
# uniformly at random, and record the stored cost of the contract that was reached.
instances = generate_random_instances(instance_size);
costs = Float64[]
iteration_size = 1000
for _ in 1:iteration_size
    instance = instances[:, :, rand(1:instance_size)]   # choose a random deal
    bidding_hist = zeros(36)

    # Bids only move upward, so the highest bid so far is also the latest one.
    # Index 1 ("PA") is the starting point before any bid has been made.
    last_bid = 1
    for t in 1:turn_depth
        current_bid = rand(last_bid:36)    # randomly bid a contract
        current_bid == last_bid && break   # repeating the last bid means pass
        bidding_hist[current_bid] = 1
        last_bid = current_bid
    end

    # The contract is the LAST bid made (highest set index); index 1 if nobody bid.
    contract = something(findlast(==(1), bidding_hist), 1)
    push!(costs, instance[3, contract])    # cost of declaring this contract
end

mean(costs)   # overall success of the baseline

0.5076760429801829

# Main Learning Algorithm

In [38]:
# NOTE(review): this cell is an unfinished draft of the learning loop. Several names it
# uses (`i`, `current_bids`, `states`) are not defined in any visible cell, so it cannot
# run as written; the notes below mark the specific spots to resolve.
database=[]
instances = generate_random_instances(instance_size);
# NOTE(review): generate_random_instances returns a 3-D array, so a single linear index
# picks one Float64 entry rather than a deal (the baseline cell slices
# instances[:,:,k]) — confirm the intended indexing here and in `state` below.
instance= instances[rand(1:end)]
bidding_hist=zeros(36,1)
state=(instance[1],instance[2],bidding_hist);


# One pass per bidding turn, up to turn_depth turns.
for t in 1:turn_depth

    cost=ones(36,1)*(-100)
    # initialize the Q-network
    # NOTE(review): `i` is not defined in this cell; presumably `t` was meant — confirm.
    if t==1
        Q_values=Q[1](state[t%2+1])
    else
        Q_values=Q[i](vcat(state[t%2+1],state[3]))
    end
    #determine the cost vector 
    
    # NOTE(review): `bidding_hist[i]` indexes a single entry (and `i` is undefined); if
    # `b` ends up empty, `maximum(b)` will throw.
    b=findall(x->x==1,bidding_hist[i])
    current_bid=maximum(b)
    
    
    # NOTE(review): no element is passed to push! here — the record to store is missing.
    push!(database,)
    
    #exploration
    
    # NOTE(review): this branch reads `costs` (plural) although the loop defines `cost`,
    # and assigns to `bids`, which is also the name of the bid-label table — confirm.
    # `current_bids` is not defined in any visible cell, and `bid` is only assigned
    # inside the inner loop below.
    if rand()>epsilon_greedy
        bids=findmax.(costs)
    else
        for j in 1:instance_size
            bid= rand(current_bids[j]:36)
        end
    end
    
    # current bid is pass ----- how? parallel declarations
    if bid== current_bid
        break
    end
    # NOTE(review): this indexing expression has no effect, and `i` is undefined.
    costs[i,:,:]
    bidding_hist[bid]=1
end

# Training pass: one update per turn-specific network Q[t].
# NOTE(review): `states` is not defined in any visible cell. sgdupdate! calls
# func(args...), so `data` is splatted into Q[t]'s arguments — confirm that this is the
# intended way to feed minibatches.
for t in 1:turn_depth
    data=minibatch(states[1],costs[1],batch_size)
    sgdupdate!(Q[t],data,lr=learning_rate)
end


    

LoadError: MethodError: no method matching *(::KnetArray{Float32,2}, ::Array{Float64,1})
Closest candidates are:
  *(::Any, ::Any, !Matched::Any, !Matched::Any...) at operators.jl:538
  *(!Matched::Adjoint{var"#s828",var"#s8281"} where var"#s8281"<:(AbstractArray{T,1} where T) where var"#s828"<:Number, ::AbstractArray{var"#s827",1} where var"#s827"<:Number) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\LinearAlgebra\src\adjtrans.jl:283
  *(!Matched::Transpose{T,var"#s828"} where var"#s828"<:(AbstractArray{T,1} where T), ::AbstractArray{T,1}) where T<:Real at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.5\LinearAlgebra\src\adjtrans.jl:284
  ...

In [109]:
# Display the documentation attached to `minibatch` (imported from Knet above).
@doc minibatch

```
minibatch(x, [y], batchsize; shuffle, partial, xtype, ytype, xsize, ysize)
```

Return an iterator of minibatches [(xi,yi)...] given data tensors x, y and batchsize.  

The last dimension of x and y give the number of instances and should be equal. `y` is optional, if omitted a sequence of `xi` will be generated rather than `(xi,yi)` tuples.  Use `repeat(d,n)` for multiple epochs, `Iterators.take(d,n)` for a partial epoch, and `Iterators.cycle(d)` to cycle through the data forever (this can be used with `converge`). If you need the iterator to continue from its last position when stopped early (e.g. by a break in a for loop), use `Iterators.Stateful(d)` (by default the iterator would restart from the beginning).

Keyword arguments:

  * `shuffle=false`: Shuffle the instances every epoch.
  * `partial=false`: If true include the last partial minibatch < batchsize.
  * `xtype=typeof(x)`: Convert xi in minibatches to this type.
  * `ytype=typeof(y)`: Convert yi in minibatches to this type.
  * `xsize=size(x)`: Convert xi in minibatches to this shape (with last dimension adjusted for batchsize).
  * `ysize=size(y)`: Convert yi in minibatches to this shape (with last dimension adjusted for batchsize).
