In [8]:
using NBInclude;
@nbinclude("deal_features.ipynb");

In [7]:
"""
    get_human_bid(hand)

Choose a rule-based opening bid for `hand` and return its position in the
global `bids` list (the result of `findfirst`, so `nothing` if the bid name is
absent from `bids`).

Hands with fewer than 12 high-card points return `1` without consulting `bids`
(presumably the index of the pass bid — TODO confirm against `bids`).
"""
function get_human_bid(hand)
    points = hcp_of(hand)
    points < 12 && return 1

    lengths = distribution_of(hand)
    # Position of a bid name inside the global `bids` list.
    lookup(name) = findfirst(x -> x == name, bids)

    # Strong openings are checked first: balanced 20-21, then any 22+.
    if is_balanced(hand) && 20 <= points <= 21
        return lookup("2N")
    end
    if points >= 22
        return lookup("2C")
    end

    # Five-card suit openings; the first suit wins when it is at least as long
    # as the second.
    if lengths[1] >= 5 && lengths[2] <= lengths[1]
        return lookup("1S")
    end
    if lengths[2] >= 5
        return lookup("1H")
    end

    # Balanced 15-17 without a five-card suit in the first two positions.
    if is_balanced(hand) && 15 <= points <= 17
        return lookup("1N")
    end

    # Remaining hands open in the longer of the last two suits; with equal
    # length, 1C is chosen only when that length is exactly three.
    if lengths[3] > lengths[4]
        return lookup("1D")
    end
    if lengths[4] > lengths[3] || lengths[4] == 3
        return lookup("1C")
    end
    return lookup("1D")
end

get_human_bid (generic function with 1 method)

In [2]:
"""
    bid!(Q, deal, database_state, database_target, epsilon_greedy;
         turn_depth=6, epsilon_human=0.0, feature=nothing)

Play one bidding sequence of at most `turn_depth` turns on `deal`, recording the
visited `(state, target)` pairs for experience replay.

# Arguments
- `Q`: indexable collection of per-turn models; `Q[t]` is handed to `explore`.
- `deal`: `deal[1]` and `deal[2]` are the two hands that alternate turns;
  `deal[3][:, seat]` is passed to `determine_target` and used as the true
  per-bid cost for that seat.
- `database_state`, `database_target`: replay buffers, mutated through
  `push!(buffer, t, item)`.
  NOTE(review): this three-argument `push!` needs a method that treats `t` as a
  turn slot; on a plain `Vector`, `Base.push!` would append `t` and `item` as
  two elements — confirm which buffer type callers pass.
- `epsilon_greedy`: exploration rate forwarded to `explore`.

# Keywords
- `turn_depth`: maximum number of turns.
- `epsilon_human`: probability of replacing the first turn by `get_human_bid`.
- `feature`: forwarded to `determine_target`.

# Returns
`(cost, loss)`: the true cost of the final action and the vector of per-turn
summed squared differences between the Q values and the targets. Returns
`nothing` only when `turn_depth < 1` (no turn is played).
"""
function bid!(Q,deal,database_state,database_target,epsilon_greedy;turn_depth=6, epsilon_human=0.0,feature=nothing)
    bidding_hist = zeros(36, 1)
    instance = (deal[1], deal[2], bidding_hist)
    current_bid = 1
    loss = []
    for t in 1:turn_depth
        seat = (t - 1) % 2 + 1      # hand whose turn it is
        other = t % 2 + 1           # the opposite hand
        if t == 1
            state = atype(instance[seat])
            if rand() < epsilon_human      # take the human bid with probability epsilon_human
                action = get_human_bid(instance[seat])
                bidding_hist[action] = 1
                current_bid = action
                if t == turn_depth
                    # The opening turn is also the last one, so the human bid is
                    # final. Previously this case fell off the loop and returned
                    # `nothing` instead of a `(cost, loss)` pair.
                    return deal[3][:, seat][action], loss
                end
                continue
            end
        else
            state = atype(vcat(instance[seat], bidding_hist))
        end
        target = determine_target(deal[3][:, seat], current_bid, feature=feature, hand=instance[other]) # Algorithm P

        # exploration
        action, q_values = explore(Q[t], state, current_bid, epsilon_greedy)
        push!(loss, sum((q_values - reshape(target, (length(target), 1))).^2))

        # Bidding ends when the current bid is repeated after the first turn,
        # or when the turn budget is exhausted.
        finished = (action == current_bid && t > 1) || t == turn_depth
        if finished
            # Only change the cost part: the first 36 entries become the true
            # costs. Slicing already allocates a fresh array, so no extra copy
            # is needed.
            target = vcat(deal[3][:, seat], target[37:end])
        end

        # Data for experience replay
        push!(database_state, t, state)
        push!(database_target, t, target)
        if finished
            return target[action], loss
        end

        # Update bidding history
        bidding_hist[action] = 1
        current_bid = action
    end
    # Reached only when turn_depth < 1.
    return nothing
end


"""
    bid_to_the_end(Q, deal_set, number_of_deals)

Run `bid!` on `number_of_deals` deals drawn uniformly at random (with
replacement) from `deal_set`, using an exploration rate of `0`, and return the
filled `(database_state, database_target)` buffers. The value returned by `bid!`
is discarded.

The buffers are six empty untyped vectors each.
NOTE(review): `bid!` fills them with `push!(buffer, t, item)`; confirm a
matching `push!` method exists for these plain nested arrays.
"""
function bid_to_the_end(Q,deal_set,number_of_deals)
    state_buffers = [[] for _ in 1:6]
    target_buffers = [[] for _ in 1:6]
    for _ in 1:number_of_deals
        sampled_deal = deal_set[rand(1:end)]
        bid!(Q, sampled_deal, state_buffers, target_buffers, 0)
    end
    return state_buffers, target_buffers
end

bid_to_the_end (generic function with 1 method)

In [3]:
# Print the 36-row debug table (bid name, Q value, cost, difference) used by `bid`.
function _print_bid_table(q_values, target)
    println("Bid\tQ value\t \tCost\tLoss")
    for i in 1:36
        println(bids[i], "\t", q_values[i], "\t", target[i], "\t", q_values[i] - target[i])
    end
end

"""
    bid(Q, deal; debug=false, turn_depth=6, epsilon_human=0, feature=nothing)

Play one greedy bidding sequence (exploration rate `0`) of at most `turn_depth`
turns on `deal` and return what happened.

# Arguments
- `Q`: indexable collection of per-turn models; `Q[t]` is handed to `explore`.
- `deal`: `deal[1]` and `deal[2]` are the two hands that alternate turns;
  `deal[3][:, seat]` is passed to `determine_target` and used as the true
  per-bid cost for that seat.

# Keywords
- `debug`: print the turn number and a Q-value/cost table on every turn.
- `turn_depth`: maximum number of turns.
- `epsilon_human`: probability of replacing the first turn by `get_human_bid`
  (that turn then contributes nothing to the two data vectors).
- `feature`: forwarded to `determine_target`.

# Returns
`(deal[1], deal[2], bidding_hist, q_values_data, target_data)`, where
`bidding_hist` is a 36×1 array with a `1` at every action taken and the data
vectors hold one entry per model-evaluated turn. The last `target_data` entry
has its first 36 values replaced by the true costs.
"""
function bid(Q,deal;debug=false,turn_depth=6,epsilon_human=0,feature=nothing)
    bidding_hist = zeros(36, 1)
    instance = (deal[1], deal[2], bidding_hist)
    q_values_data = []
    target_data = []
    current_bid = 1
    for t in 1:turn_depth
        if debug
            println(string(t, " bid"))
        end
        seat = (t - 1) % 2 + 1      # hand whose turn it is
        other = t % 2 + 1           # the opposite hand
        if t == 1
            state = atype(instance[seat])
            if rand() < epsilon_human      # take the human bid with probability epsilon_human
                action = get_human_bid(instance[seat])
                bidding_hist[action] = 1
                current_bid = action
                continue
            end
        else
            state = atype(vcat(instance[seat], bidding_hist))
        end
        target = determine_target(deal[3][:, seat], current_bid, feature=feature, hand=instance[other]) # Algorithm P
        target = reshape(target, (length(target), 1))

        # Greedy action selection (exploration rate 0).
        action, q_values = explore(Q[t], state, current_bid, 0)

        # Bidding ends when the current bid is repeated after the first turn,
        # or when the turn budget is exhausted.
        finished = (action == current_bid && t > 1) || t == turn_depth
        if finished
            # Only change the cost part: the first 36 entries become the true
            # costs. Slicing already allocates a fresh array, so no extra copy
            # is needed.
            target = vcat(deal[3][:, seat], target[37:end])
        end

        if debug
            _print_bid_table(q_values, target)
        end
        push!(q_values_data, q_values)
        push!(target_data, target)

        # Update bidding history. This now also happens on the final turn:
        # previously a new bid made when t == turn_depth was dropped from
        # bidding_hist, so callers saw the previous bid as the last one, while
        # bid! scores the final action itself.
        bidding_hist[action] = 1
        current_bid = action
        if finished
            break
        end
    end

    return deal[1], deal[2], bidding_hist, q_values_data, target_data
end

bid (generic function with 1 method)

In [1]:
"""
    sample_bid(Q, deal; debug=false, turn_depth=6)

Run `bid` on `deal` and print a summary: one line per recorded turn with the
mean squared difference between that turn's Q values and targets (both reshaped
to 36×1), then the two hands via `human_readable`, then the name of every bid
marked in the bidding history. Returns `nothing`.
"""
function sample_bid(Q,deal;debug=false,turn_depth=6)
    first_hand, second_hand, history, q_per_turn, target_per_turn =
        bid(Q, deal, debug=debug, turn_depth=turn_depth)

    # Per-turn mean squared error between prediction and target.
    for turn in eachindex(target_per_turn)
        predicted = reshape(q_per_turn[turn], (36, 1))
        expected = reshape(target_per_turn[turn], (36, 1))
        println(mean((predicted - expected).^2))
    end

    println(human_readable(first_hand))
    println(human_readable(second_hand))

    # List every bid that was made, in index order.
    for slot in 1:36
        history[slot] == 1 || continue
        println(bids[slot])
    end
    return nothing
end

sample_bid (generic function with 1 method)

In [None]:
"""
    model_accuracy(models, test_set; progress=false, turn_depth=6,
                   epsilon_human=0, feature=nothing)

Evaluate every model in `models` by running `bid` on each deal of `test_set`.

For each deal the cost of the outcome is read from the last target returned by
`bid`, at the position of the last `1` in the bidding history (position `1`
when the history is empty). A deal counts as a hit when that cost equals `0`;
otherwise `cost * 24` is added to the model's penalty total (presumably a
conversion to IMPs — TODO confirm the scaling).

For each model the hit fraction and the mean penalty are printed and collected.

# Keywords
- `progress`: wrap `test_set` in a `ProgressBar` while iterating.
- `turn_depth`, `epsilon_human`, `feature`: forwarded to `bid`.

# Returns
`(accuracy, mean_penalty)` as scalars when exactly one model was evaluated,
otherwise the two vectors with one entry per model.
"""
function model_accuracy(models,test_set;progress=false,turn_depth=6,epsilon_human=0,feature=nothing)
    accuracies = []
    ave_imps = []
    deal_count = length(test_set)
    for Q in models
        hits = 0
        penalty_total = 0
        deals = progress ? ProgressBar(test_set) : test_set
        for deal in deals
            _, _, history, _, targets = bid(Q, deal, turn_depth=turn_depth, epsilon_human=epsilon_human, feature=feature)
            final_costs = targets[end]
            # Position of the last bid made; no bid at all means the double pass.
            last_bid = findlast(x -> x == 1, history)
            cost = last_bid === nothing ? final_costs[1] : final_costs[last_bid]
            if cost == 0
                hits += 1
            else
                penalty_total += cost * 24
            end
        end
        accuracy = hits / deal_count
        mean_penalty = penalty_total / deal_count
        println(accuracy)
        println(mean_penalty)
        push!(accuracies, accuracy)
        push!(ave_imps, mean_penalty)
    end
    # A single model yields scalars rather than one-element vectors.
    if length(accuracies) == 1
        return accuracies[1], ave_imps[1]
    end
    return accuracies, ave_imps
end