# A Jupyter Notebook that makes our work clear(er)

This Jupyter notebook will introduce the codes required to showcase our efforts in the model and how to run them.

Note: It may not really be recommended to run all of them due to long execution times.

## The Game of Singaporean Bridge

The code would look too long to fit in so we imported it from game.py

In [69]:
from game import Bridge
bridge_example = Bridge(0)

In [70]:
bridge_example.cards

[[1, 8],
 [1, 10],
 [2, 3],
 [2, 7],
 [2, 9],
 [2, 11],
 [3, 4],
 [3, 5],
 [4, 5],
 [4, 6],
 [4, 8],
 [4, 12],
 [4, 13]]

## Comparison with other models

### Gnoh's Algorithm

We estimate the win rate as the bidder to be around 42%.

In [71]:
import GnohChengYi

In [72]:
GnohChengYi.run_trials(10000)

declarer win/almost win rate: 0.419


### Random Algorithm, with some of Gnoh's logic

In [83]:
import random

def check_reshuffle(bridges):
    for bridge in bridges:
        if bridge.check_reshuffle():
            return True
    return False

def random_bidding(game, n_pass):

    OUTPUT_MAP = [
                    [0, 0],
                    [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], 
                    [2, 1], [2, 2], [2, 3], [2, 4], [2, 5], 
                    [3, 1], [3, 2], [3, 3], [3, 4], [3, 5], 
                    [4, 1], [4, 2], [4, 3], [4, 4], [4, 5], 
                    [5, 1], [5, 2], [5, 3], [5, 4], [5, 5], 
                    [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], 
                    [7, 1], [7, 2], [7, 3], [7, 4], [7, 5]
                                                            ]

    id = 0
    if game.last_number > 0:
        id = game.last_suit + (game.last_number-1) * 5

    bids = [[0,0] for _ in range(n_pass)]
    for i in range(4):
        if id+i+1 >= 36: 
            break
        else: 
            bids.append(OUTPUT_MAP[id+i+1])

    return bids[random.randrange(len(bids))]

def smart_bidding(game):
    HCP = 0
    sl = [0,0,0,0]

    for t in game.cards:
        HCP += max(0,t[1]-9)
        sl[t[0]-1] += 1

    maxNum = None
    suit = None
    maxBid = []

    valid = []

    for i in range(1,8):
        for j in range(1,5):
            if i > Bridge.last_number:
                valid.append([i,j])
            elif i == Bridge.last_number and j > Bridge.last_suit:
                valid.append([i,j])

    maxL, minL = max(sl), min(sl)
    if maxL <= 4 and minL >= 2: # Try No Trump
        maxNum = round(0.25 * HCP - 1.75)
        maxNum = min(maxNum, 7)
        suit = 5
    else:
        for s in (3,2,1,0):
            if sl[s] == maxL:
                suit = s
                break
        maxNum = round(0.23 * HCP + 0.70 * maxL - 4.39)
        maxNum = min(maxNum, 7)
        if maxNum > 0: maxBid = [maxNum, suit]
        else: maxBid = [0,0]
    
    if maxBid == [0,0] or maxBid not in valid: return [0,0]
    if Bridge.last_suit == suit: return [0,0]
    for bid in valid:
        if bid[1] == suit: return bid


def random_calling(game):
    not_in_hand = []
    for i in range(1,5):
        for j in range(1,14):
            if [i,j] not in game.cards:
                not_in_hand.append([i,j])
    return not_in_hand[random.randrange(39)]

def smart_calling(game):
    x = Bridge.last_suit
    c = game.cards

    if x < 5:
        if [x,13] not in c:
            return [x,13]
        elif [x,12] not in c: 
            return [x,12]
        elif [x,11] not in c: 
            return [x,11]
        else:
            for card in ([4,13],[3,13],[2,13],[1,13],[4,12],[3,12],[2,12],[1,12],[4,11],[3,11],[2,11],[1,11]):
                if card not in c: return card
    else:
        for card in ([4,13],[3,13],[2,13],[1,13],[4,12],[3,12],[2,12],[1,12],[4,11],[3,11],[2,11],[1,11]):
            if card not in c: return card

def random_playing(game):
    valid = []
    for card in game.cards:
        if game.valid_card_play(card):
            valid.append(card)
    return valid[random.randrange(len(valid))]

def run_random_trials(num_games=10000, smart_bid=False, smart_call=False, printing=False, print_res=False, n_pass=4):
    bids, total = {}, {}

    game_cnt = 0
    bidder_win_cnt = 0

    for i in range(1,8):
        for j in range(1,6):
            bids[(i,j)] = 0
            total[(i,j)] = 0


    while game_cnt < num_games:

        next_player = 0
        bridges     = [Bridge(i) for i in range(4)]

        # If any of the players can reshuffle, start a new game
        if check_reshuffle(bridges): continue

        # Execute the bidding phase

        if printing: print('Bidding Phase:')

        while Bridge.current_phase == Bridge.BID_PHASE:
            bridge  = bridges[next_player]

            if smart_bid:
                move = smart_bidding(bridge)
                if printing: print(next_player,move)
                r, d, next_player = bridge.play_step(move)
            else:
                move = random_bidding(bridge,n_pass)
                if printing: print(next_player,move)
                r, d, next_player = bridge.play_step(move)

        
        # If everyone passes, start a new game
        if Bridge.all_passed: continue
        
        if printing:
            print('Final bid:')

            print(Bridge.last_number, Bridge.last_suit)
            print()
            
            print('Player cards, starting from bidder:')

            print(bridges[(Bridge.bidder_num)%4].cards)
            print(bridges[(Bridge.bidder_num + 1)%4].cards)
            print(bridges[(Bridge.bidder_num + 2)%4].cards)
            print(bridges[(Bridge.bidder_num + 3)%4].cards)

        # Partner calling phase

        # Run until the bidder makes a valid call

        bridge  = bridges[next_player]

        if smart_call:
            move = smart_calling(bridge)
            r, d, next_player = bridge.play_step(move)
            if printing:
                print('Partner card:')
                print(Bridge.partner_card)
        else:
            move = random_calling(bridge)
            r, d, next_player = bridge.play_step(move)
            if printing:
                print('Partner card:')
                print(Bridge.partner_card)

        # For other players to check if they are the partner
        bridges[(Bridge.bidder_num + 1)%4].play_step()
        bridges[(Bridge.bidder_num + 2)%4].play_step()
        bridges[(Bridge.bidder_num + 3)%4].play_step()


        # Execute the card playing phase

        if printing: print('Card Playing Phase:')

        suit = ['Club','Diam','Heart','Spade']
        num  = ['2','3','4','5','6','7','8','9','10','J','Q','K','A']

        while Bridge.current_phase == Bridge.PLAY_PHASE:
            bridge  = bridges[next_player]

            move = random_playing(bridge)

            if printing: 
                if Bridge.bidder_num == next_player:
                    print('Bidder',num[move[1]-1],suit[move[0]-1])
                elif Bridge.bidder_lst[next_player] == 1:
                    print('Partner',num[move[1]-1],suit[move[0]-1])
                else:
                    print('Against',num[move[1]-1],suit[move[0]-1])

            r, d, next_player = bridge.play_step(move)


        game_cnt += 1
        total[(Bridge.bid_number,Bridge.bid_suit)] += 1
        if printing:
            print('Game',game_cnt)
            print('Number:',Bridge.bid_number,', Suit:',Bridge.bid_suit)
        
        if Bridge.bidder_sets >= 6 + Bridge.bid_number:
            if printing: print(Bridge.bidder_sets,'Bidder win')
            bidder_win_cnt += 1
            bids[(Bridge.bid_number,Bridge.bid_suit)] += 1
        else:
            if printing: print(Bridge.bidder_sets,'Bidder lose')



    print('bidder win rate:',bidder_win_cnt/game_cnt)

    if print_res:
        print(bids)
        print(total)


        prop = {}

        for i in range(1,8):
            for j in range(1,6):
                if total[(i,j)] != 0:
                    prop[(i,j)] = bids[(i,j)]/total[(i,j)]
                else:
                    prop[(i,j)] = 'NIL'

        print(prop)

In [86]:
for n_pass in [1,2,4,6,8,12,16]:
    run_random_trials(n_pass=n_pass)

bidder win rate: 0.0163
bidder win rate: 0.0661
bidder win rate: 0.2015
bidder win rate: 0.2844
bidder win rate: 0.3379
bidder win rate: 0.3989
bidder win rate: 0.4241


### Random Bidding and Playing, Gnoh's Calling

In [87]:
for n_pass in [1,2,4,6,8,12,16]:
    run_random_trials(smart_call=True,n_pass=n_pass)

bidder win rate: 0.022
bidder win rate: 0.098
bidder win rate: 0.2625
bidder win rate: 0.3761
bidder win rate: 0.4488
bidder win rate: 0.5232
bidder win rate: 0.5494


### Random Calling and Playing, Gnoh's Bidding

In [84]:
run_random_trials(smart_bid=True)

bidder win rate: 0.3751


### Random Playing, Gnoh's Bidding and Calling

In [85]:
run_random_trials(smart_bid=True, smart_call=True)

bidder win rate: 0.5015


## Our Models

We have selected 2 models which we feel would be suitable to present.

In [88]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Linear_QNet1(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

class Linear_QNet2(nn.Module):
    def __init__(self, input_size, hidden_size1, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x

class QTrainer:
    def __init__(self, model, lr, gamma):
        self.lr = lr
        self.gamma = gamma
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)
        # (n, x)

        if len(state.shape) == 1:
            # (1, x)
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done, )

        # 1: predicted Q values with current state
        pred = self.model(state)

        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))

            target[idx][torch.argmax(action[idx]).item()] = Q_new
    
        # 2: Q_new = r + y * max(next_predicted Q value) -> only do this if not done
        # pred.clone()
        # preds[argmax(action)] = Q_new
        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()

        self.optimizer.step()

### Training all 3 phases

In [112]:
import numpy as np
from collections import deque

BATCH_SIZE = 2500
MAX_MEMORY = 100000
LR = 0.001

class Agent:
    def __init__(self):
        self.epsilon = 1
        self.eps_min = 0.01
        self.eps_dec = 0.00005
        self.gamma   = 0.9
        self.memory  = deque(maxlen=MAX_MEMORY)

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE) # list of tuples
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def remember(self, state, action, reward, state_, done):
        self.memory.append((state, action, reward, state_, done))

class BiddingAgent(Agent):
    OUTPUT_MAP = [
                    [0, 0],
                    [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], 
                    [2, 1], [2, 2], [2, 3], [2, 4], [2, 5], 
                    [3, 1], [3, 2], [3, 3], [3, 4], [3, 5], 
                    [4, 1], [4, 2], [4, 3], [4, 4], [4, 5], 
                    [5, 1], [5, 2], [5, 3], [5, 4], [5, 5], 
                    [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], 
                    [7, 1], [7, 2], [7, 3], [7, 4], [7, 5]
                                                            ]

    def __init__(self):
        super().__init__()
        self.model   = Linear_QNet1(43,65,65,36)
    
    def get_state(self, game):
        state = []

        cards = game.cards
        for card in cards:
            state.append(card[0])
            state.append(card[1])

        suits_bid = Bridge.suits_bid
        for i in range(1,4):
            id = (game.player_num+i)%4
            state.extend(suits_bid[id])
        
        state.extend([Bridge.last_number, Bridge.last_suit])

        return state


    def get_action(self, state, game, n_pass):
        if np.random.random() > self.epsilon:
            s = torch.tensor(state)
            s = s.type(torch.float32)
            x = self.model(s)
            move = torch.argmax(x).item()
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return BiddingAgent.OUTPUT_MAP[move]
        else:
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return self.explore(game, n_pass) 

    def explore(self, game, n_pass):
        id = 0
        if game.last_number > 0:
            id = game.last_suit + (game.last_number-1) * 5

        bids = [[0,0] for _ in range(n_pass)]
        for i in range(4):
            if id+i+1 >= len(BiddingAgent.OUTPUT_MAP): 
                break
            else: 
                bids.append(BiddingAgent.OUTPUT_MAP[id+i+1])

        return bids[random.randrange(len(bids))]
    
    def load_state(self, checkpoint):
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()             

class CallingAgent(Agent):
    OUTPUT_MAP = [
                    [1, 11], [1, 12], [1, 13], 
                    [2, 11], [2, 12], [2, 13],
                    [3, 11], [3, 12], [3, 13],
                    [4, 11], [4, 12], [4, 13]
                                                ]

    def __init__(self):
        super().__init__()
        self.model   = Linear_QNet1(42,41,41,12)
    
    def get_state(self, game):
        state = []

        cards = game.cards
        for card in cards:
            state.extend(card)

        suits_bid = Bridge.suits_bid
        for i in range(1,4):
            id = (game.player_num+i)%4
            state.extend(suits_bid[id])
        
        state.append(Bridge.last_suit)

        return state

    def get_action(self, state, game):
        if np.random.random() > self.epsilon:
            s = torch.tensor(state)
            s = s.type(torch.float32)
            x = self.model(s)
            move = torch.argmax(x).item()
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return CallingAgent.OUTPUT_MAP[move]
        else:
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return self.explore(game)
    
    def explore(self, game):
        called = False
        while not called:
            call = random.choice(CallingAgent.OUTPUT_MAP)
            if call not in game.cards:
                called = True
                return call

    def load_state(self, checkpoint):
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()


class PlayingAgent(Agent):

    def __init__(self):
        super().__init__()
        self.model   = Linear_QNet1(104,82,82,13)
    
    def get_state(self, game):
        state = []

        for i in range(13):
            if game.org_cards[i] not in game.cards:
                state.extend([0, 0])
            else:
                state.extend(game.org_cards[i])

        suits_bid = Bridge.suits_bid
        for i in range(1,4):
            id = (game.player_num+i)%4
            state.extend(suits_bid[id])
        
        state.append(Bridge.last_suit)

        state.extend(game.partner_card)

        # partner
        if game.bidder_side: state.append(1)
        else: state.append(0)

        # last 3 cards played
        past_cards = Bridge.past_cards
        if len(past_cards) == 3:
            for c in past_cards:
                state.extend(c)
        else:
            non_cards = 3-len(past_cards)
            for tuple in past_cards:
                state.extend(tuple)
            for i in range(non_cards): 
                    state.append(-1)
                    state.append(-1)
        
        # trump broken
        if Bridge.trump_broken: state.append(1)
        else: state.append(0)

        played = [0]*52
        for c in Bridge.cards_played:
            played[(c[0]-1)*13+(c[1]-1)] = 1
        state.extend(played)

        return state

    def get_action(self, state, game):
        if np.random.random() > self.epsilon:
            s = torch.tensor(state)
            s = s.type(torch.float32)
            x = self.model(s)
            move = torch.argmax(x).item()
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return game.org_cards[move]
        else:
            if self.epsilon > self.eps_min: self.epsilon -= self.eps_dec
            return self.explore(game)
    
    def explore(self, game):
        valid = []
        for card in game.cards:
            if game.valid_card_play(card):
                valid.append(card)
        return valid[random.randrange(len(valid))]

    def load_state(self, checkpoint):
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()                                  

In [113]:
def run_3_dqn(num_games=100,printing=False,print_res=False,n_pass=4):
    agents      = [BiddingAgent(), CallingAgent(), PlayingAgent()]
    bridges     = [Bridge(i) for i in range(4)]

    for i in range(3):
        checkpoint = torch.load('AprilFoolsModel_Agent'+str(i)+'.pt')
        agents[i].load_state(checkpoint)
        agents[i].epsilon = 0.01

    bids = {}
    tot = {}

    for i in range(1,8):
        for j in range(1,6):
            bids[(i,j)] = 0
            tot[(i,j)] = 0


    def check_reshuffle():
        for bridge in bridges:
            if bridge.check_reshuffle():
                return True
        return False

    game_cnt = 0
    bidder_win_cnt = 0

    while game_cnt < num_games: # game_cnt < NUMGAMES
        bridges = [Bridge(i) for i in range(4)]

        next_player = game_cnt % 4


        # If any of the players can reshuffle, start a new game
        if check_reshuffle(): continue

        # Execute the bidding phase

        if printing: print('Bidding Phase:')

        while Bridge.current_phase == Bridge.BID_PHASE:

            bridge  = bridges[next_player]

            state   = agents[0].get_state(bridge)

            move = agents[0].get_action(state, bridge, n_pass)

            if printing: print(next_player,move)
            
            reward, done, next_player = bridge.play_step(move)


        # If everyone passes, start a new game
        if Bridge.all_passed: continue
        
        if printing:
            print('Final bid:')

            print(Bridge.last_number, Bridge.last_suit)
            print()
            
            print('Player cards, starting from bidder:')

            print(bridges[(Bridge.bidder_num)%4].cards)
            print(bridges[(Bridge.bidder_num + 1)%4].cards)
            print(bridges[(Bridge.bidder_num + 2)%4].cards)
            print(bridges[(Bridge.bidder_num + 3)%4].cards)

        # Partner calling phase

        # Run until the bidder makes a valid call
        reward=12345
        while reward != 0:
            bridge  = bridges[next_player]

            state   = agents[1].get_state(bridge)
            
            move = agents[1].get_action(state, bridge)

            reward, done, next_player = bridge.play_step(move)


        if printing:
            print('Partner card:')
            print(Bridge.partner_card)

        # For other players to check if they are the partner
        bridges[(Bridge.bidder_num + 1)%4].play_step()
        bridges[(Bridge.bidder_num + 2)%4].play_step()
        bridges[(Bridge.bidder_num + 3)%4].play_step()


        # Execute the card playing phase
        if printing: print('Card Playing Phase:')

        suit = ['Club','Diam','Heart','Spade']
        num  = ['2','3','4','5','6','7','8','9','10','J','Q','K','A']

        while Bridge.current_phase == Bridge.PLAY_PHASE:
            bridge  = bridges[next_player]

            state   = agents[2].get_state(bridge)
            
            move = agents[2].get_action(state, bridge)

            if printing: 
                if Bridge.bidder_num == next_player:
                    print('Bidder',num[move[1]-1],suit[move[0]-1])
                elif Bridge.bidder_lst[next_player] == 1:
                    print('Partner',num[move[1]-1],suit[move[0]-1])
                else:
                    print('Against',num[move[1]-1],suit[move[0]-1])

            reward, done, next_player = bridge.play_step(move)
        
        # Delegate rewards to agents

        game_cnt += 1
        tot[(Bridge.bid_number,Bridge.bid_suit)] += 1
        if printing:
            print('Game',game_cnt)
            print('Number:',Bridge.bid_number,', Suit:',Bridge.bid_suit)
        if Bridge.bidder_sets >= 6 + Bridge.bid_number:
            if printing: print(Bridge.bidder_sets,'Bidder win')
            bidder_win_cnt += 1
            bids[(Bridge.bid_number,Bridge.bid_suit)] += 1
        else:
            if printing: print(Bridge.bidder_sets,'Bidder lose')

        if game_cnt%10 == 0:
            print(game_cnt/1,'% done')

    print('bidder win rate:',bidder_win_cnt/game_cnt)

    if print_res:
        print(bids)
        print(tot)


        prop = {}

        for i in range(1,8):
            for j in range(1,6):
                if tot[(i,j)] != 0:
                    prop[(i,j)] = bids[(i,j)]/tot[(i,j)]
                else:
                    prop[(i,j)] = 'NIL'

        print(prop)

In [114]:
for n_pass in [1,2,4,6,8,12,16]:
    run_3_dqn(n_pass=n_pass)

10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.05
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.08
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.24
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.34
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.42
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.54
10.0 % done
20.0 % done
30.0 % done
40.0 % done
50.0 % done
60.0 % done
70.0 % done
80.0 % done
90.0 % done
100.0 % done
bidder win rate: 0.59

### Training bidding and playing with Gnoh's calling