In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Move the working directory two levels up; presumably this is what lets the
# project-relative import (`deepLearning`) and the 'deepLearning/...' paths used
# later resolve. NOTE: a relative chdir is not idempotent, so re-running this cell
# climbs two more levels each time.
os.chdir('../../')

In [3]:
from deepLearning import utils

In [4]:
import time

import torch
import torch.nn as nn
import treys
from treys import Evaluator
from treys import Card

# LSTM poker bot

Okay, so what are we trying to accomplish here?
We want to build a naive LSTM poker bot. We need to do three things:

- [x] Encode a poker game in a meaningful way

- [x] Develop a useful loss metric for games

- [x] Test many-to-many or many-to-one models for games

Note: the above checkboxes illustrate a CURSORY look at these topics. We will continue to explore them in future games.

## Encoding a poker game:

We're going to approach this by just encoding the cards at each step.
There are 4 discrete betting stages that are worth keeping track of in poker:
- Pre-Flop
- Flop
- Turn
- River

We'll create a `1x4x52` dim tensor to encode all the information from each stage of one poker game.

This way we can create a training tensor of `N` examples with shape `Nx4x52`,
where `4` again represents the discrete betting stages and `52` is the one-hot encoding of the cards.


In [5]:
def make_staged_games(num_games, device = 'cpu', dtype = torch.float, verbose = False):
    """
    Generate heads-up poker games encoded as staged one-hot card tensors.

    Every dealt game produces two examples that share the same board: one from
    each player's point of view. Therefore ``num_games // 2`` games are dealt and
    an odd ``num_games`` is rounded down to the nearest even number of examples.

    Inputs:
        num_games: total number of examples (rows) to generate
        device: torch device on which the tensors are created
        dtype: dtype of the returned X tensor
        verbose: print a progress line every `verbose` dealt games
                 (True/1 for every game, False/0 for none)

    Returns:
        X: tensor of shape (N, 4, 52); axis 1 is [hole cards, flop, turn, river]
        y: long tensor of shape (N,) taken from utils.score_heads_up; each row is
           labelled with the *opponent's* score, so 0 means the row's player won
           and 1 means they lost
    """
    to = {'device': device, 'dtype': dtype}
    X = []
    y = []
    # time the whole run (the timer must live outside the loop to be meaningful)
    start_time = time.time()
    for g in range(num_games // 2):
        p1, p2, board = utils.make_heads_up()

        # the three board stages are identical for both players; only the hole cards differ
        community = [
            utils.one_hot_cards(board[:3], **to),
            utils.one_hot_cards([board[3]], **to),
            utils.one_hot_cards([board[4]], **to),
        ]
        X.append(torch.stack([utils.one_hot_cards(p1, **to)] + community))
        X.append(torch.stack([utils.one_hot_cards(p2, **to)] + community))

        s1, s2 = utils.score_heads_up(p1, p2, board)
        y.append(torch.tensor(s2, **to)) # if p1 wins append 0 => s2 is 0
        y.append(torch.tensor(s1, **to)) # if p1 loses append 1 => s1 is 1

        # guard on `verbose` first: `g % False` would raise ZeroDivisionError
        if verbose and (g + 1) % verbose == 0:
            print("Completed {} in {:.2f} seconds".format(g + 1, time.time() - start_time))

    if not X:
        # torch.stack rejects an empty list; return correctly-shaped empty tensors instead
        return torch.zeros(0, 4, 52, **to), torch.zeros(0, dtype=torch.long, device=device)

    X = torch.stack(X)
    y = torch.stack(y)

    return X, y.to(torch.long)

## Many to One architecture experiments:

We're going to try to use a Many-to-one LSTM to predict our likelihood of winning a hand.

Our approach is as follows:
- LSTM with a hidden dimension of 300 (arbitrary)
- Output transformation with the following properties:
    - Linear layer with input 300 and output 2
    - Softmax for thresholding 
    - First dim of output is likelihood for us to win
    - Second dim is likelihood for opponents to win
    - This allows us to use `CrossEntropyLoss` as a penalty mechanism (Note we'll have to skip the softmax when we use `CrossEntropyLoss`)
    

In [7]:
class simpleLSTM(nn.Module):
    """
    Many-to-one LSTM for poker.

    The betting stages of one game are fed through a single LSTMCell in order
    and the final hidden state is mapped to two scores:
    index 0 = we win, index 1 = the opponent wins.
    """

    def __init__(self, hidden_size = 300):
        """
        hidden_size: size of hidden dimension of LSTM
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(52, hidden_size)
        self.output = nn.Linear(hidden_size, 2)
        # softmax over the two classes so squashed outputs are probabilities that sum to 1
        self.squash = nn.Softmax(dim=1)

    def forward(self, X, squash = False):
        """
        Forward pass of the LSTM
        X: input of shape Nx4x52 (one 52-wide one-hot card vector per betting stage)
        squash: whether or not to squash using softmax. Leave False when the
                result is fed to CrossEntropyLoss, which expects raw scores.

        Returns:
            tensor of shape Nx2 representing likelihood to win and likelihood for opponent to win.
            IF squash==True values represent probabilities.
        """
        N, _, _ = X.shape

        # zero initial state, sized from the configured hidden dimension
        hidden = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)
        cell = torch.zeros_like(hidden)

        # one LSTM step per betting stage: hand, flop, turn, river
        for stage in X.unbind(1):
            hidden, cell = self.lstm(stage, (hidden, cell))

        # only the final hidden state is scored (many-to-one)
        scores = self.output(hidden)

        if squash:
            return self.squash(scores)

        return scores

In [8]:
def train_model(X, y, model, epochs = 10, verbose = False, lr = 0.005):
    """
    Full-batch training of a classifier with Adam and CrossEntropyLoss.

    Inputs:
        X: training tensor accepted by `model` (shape (N,4,52) for the LSTMs in this notebook)
        y: target tensor of class indices, shape (N,), dtype long
        model: a torch.nn.Module model returning raw (un-squashed) scores of shape (N,C)
        epochs: number of epochs to train
        verbose: iterations of epochs to print out (1 for all, False for none)
        lr: Adam learning rate
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    for e in range(epochs):
        # clear stale gradients, then forward / backward / update with autograd enabled
        optimizer.zero_grad()
        scores = model(X)
        loss = criterion(scores, y)
        loss.backward()
        optimizer.step()
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,loss.item()))
                
def evaluate_model(X, y, model):
    """
    Compute classification accuracy of `model` on (X, y).

    Inputs:
        X: input tensor accepted by `model`
        y: target class indices, shape (N,) or (N,1)
        model: a torch.nn.Module returning per-class scores of shape (N,C)

    Returns:
        fraction of rows whose highest-scoring class equals the target (float)
    """
    with torch.no_grad():
        # the model emits one score per class, so the prediction is the argmax
        # over the class axis (thresholding raw scores at 0.5 is not meaningful)
        predictions = model(X).argmax(1)
        return (predictions == y.reshape(-1)).sum().item()/y.shape[0]

In [169]:
# WARNING THIS TAKES A LONG TIME
# NOTE(review): the device is hard-coded to 'cuda', so this cell needs a CUDA-capable runtime.
X_train, y_train = make_staged_games(30000, 'cuda')

In [170]:
X_test, y_test = make_staged_games(1000, 'cuda')

In [171]:
model = simpleLSTM().to('cuda')

In [186]:
train_model(X_train, y_train, model, 500, 100)

epoch: 0 loss: 0.5587
epoch: 100 loss: 0.6666
epoch: 200 loss: 0.6477
epoch: 300 loss: 0.6878
epoch: 400 loss: 0.6623


In [187]:
out = model(X_test, True)

In [188]:
out = out.argmax(1)

In [189]:
(out == y_test).sum().item()/y_test.shape[0]

0.629

In [190]:
import pickle

In [192]:
with open('deepLearning/lstmTrain.pkl', 'wb') as f:
    pickle.dump(X_train, f)

In [193]:
with open('deepLearning/lstmTrain_y.pkl', 'wb') as f:
    pickle.dump(y_train, f)

# Constant Loss experiments

We're going to have our model make many-to-many predictions, one for each round of the betting.

We're going to arbitrarily set the loss penalty to be: `1,2,3,4` corresponding with each round of betting 

In [6]:
class sequenceLSTM(nn.Module):
    """
    A sequence LSTM for choosing to call per round

    Many-to-many model: after each betting stage the current hidden state is
    mapped to two sigmoid-squashed scores, giving one decision per stage.
    """

    @staticmethod
    def init_weights(m):
        """Initialise LSTMCell and Linear parameters from a standard normal; other modules are left alone."""
        if isinstance(m, nn.LSTMCell):
            torch.nn.init.normal_(m.weight_hh)
            torch.nn.init.normal_(m.weight_ih)
            torch.nn.init.normal_(m.bias_hh)
            torch.nn.init.normal_(m.bias_ih)
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight)
            torch.nn.init.normal_(m.bias)

    def __init__(self, hidden_size = 300):
        """
        hidden_size: size of hidden dimension of LSTM
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(52, self.hidden_size)
        self.output = nn.Linear(self.hidden_size, 2)
        self.squash = nn.Sigmoid()
        # loss weight of each betting stage (pre-flop, flop, turn, river)
        self.penalty = [1,2,3,4]
        self.apply(sequenceLSTM.init_weights)

    def forward(self, X):
        """
        X is of shape N, 4, 52
        returns a tensor of shape (N, 4, 2): for every betting stage, the two
        sigmoid-squashed scores. Taking argmax over the last axis gives the
        per-stage decision, where 0 indicates staying in and 1 indicates folding
        """
        N, _, _ = X.shape

        # zero initial state on the input's device, sized from the configured hidden dimension
        hidden = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)
        cell = torch.zeros_like(hidden)

        out = []
        # one LSTM step and one scored decision per stage: pre-flop, flop, turn, river
        for stage in X.unbind(1):
            hidden, cell = self.lstm(stage, (hidden, cell))
            out.append(self.squash(self.output(hidden)))

        return torch.stack(out, 1)

    def calc_loss(self, X, y, mode = 'house'):
        """
        Stage-weighted loss over the per-stage outputs.

        Inputs:
            X: input of shape N, 4, 52
            y: outcome labels of shape (N,) or (N,1); 0 is mapped to +1 and 1 to -1
            mode: 'house' or 'player'; validated but otherwise unused

        Returns:
            scalar tensor: sum over games, stages and both output channels of
            output * sign(y) * self.penalty[stage]

        Raises:
            ValueError: if mode is neither 'house' nor 'player'

        NOTE(review): both output channels receive the same weight, so this
        objective does not by itself separate channel 0 from channel 1.
        """
        if mode != 'house' and mode != 'player':
            raise ValueError("mode must be of type {} or {}".format('house', 'player'))
        output = self(X) # of shape Nx4x2

        # build the weights on the same device/dtype as the output instead of assuming CUDA
        stage_weights = torch.tensor(self.penalty, dtype=output.dtype, device=output.device).reshape(1, -1)
        # map labels {0, 1} to signs {+1, -1}; not done in place to keep autograd happy
        outcome_sign = (y.reshape(-1, 1).to(device=output.device, dtype=output.dtype) * -2) + 1
        scoring = outcome_sign * stage_weights # of shape Nx4

        # multiplication keeps the loss differentiable (thresholded decisions would not be)
        loss = (output*scoring.unsqueeze(2)).sum()

        return loss

In [7]:
# NOTE(review): this calls utils.make_staged_games, not the make_staged_games defined
# earlier in this notebook; presumably the function was moved into utils. Confirm the
# two agree. The device is hard-coded to 'cuda'.
X_train, y_train = utils.make_staged_games(1000, 'cuda')

In [8]:
seq = sequenceLSTM().to('cuda')

In [9]:
def train_sequence(X, y, model, epochs = 10, verbose = False, lr = 0.005):
    """
    Full-batch training of a model that supplies its own loss via `calc_loss`.

    Inputs:
        X: training tensor passed to model.calc_loss (shape (N,4,52) for sequenceLSTM)
        y: target tensor passed to model.calc_loss (shape (N,) for sequenceLSTM)
        model: a torch.nn.Module exposing calc_loss(X, y) -> scalar tensor
        epochs: number of epochs to train
        verbose: iterations of epochs to print out (1 for all, False for none)
        lr: Adam learning rate
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)

    for e in range(epochs):
        # clear stale gradients, then forward / backward / update with autograd enabled
        optimizer.zero_grad()
        loss = model.calc_loss(X, y)
        loss.backward()
        optimizer.step()
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,loss.item()))

In [10]:
train_sequence(X_train, y_train, seq, 50, True)

epoch: 0 loss: 76.8632
epoch: 1 loss: -1321.6620
epoch: 2 loss: -2138.6997
epoch: 3 loss: -2708.9937
epoch: 4 loss: -3164.9229
epoch: 5 loss: -3581.3811
epoch: 6 loss: -3942.7563
epoch: 7 loss: -4243.6387
epoch: 8 loss: -4532.1318
epoch: 9 loss: -4802.6689
epoch: 10 loss: -5035.4102
epoch: 11 loss: -5244.4590
epoch: 12 loss: -5428.1782
epoch: 13 loss: -5602.4834
epoch: 14 loss: -5761.4717
epoch: 15 loss: -5911.5962
epoch: 16 loss: -6049.0806
epoch: 17 loss: -6176.6885
epoch: 18 loss: -6305.0684
epoch: 19 loss: -6429.3896
epoch: 20 loss: -6543.5283
epoch: 21 loss: -6651.5112
epoch: 22 loss: -6753.8369
epoch: 23 loss: -6859.4717
epoch: 24 loss: -6966.1348
epoch: 25 loss: -7060.6289
epoch: 26 loss: -7137.5117
epoch: 27 loss: -7217.9126
epoch: 28 loss: -7300.5801
epoch: 29 loss: -7374.1094
epoch: 30 loss: -7441.8174
epoch: 31 loss: -7505.8945
epoch: 32 loss: -7570.8799
epoch: 33 loss: -7633.2637
epoch: 34 loss: -7692.1680
epoch: 35 loss: -7746.9033
epoch: 36 loss: -7793.3701
epoch: 37 loss

In [73]:
X_train[0][0].nonzero().reshape(-1)

tensor([29, 51], device='cuda:0')

In [82]:
X_train[:2].nonzero(as_tuple=True)

(tensor([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'),
 tensor([0, 0, 1, 1, 1, 2, 3, 0, 0, 1, 1, 1, 2, 3], device='cuda:0'),
 tensor([29, 51,  8, 13, 27,  6,  4, 38, 48,  8, 13, 27,  6,  4],
        device='cuda:0'))

In [83]:
X_train[:2].shape

torch.Size([2, 4, 52])

In [87]:
X_train[:2].reshape(-1,52).nonzero(as_tuple=True)

(tensor([0, 0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 5, 6, 7], device='cuda:0'),
 tensor([29, 51,  8, 13, 27,  6,  4, 38, 48,  8, 13, 27,  6,  4],
        device='cuda:0'))

In [97]:
X_train[0].nonzero(as_tuple=True)[1].tolist()

[29, 51, 8, 13, 27, 6, 4]

In [18]:
# NOTE: this cell raises the TypeError recorded below. unwrap_game(...)[0] is a whole
# game (a list of card ints), while print_pretty_card appears to expect a single card int.
# Card.print_pretty_cards is the list variant (used successfully in a later cell).
Card.print_pretty_card(utils.unwrap_game(X_train[:5], True)[0])

TypeError: unsupported operand type(s) for >>: 'list' and 'int'

In [22]:
utils.unwrap_game(X_train[:5], True)[0][0]

2131213

In [38]:
game = utils.unwrap_game(X_train,True)

In [40]:
game[0]

[2131213, 16812055, 147715, 279045, 1057803, 2102541, 268442665]

In [44]:
seq(X_train[:2]).argmax(2)

tensor([[0, 0, 1, 0],
        [1, 0, 0, 0]], device='cuda:0')

In [46]:
Card.print_pretty_cards(game[0])

' [7♣],[T♣],[3♦],[4♦],[6♥],[7♠],[A♠] '

In [83]:
player1, bot, board = utils.make_heads_up()
player1 = utils.make_treys(player1)
board_treys = utils.make_treys(board)


In [58]:
Card.print_pretty_cards(player1)

' [J♥],[8♦] '

In [62]:
print('Cards {} '.format(Card.print_pretty_cards(player1)))

Cards  [J♥],[8♦]  


In [54]:
input("Betting costs {} 1 for YES, other for NO".format(1))

Betting costs 1 1 for YES, other for NO 1


'1'

In [84]:
bot_encoded = torch.stack([utils.one_hot_cards(bot), utils.one_hot_cards(board[:3]), utils.one_hot_cards([board[3]]), utils.one_hot_cards([board[4]])]).unsqueeze(0).to('cuda')

In [87]:
bot_encoded.shape

torch.Size([1, 4, 52])

In [88]:
bot_bets = seq.forward(bot_encoded).argmax(2).squeeze()

In [93]:
# bot_bets already holds the squeezed per-stage argmax from the previous cell,
# so it is displayed directly (a second argmax(2) on a 1-D tensor is out of range)
bot_bets

tensor([0, 1, 0, 0], device='cuda:0')

In [101]:
board[:3]

['8s', '7s', 'Jc']

In [99]:
# NOTE: this cell raises the TypeError recorded below. board[:3] is a list (of card
# strings, see the previous output), while print_pretty_card appears to expect one card int.
# Card.print_pretty_cards(board_treys[:3]) is the form that play_game uses for the flop.
Card.print_pretty_card(board[:3])

TypeError: unsupported operand type(s) for >>: 'list' and 'int'

In [117]:
def play_game(score, model = None):
    """
    Play one interactive heads-up hand on the console against the sequence model.

    The bot's four stay/fold decisions (pre-flop, flop, turn, river) are computed
    up front from its hole cards and the full board; 0 means stay in, 1 means fold.
    The human is prompted with input() at each stage and the hand ends as soon as
    either side folds, otherwise at showdown.

    Inputs:
        score: unused; kept so existing calls such as play_game(0) keep working
        model: model returning per-stage scores of shape (1, 4, 2); defaults to
               the notebook-level `seq`

    Returns:
        None (everything is reported via print)
    """
    if model is None:
        model = seq
    # run on whatever device the model lives on instead of assuming CUDA
    device = next(model.parameters()).device

    player1, bot, board = utils.make_heads_up()
    bot_treys = utils.make_treys(bot)
    player1 = utils.make_treys(player1)
    board_treys = utils.make_treys(board)
    print("Your Cards:{} ".format(Card.print_pretty_cards(player1)))

    bot_encoded = torch.stack(
        [
            utils.one_hot_cards(bot),
            utils.one_hot_cards(board[:3]),
            utils.one_hot_cards([board[3]]),
            utils.one_hot_cards([board[4]]),
        ]
    ).unsqueeze(0).to(device)
    # inference only: no autograd graph is needed
    with torch.no_grad():
        bot_bets = model.forward(bot_encoded).argmax(2).squeeze()

    # pre-flop bet
    bet = input("It costs 1 to call (y/n)")
    if bet != 'y':
        print("You folded and lost 0.")
        return
    if bot_bets[0] == 1:
        print("Bot folded with {} you win 0".format(Card.print_pretty_cards(bot_treys)))
        return
    print("Bot calls 1. Pot is 2")
    print("Flop:{}".format(Card.print_pretty_cards(board_treys[:3])))

    # flop bet
    if bot_bets[1] == 0:
        print("Bot raises 2")
    else:
        print("Bot folds with {} you win 1!".format(Card.print_pretty_cards(bot_treys)))
        return
    bet = input("It costs 2 to call (y/n)")
    if bet != 'y':
        print("You lost 1 token")
        return

    # turn bet
    # single cards are shown through print_pretty_cards (wrapped in a list): that is the
    # call whose string return value the rest of this function already relies on
    print("Turn:{}".format(Card.print_pretty_cards([board_treys[3]])))
    bet = input("Pot is 6. It is 3 to call (y/n)")
    if bet!= 'y':
        print("You lost 3 tokens")
        return
    if bot_bets[2] == 0:
        print("Bot calls 3. Pot is now 12.")
    else:
        print("Bot folds with {} You win a pot worth 6 tokens.".format(Card.print_pretty_cards(bot_treys)))
        return

    # river bet
    print("River:{}".format(Card.print_pretty_cards([board_treys[4]])))
    if bot_bets[3] == 0:
        print("Bot raises 4. Pot is now 20.")
    else:
        print("Bot folds with {} You win a pot worth 12 tokens.".format(Card.print_pretty_cards(bot_treys)))
        return
    bet = input("It is 4 to call (y/n)")
    if bet != 'y':
        print("You've lost 6 tokens.")
        return

    # showdown: a lower evaluate() value is treated as the stronger hand
    e = Evaluator()
    player_rank = e.evaluate(board_treys, player1)
    bot_rank = e.evaluate(board_treys, bot_treys)
    if player_rank < bot_rank:
        outcome = "You win a pot of 20!\nYour hand was:{}\nBot hand was:{}\nBoard was{}"
    elif player_rank == bot_rank:
        # equal ranks are a tie, so the pot is split
        outcome = "You push a pot of 20 taking home 10.\nYour hand was:{}\nBot hand was:{}\nBoard was{}"
    else:
        outcome = "You lose a pot of 20.\nYour hand was:{}\nBot hand was:{}\nBoard was{}"
    print(outcome.format(Card.print_pretty_cards(player1), Card.print_pretty_cards(bot_treys), Card.print_pretty_cards(board_treys)))
        
        

In [120]:
play_game(0)

Your Cards: [K♣],[8♣]  


It costs 1 to call (y/n) y


Bot calls 1. Pot is 2
Flop: [K♠],[4♠],[7♦] 
Bot folds with  [K♦],[8♦]  you win 1!
