In [148]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import os
# Move the working directory two levels up; presumably this is the project root,
# since later cells import `deepLearning` and open `deepLearning/...` paths.
# NOTE: the path is relative, so re-running this cell moves up two more levels.
os.chdir('../../')

In [3]:
from deepLearning import utils

In [23]:
import pickle
import time

import torch
import torch.nn as nn
import treys
from tqdm.notebook import tqdm
from treys import Card
from treys import Evaluator

# LSTM poker bot

Okay, so what are we trying to accomplish here?
We want to build a naive LSTM poker bot. We need to do three things:

- [x] Encode a poker game in a meaningful way

- [x] Develop a useful loss metric for games

- [x] Test many-to-many or many-to-one models for games

Note: the above checkboxes illustrate a CURSORY look at these topics. We will continue to explore them in future games.

## Encoding a poker game:

We're going to approach this by just encoding the cards at each step.
There are 4 discrete betting stages that are worth keeping track of in poker:
- Pre-Flop
- Flop
- Turn
- River

We'll create a `1x4x52` dim tensor to encode all the information in each stage of one poker game.

This way we can create a training dataframe of `N` examples with shape: `Nx4x52`
Where `4` again represents the discrete betting stages and `52` is the one hot encoded cards.


In [5]:
def make_staged_games(num_games, device = 'cpu', dtype = torch.float, verbose = False):
    """
    Generate random heads-up games encoded stage by stage.

    Every dealt game produces two samples, one per player: that player's hole
    cards followed by the flop, the turn and the river, each encoded with
    `utils.one_hot_cards` and stacked into one 4-row tensor.

    Inputs:
        num_games: number of samples to generate; games are dealt in pairs, so
            an odd value is rounded down to the nearest even number
        device: torch device the tensors are created on
        dtype: dtype used for the encoded cards
        verbose: print progress every `verbose` dealt games (False for none)

    Returns:
        X: stacked samples (Nx4x52, assuming `utils.one_hot_cards` returns a
           length-52 vector -- TODO confirm against utils)
        y: long tensor with one label per sample
    """
    to = {'device': device, 'dtype': dtype}
    X = []
    y = []
    # Time the whole run, not a single iteration
    start_time = time.time()
    for g in range(num_games//2):
        # `verbose` doubles as the reporting interval. Guard it so the default
        # (False == 0) never reaches the modulo and raises ZeroDivisionError.
        if verbose and g % verbose == 0:
            print("Completed {} in {:.2f} seconds".format(g, time.time()-start_time))
        p1, p2, board = utils.make_heads_up()

        # Community cards are identical for both players, so encode them once
        community = [
            utils.one_hot_cards(board[:3], **to),
            utils.one_hot_cards([board[3]], **to),
            utils.one_hot_cards([board[4]], **to),
        ]
        X.append(torch.stack([utils.one_hot_cards(p1, **to)] + community))
        X.append(torch.stack([utils.one_hot_cards(p2, **to)] + community))

        s1, s2 = utils.score_heads_up(p1, p2, board)

        # Each sample is labelled with the OTHER player's score
        y.append(torch.tensor(s2, **to)) # if p1 wins append 0 => s2 is 0
        y.append(torch.tensor(s1, **to)) # if p1 loses append 1 => s1 is 1

    X = torch.stack(X)
    y = torch.stack(y)

    return X, y.to(torch.long)

## Many to One architecture experiments:

We're going to try to use a Many-to-one LSTM to predict our likelihood of winning a hand.

Our approach is as follows:
- LSTM with a hidden dimension of 300 (arbitrary)
- Output transformation with the following properties:
    - Linear layer with input 300 and output 2
    - Softmax for thresholding 
    - First dim of output is likelihood for us to win
    - Second dim is likelihood for opponents to win
    - This allows us to use `CrossEntropyLoss` as a penalty mechanism (Note we'll have to skip the softmax when we use `CrossEntropyLoss`)
    

In [7]:
class simpleLSTM(nn.Module):
    """
    A simpleLSTM for poker.

    Many-to-one model: one LSTM cell is stepped over the betting stages of a
    game and the final hidden state is mapped to two scores by a linear layer.
    """

    def __init__(self, hidden_size = 300):
        """
        hidden_size: size of hidden dimension of LSTM
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(52, hidden_size)
        self.output = nn.Linear(hidden_size, 2)
        # Softmax over the two class scores, as the docstrings describe.
        # It keeps the same argmax as the raw scores.
        self.squash = nn.Softmax(dim=1)

    def forward(self, X, squash = False):
        """
        Forward pass of the LSTM
        X: input of shape Nx4x52 (one row per betting stage)
        squash: whether or not to squash using softmax; leave False when the
            scores are fed to `CrossEntropyLoss`, which expects raw scores

        Returns:
            tensor of shape Nx2 representing likelihood to win and likelihood for opponent to win.
            IF squash==True values represent probabilities (each row sums to 1).
        """
        N = X.shape[0]

        # Size the initial state from the configured hidden size, not a
        # literal 300, so a non-default `hidden_size` works.
        hidden = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)
        cell = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)

        # Step through the stages in order: hand, flop, turn, river
        for stage in range(X.shape[1]):
            hidden, cell = self.lstm(X[:, stage], (hidden, cell))

        # Only the state after the last stage is scored (many-to-one)
        scores = self.output(hidden)

        if squash:
            return self.squash(scores)

        return scores

In [8]:
def train_model(X, y, model, epochs = 10, verbose = False, lr = 0.005):
    """
    Full-batch training with Adam and cross-entropy loss.

    Inputs:
        X: training tensor accepted by `model` (Nx4x52 for the LSTM in this notebook)
        y: target tensor of class indices, shape (N,)
        model: a torch.nn.Module model returning raw class scores of shape (N, C)
        epochs: number of epochs to train
        verbose: iterations of epochs to print out (1 for all, False for none)
        lr: Adam learning rate
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    for e in range(epochs):
        # Clear gradients left over from the previous step before the new pass
        optimizer.zero_grad()
        scores = model(X)
        loss = criterion(scores, y)
        loss.backward()
        optimizer.step()
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,loss.item()))
                
def evaluate_model(X, y, model):
    """
    Classification accuracy of `model` on (X, y).

    Inputs:
        X: input tensor accepted by `model`
        y: tensor of class indices, shape (N,) or (N,1)
        model: a torch.nn.Module model returning one score per class, shape (N, C)

    Returns:
        fraction of samples whose highest-scoring class equals the label
    """
    with torch.no_grad():
        # The model emits one score per class, so the prediction is the argmax.
        # Thresholding raw scores at 0.5 cannot be compared with (N,) labels.
        predictions = model(X).argmax(dim=1)
        return (predictions == y.reshape(-1)).sum().item()/y.shape[0]

In [169]:
# WARNING: THIS TAKES A LONG TIME
# Generates 30000 training samples with the tensors created directly on the GPU.
X_train, y_train = make_staged_games(30000, 'cuda')

In [170]:
# Separate set of 1000 freshly generated samples, used below for evaluation.
X_test, y_test = make_staged_games(1000, 'cuda')

In [171]:
# Many-to-one model with the default hidden size, moved to the GPU.
model = simpleLSTM().to('cuda')

In [186]:
# Train for 500 epochs, printing the loss every 100 epochs.
train_model(X_train, y_train, model, 500, 100)

epoch: 0 loss: 0.5587
epoch: 100 loss: 0.6666
epoch: 200 loss: 0.6477
epoch: 300 loss: 0.6878
epoch: 400 loss: 0.6623


In [187]:
# Second positional argument is `squash=True`: return squashed outputs, not raw scores.
out = model(X_test, True)

In [188]:
# Predicted class per sample: index of the larger of the two outputs.
# NOTE: rebinds `out`, so this cell is not safe to run twice in a row.
out = out.argmax(1)

In [189]:
# Test accuracy: fraction of samples whose predicted class equals the label.
(out == y_test).sum().item()/y_test.shape[0]

0.629

In [190]:
import pickle

In [192]:
# Save the generated training inputs; the path is relative to the current working directory.
with open('deepLearning/lstmTrain.pkl', 'wb') as f:
    pickle.dump(X_train, f)

In [193]:
# Save the matching training labels next to the inputs.
with open('deepLearning/lstmTrain_y.pkl', 'wb') as f:
    pickle.dump(y_train, f)

# Constant Loss experiments

We're going to have our model make many-to-many predictions: one prediction for each round of betting.

We're going to arbitrarily set the loss penalty to be: `1,2,3,4` corresponding with each round of betting 

In [93]:
class sequenceLSTM(nn.Module):
    """
    A sequence LSTM for choosing to call per round.

    Many-to-many model: after each betting stage the hidden state is mapped to
    two sigmoid scores. Index 0 is read as "stay in" and index 1 as "fold".
    """

    @staticmethod
    def init_weights(m):
        """
        Re-initialise the parameters of one sub-module from a standard normal.
        Intended for `nn.Module.apply`; modules without weights are skipped.
        """
        if type(m) == torch.nn.LSTMCell:
            torch.nn.init.normal_(m.weight_hh)
            torch.nn.init.normal_(m.weight_ih)
            torch.nn.init.normal_(m.bias_hh)
            torch.nn.init.normal_(m.bias_ih)
        elif isinstance(m, torch.nn.Linear):
            torch.nn.init.normal_(m.weight)
            torch.nn.init.normal_(m.bias)

    def __init__(self, hidden_size = 300):
        """
        hidden_size: size of hidden dimension of LSTM
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(52, self.hidden_size)
        self.output = nn.Linear(self.hidden_size, 2)
        self.squash = nn.Sigmoid()
        # Per-stage loss weight: pre-flop, flop, turn, river
        self.penalty = [1,2,3,4]
        self.apply(sequenceLSTM.init_weights)

    def forward(self, X):
        """
        X is of shape N, 4, 52
        returns a tensor of shape (N, 4, 2): for every game and betting stage,
        the score for staying in (index 0) and for folding (index 1)
        """
        N = X.shape[0]
        # Size the state from the configured hidden size, not a literal 300
        hidden = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)
        cell = torch.zeros(N, self.hidden_size, dtype=X.dtype, device=X.device)

        out = []
        # Each stage sees every card revealed so far, so inputs accumulate
        visible = torch.zeros_like(X[:, 0])
        for stage in range(X.shape[1]):
            visible = visible + X[:, stage]
            hidden, cell = self.lstm(visible, (hidden, cell))
            out.append(self.squash(self.output(hidden)))

        return torch.stack(out, 1) # shape N x stages x 2

    def calc_loss(self, X, y, mode = 'house'):
        """
        Penalty-weighted loss over all betting stages.

        X: input of shape N, 4, 52
        y: labels of shape (N,); 0 counts as a win for the sample and 1 as a loss
        mode: must be 'house' or 'player'; validated but otherwise unused

        For a winning sample, staying in lowers the loss and folding raises it.
        For a losing sample the signs flip. Each stage is weighted by
        `self.penalty`.

        Raises:
            Exception: if `mode` is neither 'house' nor 'player'
        """
        if mode != 'house' and mode != 'player':
            raise Exception("mode must be of type {} or {}".format('house', 'player'))
        output = self.forward(X) # of shape N x 4 x 2
        # Build the weights on the model output's device, not a fixed 'cuda'
        penalty = torch.tensor(self.penalty, dtype=output.dtype, device=output.device).reshape(1, -1)
        # +1 for a win (y == 0), -1 for a loss (y == 1)
        outcome = ((y * -2) + 1).to(output.dtype).reshape(-1, 1)
        scoring = outcome * penalty
        # fold score minus stay score, computed without mutating `output`
        margin = output[:, :, 1] - output[:, :, 0]
        loss = (margin * scoring).sum()

        return loss

In [7]:
# Dataset for the sequence experiments, built with the `utils` version of make_staged_games.
# NOTE: rebinds X_train / y_train, replacing the set generated for the many-to-one model.
X_train, y_train = utils.make_staged_games(1000, 'cuda')

In [209]:
# Fresh sequence model on the GPU.
seq = sequenceLSTM().to('cuda')

In [136]:
def train_sequence(X, y, model, epochs = 10, batch_size = 10000, verbose = False):
    """
    Full-batch training of a model that provides its own `calc_loss(X, y)`.

    Inputs:
        X: training tensor passed to `model.calc_loss` (N x 4 x 52 for sequenceLSTM)
        y: target tensor of shape (N,)
        model: a torch.nn.Module model exposing `calc_loss(X, y)`
        epochs: number of epochs to train
        batch_size: unused here, every epoch runs on all of X; kept so existing calls still work
        verbose: iterations of epochs to print out (1 for all, False for none)
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.005)
    for e in range(epochs):
        # Clear gradients left over from the previous step before the new pass
        optimizer.zero_grad()
        loss = model.calc_loss(X, y)
        loss.backward()
        optimizer.step()
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,loss.item()))

In [138]:
# Pass `verbose` by keyword: the fifth positional parameter is `batch_size`,
# so a positional True would never enable the per-epoch printout.
train_sequence(X_train, y_train, seq, 50, verbose=True)

epoch: 0 loss: -7659.9150
epoch: 1 loss: -7754.9209
epoch: 2 loss: -7838.3247
epoch: 3 loss: -7895.5728
epoch: 4 loss: -7954.4590
epoch: 5 loss: -7996.0518
epoch: 6 loss: -8037.0791
epoch: 7 loss: -8081.0586
epoch: 8 loss: -8113.9155
epoch: 9 loss: -8139.6133
epoch: 10 loss: -8169.6821
epoch: 11 loss: -8197.6582
epoch: 12 loss: -8225.1846
epoch: 13 loss: -8257.2207
epoch: 14 loss: -8281.8223
epoch: 15 loss: -8305.7480
epoch: 16 loss: -8330.6963
epoch: 17 loss: -8358.5781
epoch: 18 loss: -8381.5312
epoch: 19 loss: -8415.0166
epoch: 20 loss: -8436.6162
epoch: 21 loss: -8461.4336
epoch: 22 loss: -8485.6797
epoch: 23 loss: -8508.7012
epoch: 24 loss: -8537.4785
epoch: 25 loss: -8565.1387
epoch: 26 loss: -8583.5215
epoch: 27 loss: -8600.7129
epoch: 28 loss: -8622.8008
epoch: 29 loss: -8643.5820
epoch: 30 loss: -8657.9766
epoch: 31 loss: -8675.3818
epoch: 32 loss: -8690.1953
epoch: 33 loss: -8709.2559
epoch: 34 loss: -8733.0752
epoch: 35 loss: -8757.4922
epoch: 36 loss: -8777.1172
epoch: 37 l

In [83]:
def play_game(player_score, bot_score):
    """
    Play one interactive heads-up hand against the global `seq` model.

    The bot's decisions for all four stages are computed up front from its own
    cards and the board (0 = stay in, 1 = fold). The human is prompted at each
    stage and answers 'y' to call; anything else folds.

    Inputs:
        player_score: the human's token balance before the hand
        bot_score: the bot's token balance before the hand

    Returns:
        (player_score, bot_score) after the hand. Whatever one side wins the
        other loses, so the sum is unchanged.

    Relies on the notebook globals `utils`, `seq`, `Card` and `Evaluator`.
    `seq` must have at least one parameter; its device is used for the input.
    """
    player1, bot, board = utils.make_heads_up()
    bot_treys = utils.make_treys(bot)
    player1 = utils.make_treys(player1)
    board_treys = utils.make_treys(board)
    print("Your Cards:{} ".format(Card.print_pretty_cards(player1)))

    # Encode the bot's view stage by stage and put it on the model's own
    # device, not a hard-coded 'cuda'.
    device = next(seq.parameters()).device
    stages = [bot, board[:3], [board[3]], [board[4]]]
    bot_encoded = torch.stack([utils.one_hot_cards(cards) for cards in stages]).unsqueeze(0).to(device)
    with torch.no_grad():
        # one decision per stage: 0 = stay in, 1 = fold
        bot_bets = seq.forward(bot_encoded).argmax(2).squeeze()

    # pre-flop bet
    bet = input("It costs 1 to call (y/n)")
    if bet != 'y':
        print("You folded and lost 0.")
        return (player_score, bot_score)
    if bot_bets[0] == 1:
        print("Bot folded with {} you win 0".format(Card.print_pretty_cards(bot_treys)))
        return (player_score, bot_score)
    print("Bot calls 1. Pot is 2")
    print("Flop:{}".format(Card.print_pretty_cards(board_treys[:3])))
    # flop bet
    if bot_bets[1] == 0:
        print("Bot raises 2")
    else:
        print("Bot folds with {} you win 1!".format(Card.print_pretty_cards(bot_treys)))
        return (player_score+1, bot_score-1)
    bet = input("It costs 2 to call (y/n)")
    if bet != 'y':
        print("You lost 1 token")
        return (player_score-1, bot_score+1)
    #turn bet
    print("Turn:{}".format(Card.print_pretty_card(board_treys[3])))
    bet = input("Pot is 6. It is 3 to call (y/n)")
    if bet!= 'y':
        print("You lost 3 tokens")
        return (player_score-3, bot_score+3)
    if bot_bets[2] == 0:
        print("Bot calls 3. Pot is now 12.")
    else:
        print("Bot folds with {} You win a pot worth 6 tokens.".format(Card.print_pretty_cards(bot_treys)))
        return (player_score+3, bot_score-3)
    #river bet
    print("River:{}".format(Card.print_pretty_card(board_treys[4])))
    if bot_bets[3] == 0:
        print("Bot raises 4. Pot is now 20.")
    else:
        print("Bot folds with {} You win a pot worth 12 tokens.".format(Card.print_pretty_cards(bot_treys)))
        # The bot folded, so the player collects the 6 tokens the bot put in
        return (player_score+6, bot_score-6)
    bet = input("It is 4 to call (y/n)")
    if bet != 'y':
        print("You've lost 6 tokens.")
        return (player_score-6, bot_score+6)

    # showdown: the smaller evaluator result wins here
    e = Evaluator()
    player_rank = e.evaluate(board_treys, player1)
    bot_rank = e.evaluate(board_treys, bot_treys)
    if player_rank < bot_rank:
        print("You win a pot of 20!\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(Card.print_pretty_cards(player1), Card.print_pretty_cards(bot_treys), Card.print_pretty_cards(board_treys)))
        return (player_score+10, bot_score-10)
    elif player_rank > bot_rank:
        print("You lose a pot of 20.\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(Card.print_pretty_cards(player1), Card.print_pretty_cards(bot_treys), Card.print_pretty_cards(board_treys)))
        return (player_score-10, bot_score+10)
    else:
        print("You push a pot of 20 taking home 10.\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(Card.print_pretty_cards(player1), Card.print_pretty_cards(bot_treys), Card.print_pretty_cards(board_treys)))
        return (player_score, bot_score)


In [143]:
# play_game needs both starting balances: the player's and the bot's
play_game(0, 0)

Your Cards: [4♠],[J♥]  


It costs 1 to call (y/n) y


Bot calls 1. Pot is 2
Flop: [4♥],[5♥],[7♣] 
Bot folds with  [3♦],[3♥]  you win 1!


In [77]:
# let's load the bigger training set:
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
with open('./deepLearning/data/X.pkl', 'rb') as f:
    X = pickle.load(f)
    
with open('./deepLearning/data/y.pkl', 'rb') as f:
    y = pickle.load(f)

In [78]:
# Keep the full dataset on the CPU; get_batch moves each sampled batch to the GPU.
X = X.to('cpu')
y = y.to('cpu')

# Sanity check of the loaded shapes
print(X.shape)
print(y.shape)

torch.Size([1000000, 4, 52])
torch.Size([1000000])


In [116]:
# we need to use batchwise training here so:
def get_batch(X, y, batch_size, device = 'cuda'):
    """
    Draw a random batch of matching rows from X and y.

    Rows are sampled uniformly WITH replacement, so a batch may contain
    duplicates and `batch_size` may exceed the number of rows.

    Inputs:
        X: tensor whose first dimension indexes samples
        y: tensor whose first dimension indexes the same samples
        batch_size: number of rows to draw
        device: device the batch is moved to

    Returns:
        (X_batch, y_batch), each with first dimension `batch_size`, on `device`
    """
    rows = torch.randint(0, X.shape[0], (batch_size,))
    return X[rows].to(device), y[rows].to(device)

def train_sequence(X, y, model, epochs = 10, batch_size = 10000, verbose = False, deepVerbose = False):
    """
    Mini-batch training of a model that provides its own `calc_loss(X, y)`.

    Each epoch draws `X.shape[0]//batch_size` random batches with `get_batch`,
    and at least one batch when the dataset is smaller than `batch_size`.

    Inputs:
        X: training tensor whose first dimension indexes samples (N x 4 x 52 for sequenceLSTM)
        y: target tensor of shape (N,)
        model: a torch.nn.Module model exposing `calc_loss(X, y)`
        epochs: number of epochs to train
        batch_size: number of samples drawn per batch
        verbose: iterations of epochs to print out (1 for all, False for none)
        deepVerbose: iterations of batches to print out; ignored unless `verbose` is set
    """
    if not verbose:
        deepVerbose = False

    n_samples = X.shape[0]
    # A dataset smaller than batch_size used to give zero batches and silently
    # train nothing; get_batch samples with replacement, so one batch is valid.
    n_batches = max(1, n_samples//batch_size) if n_samples else 0
    if verbose:
        print("Training on {} samples across {} batches per epoch".format(n_samples, n_batches))

    optimizer = torch.optim.Adam(model.parameters(), lr = 0.005)
    for e in tqdm(range(epochs)):
        epoch_loss = 0
        for b in range(n_batches):
            X_batch, y_batch = get_batch(X, y, batch_size)
            # Clear gradients left over from the previous step before the new pass
            optimizer.zero_grad()
            loss = model.calc_loss(X_batch, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            if deepVerbose and b%deepVerbose == 0:
                print("batch: {} loss: {:.4f}".format(b,loss.item()))
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,epoch_loss))

In [94]:
# Fresh sequence model on the GPU; replaces any previously trained `seq`.
seq = sequenceLSTM().to('cuda')

In [117]:
# 100 epochs, batches of 50000 samples, epoch loss printed every 20 epochs, no per-batch logging.
train_sequence(X,y, seq, 100, 50000, 20, False)

Training on 1000000 samples across 20 batches per epoch


  0%|          | 0/100 [00:00<?, ?it/s]

epoch: 0 loss: -4371264.6719


KeyboardInterrupt: 

In [147]:
# Deal one hand, show the bot's decision for every stage, then report the showdown.
player1, bot, board = utils.make_heads_up()
bot_treys = utils.make_treys(bot)
player1 = utils.make_treys(player1)
board_treys = utils.make_treys(board)

# The bot's view of the hand: hole cards, flop, turn, river
stage_cards = [bot, board[:3], [board[3]], [board[4]]]
bot_encoded = torch.stack([utils.one_hot_cards(cards) for cards in stage_cards]).unsqueeze(0).to('cuda')
bot_bets = seq.forward(bot_encoded).argmax(2).squeeze()
print('Bot Bets {}'.format(bot_bets))

e = Evaluator()
player_rank = e.evaluate(board_treys, player1)
bot_rank = e.evaluate(board_treys, bot_treys)
shown = (Card.print_pretty_cards(player1), Card.print_pretty_cards(bot_treys), Card.print_pretty_cards(board_treys))
# The smaller evaluator result is treated as the winning hand
if player_rank < bot_rank:
    print("You win a pot of 20!\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(*shown))
elif player_rank > bot_rank:
    print("You lose a pot of 20.\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(*shown))
else:
    print("You push a pot of 20 taking home 10.\nYour hand was:{}\nBot hand was:{}\nBoard was{}".format(*shown))


Bot Bets tensor([1, 0, 0, 1], device='cuda:0')
You lose a pot of 20.
Your hand was: [5♦],[7♦] 
Bot hand was: [T♥],[7♣] 
Board was [8♥],[K♥],[9♣],[2♥],[J♣] 


In [124]:
# Starting token balances for the human player and the bot.
# NOTE: rebinds `bot`, which another cell uses for the bot's hole cards.
player, bot = 100, 100

In [91]:
# Play one hand and carry the updated balances forward; re-run to play another hand.
player, bot = play_game(player, bot)

Your Cards: [5♣],[J♠]  


It costs 1 to call (y/n) y


Bot calls 1. Pot is 2
Flop: [A♠],[4♠],[A♥] 
Bot raises 2


It costs 2 to call (y/n) y


Turn:[4♦]


Pot is 6. It is 3 to call (y/n) y


Bot calls 3. Pot is now 12.
River:[T♥]
Bot raises 4. Pot is now 20.


It is 4 to call (y/n) y


You win a pot of 20!
Your hand was: [5♣],[J♠] 
Bot hand was: [8♠],[7♦] 
Board was [A♠],[4♠],[A♥],[4♦],[T♥] 


In [92]:
# Current balances, printed as: player, bot
print("{}, {}".format(player, bot))

132, 68


In [138]:
# One-off hand starting from balances 1 and 2; the returned balances are displayed, not stored.
play_game(1,2)

Your Cards: [7♠],[8♠]  


It costs 1 to call (y/n) y


Bot calls 1. Pot is 2
Flop: [4♥],[A♦],[7♦] 
Bot raises 2


It costs 2 to call (y/n) y


Turn:[A♥]


Pot is 6. It is 3 to call (y/n) y


Bot folds with  [T♣],[Q♦]  You win a pot worth 6 tokens.


(4, -1)