In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Move from the notebook's folder up to the directory that holds the
# `deepLearning` package (it is imported and written to further down).
# The guard keeps this cell idempotent: without it, every re-run would climb
# two more directories and break the imports and relative paths below.
if not os.path.isdir('deepLearning'):
    os.chdir('../../')

In [19]:
from deepLearning import utils

In [20]:
import torch
import torch.nn as nn
import treys
from treys import Evaluator
from treys import Card

# LSTM poker bot

Okay, so what are we trying to accomplish here?
We want to build a naive LSTM poker bot. We need to do three things:

- [ ] Encode a poker game in a meaningful way

- [ ] Develop a useful loss metric for games

- [ ] Test many-to-many or many-to-one models for games

## Encoding a poker game:

We're going to approach this by just encoding the cards at each step.
There are 4 discrete betting stages that are worth keeping track of in poker:
- Pre-Flop
- Flop
- Turn
- River

We'll create a `1x4x52` tensor to encode the information from each stage of a single poker game.

This way we can create a training tensor of `N` examples with shape `Nx4x52`,
where `4` again represents the discrete betting stages and `52` is the one-hot encoding of the cards revealed at that stage.


In [84]:
def make_staged_games(num_games, device = 'cpu', dtype = torch.float):
    """
    Generate staged heads-up games together with their outcome labels.

    Every simulated deal contributes two examples, one from each player's
    point of view, so `num_games // 2` deals are played in total.

    num_games: number of examples requested (two are produced per deal)
    device: device on which the tensors are created
    dtype: dtype of the encoded card tensors

    Returns:
        X: stacked examples; each example stacks four encodings, in order:
           the player's hole cards, the flop (board[:3]), the turn (board[3])
           and the river (board[4])
        y: stacked labels cast to torch.long. Following the notebook's
           convention a label of 0 means the hand's owner won
           (assumes `utils.score_heads_up` returns a 0/1 value per player -
           TODO confirm against utils)
    """
    tensor_kwargs = {'device': device, 'dtype': dtype}
    examples = []
    labels = []

    for _ in range(num_games // 2):
        p1, p2, board = utils.make_heads_up()

        # Same board for both players; only the hole cards differ.
        for hole_cards in (p1, p2):
            stages = (hole_cards, board[:3], [board[3]], [board[4]])
            encoded = [utils.one_hot_cards(cards, **tensor_kwargs) for cards in stages]
            examples.append(torch.stack(encoded))

        s1, s2 = utils.score_heads_up(p1, p2, board)

        # Player 1's example is labelled with s2 and player 2's with s1,
        # so that 0 marks a win for the owner of the hand.
        for outcome in (s2, s1):
            labels.append(torch.tensor(outcome, **tensor_kwargs))

    return torch.stack(examples), torch.stack(labels).to(torch.long)

## Many to One architecture experiments:

We're going to try to use a Many-to-one LSTM to predict our likelihood of winning a hand.

Our approach is as follows:
- LSTM with a hidden dimension of 300 (arbitrary)
- Output transformation with the following properties:
    - Linear layer with input 300 and output 2
    - Softmax for thresholding 
    - First dim of output is likelihood for us to win
    - Second dim is likelihood for opponents to win
    - This allows us to use `CrossEntropyLoss` as a penalty mechanism (Note we'll have to skip the softmax when we use `CrossEntropyLoss`)
    

In [48]:
class simpleLSTM(nn.Module):
    """
    A simple many-to-one LSTM for poker.

    The betting stages of a hand are fed one at a time through a single
    `nn.LSTMCell`; the hidden state after the last stage is projected onto
    two class scores (index 0: we win, index 1: the opponent wins, following
    the notebook's label convention).
    """

    def __init__(self, hidden_size = 300):
        """
        hidden_size: size of hidden dimension of LSTM
        """
        super().__init__()
        self.hidden_size = hidden_size
        # 52 input features: one slot per card of the deck.
        self.lstm = nn.LSTMCell(52, hidden_size)
        self.output = nn.Linear(hidden_size, 2)
        # Softmax over the class dimension so that squashed outputs are real
        # probabilities (each row sums to 1), matching what CrossEntropyLoss
        # applies internally during training.
        self.squash = nn.Softmax(dim=1)

    def forward(self, X, squash = False):
        """
        Forward pass of the LSTM
        X: input of shape NxSx52, where S is the number of stages
           (4 in this notebook: hand, flop, turn, river)
        squash: whether or not to squash the scores using softmax.
                Leave it False when the output feeds `CrossEntropyLoss`,
                which expects raw scores.

        Returns:
            tensor of shape Nx2 representing likelihood to win and likelihood for opponent to win.
            IF squash==True values represent probabilities.
        """
        N, num_stages, _ = X.shape

        # Zero initial state, sized from the configured hidden dimension
        # (not a hard-coded 300) and matching X's dtype and device.
        hidden = X.new_zeros(N, self.hidden_size)
        cell = X.new_zeros(N, self.hidden_size)

        # One LSTM step per betting stage, in order.
        for stage in range(num_stages):
            hidden, cell = self.lstm(X[:, stage], (hidden, cell))

        # Many-to-one: only the final hidden state is scored.
        scores = self.output(hidden)

        if squash:
            return self.squash(scores)

        return scores

In [173]:
def train_model(X, y, model, epochs = 10, verbose = False, lr = 0.005):
    """
    Train `model` in place with full-batch Adam and a cross-entropy loss.

    Inputs:
        X: training tensor, passed to `model` unchanged
           (shape Nx4x52 for the simpleLSTM in this notebook)
        y: target tensor of class indices, shape (N,), dtype torch.long
        model: a torch.nn.Module model returning raw (unsquashed) class scores
        epochs: number of epochs to train
        verbose: iterations of epochs to print out (1 for all, False for none)
        lr: learning rate handed to Adam
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    for e in range(epochs):
        # Clear gradients left over from the previous step before the new
        # forward/backward pass.
        optimizer.zero_grad()
        scores = model(X)
        loss = criterion(scores, y)
        # No torch.no_grad() here: the backward pass and optimizer step do
        # not need it, and wrapping them suggests gradients are disabled.
        loss.backward()
        optimizer.step()
        if verbose and e%verbose == 0:
            print("epoch: {} loss: {:.4f}".format(e,loss.item()))
                
def evaluate_model(X, y, model):
    """
    Return the fraction of examples in X that `model` classifies correctly.

    Inputs:
        X: input tensor, passed to `model` unchanged
        y: target tensor with one label per example, shape (N,) or (N,1)
        model: callable returning either one score per class (shape NxC,
               C > 1) or a single score per example (shape (N,) or (N,1))

    Returns:
        accuracy as a Python float in [0, 1]
    """
    with torch.no_grad():
        scores = model(X)
        if scores.dim() > 1 and scores.shape[-1] > 1:
            # One score per class: the prediction is the best-scoring class.
            # Thresholding raw logits at 0.5 and comparing an (N, C) tensor
            # with (N,) labels would not measure accuracy.
            predictions = scores.argmax(dim=-1)
        else:
            # Single score per example: keep the 0.5 decision threshold.
            predictions = scores >= 0.5
        # Flatten both sides so (N,) and (N,1) layouts compare element-wise.
        predictions = predictions.reshape(-1).to(y.dtype)
        correct = (predictions == y.reshape(-1)).sum().item()
        return correct / y.shape[0]

In [169]:
# WARNING: generating 30,000 games takes a long time.
# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
X_train, y_train = make_staged_games(30000, 'cuda' if torch.cuda.is_available() else 'cpu')

In [170]:
# Build the held-out games on the same device as the training data rather
# than a separately hard-coded one, so the two can never drift apart.
X_test, y_test = make_staged_games(1000, X_train.device)

In [171]:
# Place the model on whichever device holds the training data.
model = simpleLSTM().to(X_train.device)

In [186]:
# 500 full-batch epochs, reporting the loss every 100th epoch.
train_model(X_train, y_train, model, epochs=500, verbose=100)

epoch: 0 loss: 0.5587
epoch: 100 loss: 0.6666
epoch: 200 loss: 0.6477
epoch: 300 loss: 0.6878
epoch: 400 loss: 0.6623


In [187]:
# Inference only: skip autograd bookkeeping so no graph is kept in memory
# for the test batch.
with torch.no_grad():
    out = model(X_test, squash=True)

In [188]:
# Predicted class per test example (index of the larger of the two scores).
# NOTE: this rebinds `out`, so the cell cannot be re-run on its own; re-run
# the cell that computes the model output first.
out = torch.argmax(out, dim=1)

In [189]:
# Test accuracy: share of predictions that agree with the labels.
num_correct = (out == y_test).sum().item()
num_correct / len(y_test)

0.629

In [190]:
import pickle

In [192]:
# Persist the generated training inputs so the slow generation step does
# not have to be repeated.
# NOTE(review): the tensor is pickled as-is; presumably it is restored onto
# the device it was created on - confirm before loading on another machine.
with open('deepLearning/lstmTrain.pkl', 'wb') as inputs_file:
    pickle.dump(X_train, inputs_file)

In [193]:
# Persist the matching training labels next to the inputs.
with open('deepLearning/lstmTrain_y.pkl', 'wb') as labels_file:
    pickle.dump(y_train, labels_file)