In [1]:
import math
import gym
import random
from gym import spaces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import json
import datetime as dt
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [65]:
class Env(gym.Env):
    
    metadata = {'render.modes': ['human']}
    
    def __init__(self, row, col, num_players = 0, debug = False):
        super(Env, self).__init__()
        
        self.row = row
        self.col = col  
        self.current_step = 0
        self.bingo = np.array(random.sample(range(1,self.row*self.col+1), self.row*self.col)).reshape(self.row,self.col)
        self.original_bingo_start = self.bingo
        self.bingo_state = np.zeros(self.row*self.col).reshape(self.row,self.col).astype('int32')
        self.bingo_selection_count = np.zeros(self.row*self.col).reshape(self.row,self.col).astype('int32')
        self.latest_number_cut = 0
        self.strikes = 0
        self.num_players = num_players
        self.player_cut_list = []
        self.action_space = spaces.Discrete(self.row*self.col)
        #self.action_space = spaces.Box(low=[0] , high=[row*col], dtype=np.int32)
        self.observation_space = spaces.Box(
          low=np.zeros(self.row*self.col).reshape(self.row,self.col), 
            high=np.ones(self.row*self.col).reshape(self.row,self.col), dtype=np.int32)
        self.debug = debug
        
    def reset(self):
               
        self.current_step = 0
        self.bingo = np.array(random.sample(range(1,self.row*self.col+1), self.row*self.col)).reshape(self.row,self.col)
        self.bingo_state = np.zeros(self.row*self.col).reshape(self.row,self.col).astype('int32')
        self.bingo_selection_count = np.zeros(self.row*self.col).reshape(self.row,self.col).astype('int32')
        self.strikes = 0
        
        return self._next_observation()
    
    def _next_observation(self):

        current_bingo_state = self.bingo_state   
        

        return np.array(current_bingo_state)
  
    def _take_action(self, action):
        
        self.current_step+=1
        self.latest_number_cut = self.bingo[math.floor(action/self.row),(action)%self.row]           
        is_number_cut = self.bingo_selection_count[math.floor(action/self.row),action%self.row]
    
        already_cut_reward = 0
        if(is_number_cut == 0):
            self.bingo[math.floor(action/self.row),(action)%self.row] = 0
            self.bingo_state[math.floor(action/self.row),action%self.row] = 1
            self.bingo_selection_count[math.floor(action/self.row),action%self.row] = 1
        else:
            self.bingo_selection_count[math.floor(action/self.row),action%self.row] += 1
            already_cut_reward = -self.bingo_selection_count[math.floor(action/self.row),action%self.row]          
        
        #For cuts
        cuts = 0
        for i in range(self.row):
            if(self.bingo_state[i,:].sum() == self.row):
                cuts+=1
        for i in range(self.col):
            if(self.bingo_state[:,i].sum() == self.col):
                cuts+=1
        
        diag1_sum = 0
        diag2_sum = 0
        
        for i in range(self.row):
            diag1_sum+= self.bingo_state[i,i]
            diag2_sum+= self.bingo_state[i,self.row-i-1]
            
        if(diag1_sum == self.row):            
            cuts+=1
        if(diag2_sum == self.row):            
            cuts+=1
    
        #bingo_dict = {0: "No_cut", 1: "B", 2: "BI", 3: "BIN", 4: "BING", 5: "BINGO", 6: "BINGO", 7: "BINGO"}
        
        reward_for_multiple_cuts = 0
        
        reward_for_multiple_cuts = cuts - self.strikes # +1 reward for a cut, + 2 reward for 2 cuts at the same time
        
        self.strikes = cuts
        
        total_add_reward = already_cut_reward + reward_for_multiple_cuts
                        
        return cuts, total_add_reward
    
    def step(self,action, mode = 'no_play'):
        
        reward = -1
        add_reward = 0
        
        if(mode == 'no_play'):
            total_cuts,add_reward = self._take_action(action)
        else:
            tmp_list = list(self.original_bingo_start.reshape(self.row*self.col))
            #Using original because if player says the say number again there is an error
            player_cut_index = tmp_list.index(pl_num)
            total_cuts,add_reward = self._take_action(action)
        
        reward+= add_reward
        
        if (total_cuts >= self.row):
            done = True
        else:
            done = False
        
        return self._next_observation(), reward, done, self.latest_number_cut, self.bingo, self.strikes
    
    def render(self, mode='train', close=False):
        # Render the environment to the screen
        
        if(mode=='train'):
            if(self.current_step > 500): #Just for check
                print(self.current_step)
            train_list.append([self.bingo, self.bingo_state, self.bingo_selection_count, "BINGO", self.strikes])
        elif(mode == 'test'):
            
            test_list.append([self.bingo, self.bingo_state, self.bingo_selection_count, "BINGO", self.strikes])                
            
        return

In [17]:
row = 5
col = 5

In [167]:

#bingo = np.array(random.sample(range(1,row*col+1), row*col)).reshape(row,col)
#bingo_state = np.zeros(row*col).reshape(row,col).astype('int32')
#bingo_selection_count = np.zeros(row*col).reshape(row,col).astype('int32')

bingo_env = Env(row, col, num_players = 0, debug = True)

model = PPO2(MlpPolicy, bingo_env,verbose = 0)
model.learn(total_timesteps=500000)
model.save("deep_bingo")

<stable_baselines.ppo2.ppo2.PPO2 at 0x1dab9a3c4e0>

In [6]:
del model
model = PPO2.load("deep_bingo")

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [172]:
train_list= []
train_action_list= []
obs = bingo_env.reset()
done=False
while(done == False):
    action, _states = model.predict(obs)
    #print(action)
    obs, rewards, done, info = bingo_env.step(action)
    train_action_list.append(action)
    bingo_env.render(mode='train')
    #print(train_list[-1])

In [173]:
print(len(train_action_list))
train_list[-1]

16


[array([[ 0,  0, 12,  0,  0],
        [15,  0, 23,  0,  8],
        [ 9,  0,  0,  0, 19],
        [17,  0, 21,  0, 25],
        [ 0,  0,  0,  0,  0]]), array([[1, 1, 0, 1, 1],
        [0, 1, 0, 1, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 0, 1, 0],
        [1, 1, 1, 1, 1]]), array([[1, 1, 0, 1, 1],
        [0, 1, 0, 1, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 0, 1, 0],
        [1, 1, 1, 1, 1]]), 'BINGO', 5]

In [174]:
for i in range(20):
    test_list = []
    test_action_list=[]
    obs = bingo_env.reset()
    done = False
    while(done ==False):
        action, _states = model.predict(obs)
        obs, rewards, done, info = bingo_env.step(action)
        test_action_list.append(action)
        bingo_env.render(mode='test')
    print(len(test_action_list))
    print(test_list[-1])

18
[array([[ 0,  0, 20,  0,  0],
       [ 5,  0, 22,  0, 14],
       [ 6,  0,  0,  0, 19],
       [ 1,  0,  7,  0, 12],
       [ 0,  0,  0,  0,  0]]), array([[1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 1, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 1, 1, 1]]), array([[1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 1, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 1, 1, 1]]), 'BINGO', 5]
16
[array([[ 0,  0, 10,  0,  0],
       [ 1,  0, 17,  0, 15],
       [16,  0,  0,  0, 21],
       [14,  0, 19,  0, 11],
       [ 0,  0,  0,  0,  0]]), array([[1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 1, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 1, 1, 1]]), array([[1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0],
       [0, 1, 1, 1, 0],
       [0, 1, 0, 1, 0],
       [1, 1, 1, 1, 1]]), 'BINGO', 5]
16
[array([[ 0,  0, 18,  0,  0],
       [14,  0,  2,  0, 10],
       [ 4,  0,  0,  0,  9],
       [24,  0, 12,  0, 11],
       [ 0,  0,  0,  0,  0]]), array([[1, 1, 0, 1, 1],
       [0, 

## Play

In [72]:
row = 5
col = 5
num_players_playing = int(input("Enter number of players "))
bingo_env_play = Env(row, col, num_players = num_players_playing, debug = True)
obs = bingo_env_play.reset()
done = False
player_win = False
counter = 0
strike_counter = 0

while(done ==False and player_win == False):
    #bingo_env_play.render(mode='play')
    for i in range(num_players_playing + 1):
        if(i != num_players_playing):
            msg = "Player " + str(i+1) + " Enter number you are cutting (Enter won/w if you have won): "
            pl_num = input(msg)
            if(pl_num == "won" or pl_num == "w"):
                print("Congrats! End of game")
                player_win = True
            else:
                pl_num = int(pl_num)
                bingo_env_play.step(pl_num,"play")
        else:
            action, _states = model.predict(obs)
            obs, rewards, done, info, curr_bingo, comp_strikes = bingo_env_play.step(action, "no_play")
            if(done == True):
                print("I won. Game over")
            else:
                print("Number I am striking is ", info)
                print(curr_bingo)
                print("")
                if(strike_counter != comp_strikes):
                    print("I have ", comp_strikes, "strike(s)")
                strike_counter = comp_strikes
        

Enter number of players 1
Player 1 Enter number you are cutting (Enter won/w if you have won): 15
Number I am striking is  14
[[13 12  7 15  9]
 [22 18  1 25 17]
 [21  8  2 16 11]
 [ 0  6 19  5  3]
 [23  4 24 10  0]]

Player 1 Enter number you are cutting (Enter won/w if you have won): 8
Number I am striking is  2
[[13 12  7 15  9]
 [22 18  1  0 17]
 [21  8  0 16 11]
 [ 0  6 19  5  3]
 [23  4 24 10  0]]

Player 1 Enter number you are cutting (Enter won/w if you have won): 21
Number I am striking is  6
[[13 12  7 15  9]
 [22 18  1  0 17]
 [21  8  0 16 11]
 [ 0  0 19  5  3]
 [23  0 24 10  0]]

Player 1 Enter number you are cutting (Enter won/w if you have won): 4
Number I am striking is  0
[[13 12  7 15  0]
 [22 18  1  0 17]
 [21  8  0 16 11]
 [ 0  0 19  5  3]
 [23  0 24 10  0]]

Player 1 Enter number you are cutting (Enter won/w if you have won): 10
Number I am striking is  23
[[13 12  7 15  0]
 [22 18  1  0 17]
 [ 0  8  0 16 11]
 [ 0  0 19  5  3]
 [ 0  0 24 10  0]]

I have  1 strike(s)