In [300]:
import numpy as np
from numba import njit
import numba
from numba.typed import List
import sys

In [301]:
@njit
def getActionSize():
    """Return the length of the action mask (one slot per action id)."""
    n_actions = 50
    return n_actions

In [302]:
@njit
def getStateSize():
    """Return the number of entries in an agent's observation vector."""
    state_length = 87
    return state_length

In [303]:
@njit
def initEnv():
    """Create a new game: deal the opening hands and build the shuffled draw pile.

    Returns
    -------
    env : float64 array of length 75
        [0:56]  location of every card id: 0-4 = held by that player,
                5 = in the draw pile, 6 = on the discard pile
        [56]    Nope counter for the pending action
        [57]    id of the player whose main turn it is
        [58:62] ids of the four other players, (env[57]+1 .. env[57]+4) % 5
        [62:67] 1 while a player is still in the game, 0 once they lost
        [67]    phase: 0 main turn, 1 Nope turn, 2 steal turn,
                3 choose/take card turn
        [68]    number of cards player env[57] has to draw
        [69:72] the three cards shown by See the future
        [72]    last action of player env[57]
        [73]    env[57] + 1 at start
        [74]    -1 at start
    draw_pile : float64 array
        Shuffled ids of the cards that are still in the draw pile.
    discard_pile : float64 array of length 13
        All zeros at start.
    """
    env = np.zeros(75)
    card = np.arange(46)  # ids 0-45: every card except Defuse and Exploding Kitten
    np.random.shuffle(card)
    env[:56] += 5  # 5 marks a card that is in the draw pile
    for i in range(5):  # deal to players 0..4
        env[card[i*4:(i+1)*4]] = i  # four random cards from ids 0-45
        env[46+i] = i  # plus card 46+i (ids 46-51 are the Defuse cards)
    draw_pile = np.where(env==5.)[0].astype(np.float64)
    np.random.shuffle(draw_pile)
    env[56] = 0  # no Nope played yet
    env[57] = 0  # player 0 starts
    # The other players in seat order.  The previous literal [2,3,4,0] listed
    # the main player itself and left out player 1, which disagreed with the
    # (env[57]+1 .. env[57]+4) % 5 order that changeTurn writes here.
    env[58:62] = (env[57] + np.arange(1., 5.)) % 5
    discard_pile = np.zeros(13)
    env[62:67] = 1  # everybody is still in the game
    env[67] = 0  # start in the main-turn phase
    env[68] = 1  # one card to draw per turn
    env[69:72] = 0  # nothing revealed by See the future yet
    env[72] = 0  # last action of player env[57]
    env[73] = env[57]+1
    env[74] = -1
    return env,draw_pile,discard_pile

In [304]:
@njit
def getNumCard(env,id):
    """Count how many entries of `env` are equal to `id`."""
    matching_positions = np.where(env == id)[0]
    return matching_positions.shape[0]

In [305]:
@njit
def getAllNumCard(env):
    """Count, per card group, the cards currently held by player env[57].

    Entry k of the returned length-12 vector is the number of card ids in
    env[bounds[k]:bounds[k+1]] whose value equals env[57]; the groups cover
    card ids 0-51 (ids 52-55 are not counted).
    """
    owner = env[57]
    # Consecutive id ranges: 5, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4 and 6 cards wide.
    bounds = (0, 5, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 52)
    state = np.zeros(12)
    for group in range(12):
        state[group] = getNumCard(env[bounds[group]:bounds[group + 1]], owner)
    return state

In [306]:
@njit
def getCardType(id):
    """Return the card-type index (0-12) of card id `id`.

    The 56 card ids are split into 13 consecutive ranges:
    0-4, 5-8, 9-12, 13-16, 17-20, 21-25, 26-29, 30-33, 34-37, 38-41,
    42-45, 46-51 and 52-55.

    Returns -1 when `id` is not a whole number in 0..55.  The function used
    to fall off the end (implicit None) in that case, which gave it a mixed
    int/None return type.  The range table is now a constant tuple instead
    of a typed List of 13 arrays rebuilt on every call.
    """
    upper_bounds = (5, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 52, 56)
    if id < 0 or id % 1 != 0:
        return -1
    for card_type in range(13):
        if id < upper_bounds[card_type]:
            return card_type
    return -1

In [355]:
@njit
def getCardRange(type):
    """Return the int64 card ids that belong to card-type index `type` (0-12)."""
    first_ids = (0, 5, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 52)
    stop_ids = (5, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 52, 56)
    ids_of_type = np.arange(first_ids[type], stop_ids[type])
    return ids_of_type.astype(np.int64)

In [308]:
@njit
def getAgentState(env,draw_pile,discard_pile):
    """Build the observation vector (length getStateSize()) from the game state.

    Layout of the returned vector:
      [0:12]   per-group card counts of player env[57] (getAllNumCard)
      [12:25]  the 13-entry discard pile vector
      [25]     number of cards in the draw pile
      [26]     number of eliminated players
      [27]     1 if the Nope counter env[56] is odd, else 0
      [28:67]  three one-hot blocks of 13: card type of env[69], env[70], env[71]
      [67:71]  one-hot of the phase env[67]
      [71]     number of cards player env[57] has to draw (env[68])
      [72:82]  one-hot of the last main action env[72] (all zero if outside 0-9)
      [82:86]  still-in-game flags of the players listed in env[58:62]
      [86]     still-in-game flag of player env[57]
    """
    state = np.zeros(getStateSize())
    state[0:12] = getAllNumCard(env)

    state[12:25] = discard_pile
    state[25] = draw_pile.shape[0]
    state[26] = np.where(env[62:67]==0)[0].shape[0]
    state[27] = env[56]%2
    for i in range(3):
        # One-hot encode the type inside its own 13-wide block.  The old code
        # assigned the scalar type index to all 13 slots of the block.
        # NOTE(review): env[69:72] starts as card id 0, so an unused peek is
        # encoded like card id 0 - confirm whether a "nothing seen" marker is wanted.
        card_type = getCardType(env[69+i])
        state[28 + 13*i + card_type] = 1
    state[67 + int(env[67])] = 1
    state[71] = env[68]
    last_action = int(env[72])
    # stepEnv uses -1 for "no last action"; do not let it wrap around to slot 81.
    if 0 <= last_action < 10:
        state[72 + last_action] = 1
    state[82:86] = env[62:67][env[58:62].astype(np.int64)]
    state[86] = env[62 + int(env[57])]
    return state


In [309]:
@njit
def getValidActions(state):
    """Return a 0/1 mask of length getActionSize() with the currently legal actions.

    The phase is read from the one-hot state[67:71]:
      main turn    actions 1-5 when the matching card count state[1:6] is > 0,
                   6 (draw), 7 / 8 when some group in state[0:11] holds >= 2 / >= 3
                   cards, 9 when at least five groups in state[0:11] are non-empty
      Nope turn    actions 0 and 10
      steal turn   actions 11-14 (one per player listed in env[58:62])
      choose turn  depends on the last main action in state[72:82]:
                   3 -> 15..25, 7 or 8 -> 27..37, 9 -> 38..48

    The choose-turn mapping follows how stepEnv decodes the choice
    (action-15 for 3, action-27 for 8, action-38 for 9).  Previously 8 was
    mapped to 38..48 and 9 had no legal action at all.  stepEnv ignores the
    chosen value for 7, which keeps the 27..37 mask it had before.
    """
    list_action = np.zeros(getActionSize())
    if state[67]==1:  # main turn
        list_action[1:6] = (state[1:6]>0).astype(np.float64)
        list_action[6] = 1
        most_of_one_group = np.max(state[0:11])
        if most_of_one_group>=2:  # two of a kind
            list_action[7] = 1
        if most_of_one_group>=3:  # three of a kind
            list_action[8] = 1
        groups_held = (state[0:11]>0).astype(np.float64)
        if np.sum(groups_held)>=5:  # five different groups
            list_action[9] = 1

    elif state[68]==1:  # Nope turn
        list_action[0] = 1
        list_action[10] = 1

    elif state[69]==1:  # steal turn
        list_action[11:15] = 1

    elif state[70]==1:  # choose/take card turn
        main_action = np.where(state[72:82])[0][0]
        if main_action==3:
            list_action[15:26] = 1
        elif main_action==7 or main_action==8:
            list_action[27:38] = 1
        elif main_action==9:
            list_action[38:49] = 1

    return list_action
    
    
        




In [310]:
@njit
def checkDefuse(env,discard_pile:np.ndarray):
    """Spend one Defuse card of player env[57] if they hold one.

    Defuse cards are ids 46-51.  When the player owns one, the first such
    card is marked as discarded (6) in `env`, the Defuse counter
    discard_pile[11] is incremented in place and True is returned.
    Otherwise nothing changes and False is returned.

    The previous version did `discard_pile = np.append(...)`, which only
    rebound the local name, so the caller's discard pile never changed.
    """
    owned = np.where(env[46:52]==env[57])[0]
    if owned.shape[0] > 0:
        env[46 + owned[0]] = 6
        # Index 11 is the card-type slot of the 46-51 range in the 13-entry
        # discard vector (same per-type counting that executeMainAction uses).
        discard_pile[11] += 1
        return True
    return False

In [311]:
@njit
def checkExploding(card):
    """Return True when `card` is one of the ids 52, 53, 54 or 55."""
    kitten_ids = np.array([52, 53, 54, 55], dtype=np.float64)
    return bool(np.any(kitten_ids == card))

In [312]:
@njit
def drawCard(env,draw_pile,discard_pile):
    """Let player env[57] draw env[68] cards from the top (index 0) of the draw pile.

    NOTE: a later cell of this notebook defines `drawCard` again; after a
    full run that later definition is the one in effect.

    For each card drawn:
      * Exploding Kitten (ids 52-55) and the player has a Defuse: the Defuse
        is spent (checkDefuse) and the kitten is moved to the bottom of the pile.
      * Exploding Kitten without a Defuse: the player is eliminated
        (env[62 + player] = 0), the kitten is marked discarded and counted in
        discard_pile[12], and drawing stops.
      * any other card: the card becomes the player's (env[card] = player).

    Returns
    -------
    (draw_pile, discard_pile)
        The shortened draw pile is a new array; callers must use the returned
        value because the array passed in cannot shrink in place.  Previously
        nothing was returned, so every change to the piles was lost.

    Fixes compared with the previous version: `np.concatenate` was called
    with two positional arrays instead of a tuple, `.astype` was called on a
    scalar, a float was used as an index, and drawn cards were appended to
    the discard pile.
    """
    player = int(env[57])
    for _ in range(int(env[68])):
        if draw_pile.shape[0] == 0:  # nothing left to draw
            break
        top = draw_pile[0]
        if checkExploding(top):
            if checkDefuse(env,discard_pile):
                # put the kitten back, at the bottom of the pile
                draw_pile = np.concatenate((draw_pile[1:], draw_pile[:1]))
            else:
                env[62 + player] = 0
                env[int(top)] = 6
                discard_pile[12] += 1
                draw_pile = draw_pile[1:]
                break  # an eliminated player draws nothing more
        else:
            env[int(top)] = player
            draw_pile = draw_pile[1:]
    return draw_pile, discard_pile
    

In [313]:
@njit
def changeTurn(env):
    """Pass the main turn to the next player that is still in the game.

    Updates env[57] (main player) and env[58:62] (the four other players in
    the order env[57]+1 .. env[57]+4, modulo 5).

    The previous version indexed env[62:67] with the float env[57], which is
    not a valid index, and looped forever when every player was eliminated.
    The search is now limited to one full round; if nobody else is left the
    turn stays with the current player.
    """
    current = int(env[57])
    next_player = current
    for step in range(1, 6):
        next_player = (current + step) % 5
        if env[62 + next_player] != 0:  # still in the game
            break
    env[57] = next_player
    env[58:62] = np.arange(next_player + 1, next_player + 5) % 5

In [314]:
@njit
def nopeTurn(id):
    """Return the four values id+1 .. id+4 taken modulo 5, as a float array."""
    following_ids = np.arange(id + 1., id + 5.)
    return np.mod(following_ids, 5)

In [315]:
@njit
def checkIfNope(env):
    """Tell whether the Nope counter stored in env[56] is odd."""
    nope_parity = env[56] % 2
    return nope_parity == 1

In [316]:
@njit
def executeMainAction(env,draw_pile,discard_pile,action):
    """Apply main-turn action `action` of player env[57] to the game state.

    Actions:
      1  Attack: discard the card, pass the turn, next player draws 2
      2  Skip: discard the card, one draw less; pass the turn when none is left
      3  discard a card with id 13-16 and enter the steal phase (env[67] = 2)
      4  Shuffle: discard the card and shuffle the draw pile in place
      5  See the future: discard the card, copy the top three draw-pile
         cards into env[69:72]
      6  draw env[68] cards, then pass the turn
      7, 8  enter the steal phase (env[67] = 2)
      9, 0  enter the choose/take phase (env[67] = 3)

    Played cards are marked 6 in `env` and counted in `discard_pile[type]`.

    Fixes compared with the previous version:
      * Attack / Skip called changeTurn before discarding, so the card was
        looked up (and removed) in the *next* player's hand.
      * Shuffle shuffled the discard counters instead of the draw pile.
      * See the future assigned two values to three slots.
      * Drawing never ended the turn.
      * Action 9 was not handled; it now enters the choose/take phase.
    """
    player = env[57]  # remember who plays before the turn may change
    if action==1:  # Attack
        env[5 + np.where(env[5:9]==player)[0][0]] = 6
        discard_pile[1]+=1
        changeTurn(env)
        env[68] = 2  # the next player has to draw two cards
        env[67] = 0
    elif action==2:  # Skip
        env[9 + np.where(env[9:13]==player)[0][0]] = 6
        discard_pile[2]+=1
        env[68]-=1
        if env[68]==0:
            changeTurn(env)
            env[68] = 1
        env[67] = 0
    elif action==3:
        env[13 + np.where(env[13:17]==player)[0][0]] = 6
        discard_pile[3]+=1
        env[67] = 2
    elif action==4:  # Shuffle
        env[17 + np.where(env[17:21]==player)[0][0]] = 6
        discard_pile[4]+=1
        np.random.shuffle(draw_pile)
    elif action==5:  # See the future
        env[21 + np.where(env[21:26]==player)[0][0]] = 6
        discard_pile[5]+=1
        n_seen = min(3, draw_pile.shape[0])
        env[69:72] = 0  # slots without a card keep the initial value 0
        env[69:69+n_seen] = draw_pile[:n_seen]
    elif action==6:  # draw card(s)
        # NOTE(review): draw_pile is a fixed-size array, so this call cannot
        # shorten the caller's pile in place - the pile handling between
        # drawCard and its callers still has to be wired up.
        drawCard(env,draw_pile,discard_pile)
        changeTurn(env)  # drawing ends the turn
        env[68] = 1
    elif action==7:
        env[67] = 2
    elif action==8:
        env[67] = 2
    elif action==9:
        env[67] = 3
    elif action==0:
        env[67] = 3
    



In [317]:
@njit
def _discardOwnedCards(env, card_type, how_many):
    """Mark up to `how_many` cards of `card_type` held by player env[57] as discarded (6)."""
    removed = 0
    for slot in getCardRange(card_type):
        if removed == how_many:
            break
        if env[slot] == env[57]:
            env[slot] = 6
            removed += 1


@njit
def _chooseSetType(counts, needed):
    """Pick the card type to pay a `needed`-of-a-kind combo with, or -1 if none fits.

    Preference order: a type 6-10 held exactly `needed` times, then a random
    type 6-10 held more often, then a random type 0-5 held at least `needed` times.
    """
    exact = np.where(counts[6:11] == needed)[0]
    if exact.shape[0] > 0:
        return exact[0] + 6
    larger = np.where(counts[6:11] > needed)[0]
    if larger.shape[0] > 0:
        return np.random.choice(larger) + 6
    special = np.where(counts[0:6] >= needed)[0]
    if special.shape[0] > 0:
        return np.random.choice(special)
    return -1


@njit
def discardCardSpecialAction(env,last_action):
    """Discard the cards player env[57] pays for a combo action.

    last_action 7: two cards of one type
    last_action 8: three cards of one type
    last_action 9: one card each of five different types (types 6-10 first,
                   the remainder filled with random types 0-5)
    Any other value leaves `env` untouched.

    Fixes compared with the previous version:
      * every write went to `env[getCardRange(...)][...]`, i.e. to a
        fancy-index copy, so no card was ever discarded;
      * `range[5]` and `range(<float>)` raised TypeError;
      * types 0-5 were offset by +6 and required strictly more cards than needed;
      * the five-different branch inspected env[6:11] / env[0:6] instead of
        the per-type counts;
      * the function is now @njit, because the @njit stepEnv calls it.

    NOTE(review): there is no discard_pile parameter, so the per-type discard
    counters are not updated here.
    """
    counts = getAllNumCard(env)[:11]
    if last_action==7 or last_action==8:
        needed = 2 if last_action==7 else 3
        card_type = _chooseSetType(counts, needed)
        if card_type >= 0:
            _discardOwnedCards(env, card_type, needed)
    elif last_action==9:
        cat_types = np.where(counts[6:11] > 0)[0] + 6
        for card_type in cat_types:
            _discardOwnedCards(env, card_type, 1)
        special_types = np.where(counts[0:6] > 0)[0]
        np.random.shuffle(special_types)
        missing = 5 - cat_types.shape[0]  # cat_types has at most 5 entries
        for card_type in special_types[:missing]:
            _discardOwnedCards(env, card_type, 1)

            

In [318]:
@njit
def _nextNopeResponder(env, after_id, main_id):
    """Return the next player after `after_id` who is still in the game and
    holds a Nope card (card ids 0-4), or `main_id` when nobody is left to ask."""
    candidate = (after_id + 1) % 5
    while candidate != main_id:
        if env[62 + int(candidate)] == 1 and np.where(env[0:5] == candidate)[0].shape[0] > 0:
            break
        candidate = (candidate + 1) % 5
    return candidate


@njit
def _settlePendingAction(env, draw_pile, discard_pile, execute):
    """Close the Nope phase: run the pending action env[72] or cancel it."""
    env[56] = 0
    # Default to the main phase *before* executing, so an action that opens
    # the steal / choose phase is not overwritten afterwards.
    env[67] = 0
    if execute:
        executeMainAction(env, draw_pile, discard_pile, env[72])
    else:
        env[72] = -1  # the last action does not exist any more


@njit
def _takeCardOfType(env, type_card, from_owner):
    """Give player env[57] the first card of `type_card` whose env value is `from_owner`."""
    for slot in getCardRange(type_card):
        if env[slot] == from_owner:
            env[slot] = env[57]
            return True
    return False


@njit
def stepEnv(env,draw_pile,discard_pile,action):
    """Advance the game by one agent decision and return `env`.

    Phase env[67]:
      0  main turn: store the chosen action in env[72]; if some other player
         can respond with a Nope, enter phase 1 with env[73] = that player,
         otherwise execute the action immediately
      1  Nope turn of player env[73]: action 0 plays a Nope, anything else
         passes.  Once every responder has answered, the pending action is
         executed (even Nope count), cancelled (odd count and the main player
         holds no Nope) or handed to the main player, who may answer with
         action 0 to reset the count and start another round of responses
      2  steal turn: action-11 selects the target from env[58:62] -> env[74]
      3  choose/take turn: decode the choice for last action 3 (action-15),
         8 (action-27) or 9 (action-38); 7 takes a random card of the target

    Fixes compared with the previous version:
      * `env[67]==1` was a comparison, so the Nope phase was never entered;
      * last action, Nope counter, responder and phase were only changed in
        local variables and lost on return;
      * `type_card` was read before it was assigned;
      * cards were "moved" by writing into fancy-index copies of `env`;
      * the responder never advanced after answering;
      * `env[67] = 0` after executeMainAction wiped the steal phase it had set;
      * the combo is now paid before the card is taken, so the new card
        cannot be used as payment.

    NOTE(review): playing a Nope does not remove a Nope card from the
    responder's hand (it did not before either) - confirm the intended rule.
    NOTE(review): `draw_pile` cannot shrink in place; pile changes made while
    drawing are not visible to the caller of stepEnv.
    """
    phase = env[67]
    main_id = env[57]
    nope_id = env[73]
    last_action = env[72]

    if phase==0:  # main turn
        env[72] = action
        env[56] = 0
        responder = _nextNopeResponder(env, main_id, main_id)
        if responder == main_id:  # nobody can answer with a Nope
            _settlePendingAction(env, draw_pile, discard_pile, True)
        else:
            env[73] = responder
            env[67] = 1

    elif phase==1:  # Nope turn
        main_has_nope = np.where(env[0:5]==main_id)[0].shape[0] > 0
        if nope_id != main_id:
            if action==0:
                env[56] += 1
            nope_id = _nextNopeResponder(env, nope_id, main_id)
            env[73] = nope_id
            if nope_id == main_id:  # every responder has answered
                if not checkIfNope(env):
                    _settlePendingAction(env, draw_pile, discard_pile, True)
                elif not main_has_nope:
                    _settlePendingAction(env, draw_pile, discard_pile, False)
                # otherwise the main player decides on the next step
        else:
            if not checkIfNope(env):
                _settlePendingAction(env, draw_pile, discard_pile, True)
            elif action==0 and main_has_nope:
                env[56] = 0  # the main player nopes the Nope
                nope_id = _nextNopeResponder(env, main_id, main_id)
                env[73] = nope_id
                if nope_id == main_id:
                    _settlePendingAction(env, draw_pile, discard_pile, True)
            else:
                _settlePendingAction(env, draw_pile, discard_pile, False)

    elif phase==2:  # choose the player to steal from
        env[74] = env[58:62][int(action-11)]
        env[67] = 3

    elif phase==3:  # choose the card to take
        if last_action==3:
            _takeCardOfType(env, int(action - 15), env[74])
        elif last_action==7:
            discardCardSpecialAction(env,last_action)
            target_cards = np.where(env[0:56]==env[74])[0]
            if target_cards.shape[0] > 0:
                env[np.random.choice(target_cards)] = env[57]
        elif last_action==8:
            discardCardSpecialAction(env,last_action)
            _takeCardOfType(env, int(action - 27), env[74])
        elif last_action==9:
            discardCardSpecialAction(env,last_action)
            _takeCardOfType(env, int(action - 38), 6.0)  # 6 = on the discard pile
        env[67] = 0
    return env

In [319]:
@njit
def getAgentSize():
    """Return the number of players in one game."""
    n_players = 5
    return n_players

In [320]:
@njit
def checkEnded(env):
    """Return the id (0-4) of the only player still in the game, or -1.

    env[62:67] holds one flag per player (1 = still playing, 0 = lost).
    The game is over when exactly one flag is left.

    The previous version returned the constant 1 in that case, but the
    callers (`run` / `main`) use the value as the winner's index.
    """
    still_playing = np.where(env[62:67] == 1)[0]
    if still_playing.shape[0] == 1:
        return still_playing[0]
    return -1

In [321]:
@njit
def getReward(state):
    """Return 1, 0 or -1 for an observation vector built by getAgentState.

    1   all four flags state[82:86] (the other players) are 0
    0   otherwise, the player's own flag state[86] is 0
    -1  otherwise

    The own flag lives at index 86; the previous `state[87]` was out of
    bounds for the 87-entry state vector.
    """
    if np.sum(state[82:86])==0:
        return 1
    elif state[86] == 0:
        return 0
    else:
        return -1

In [322]:
def run(listAgent,perData):
    """Play one game and return `(winner, perData)`.

    `listAgent[i]` is called as `agent(state, perData)` and must return
    `(action, perData)`.  The acting player is env[73] during the Nope phase
    (env[67] == 1) and env[57] in every other phase.

    `winner` is the id of the last player still in the game, or -1 when the
    turn limit is reached first (`main` counts that case separately).

    Fixes compared with the previous version: the tuple returned by initEnv
    was not unpacked, getAgentState and stepEnv were called with the wrong
    arguments, the float player id was used as a list index, and the loop had
    no upper bound.  The unused `dataOnePlayer` loop was removed.

    NOTE(review): stepEnv returns only `env`, so the draw pile held here is
    never shortened - confirm how pile updates should flow back.
    """
    env, draw_pile, discard_pile = initEnv()
    winner = -1
    turn = 0
    max_turns = 10000  # safety net against games that never finish
    while turn < max_turns:
        turn += 1
        if env[67] == 1:
            pIdx = int(env[73])
        else:
            pIdx = int(env[57])
        state = getAgentState(env, draw_pile, discard_pile)
        action, perData = listAgent[pIdx](state, perData)
        env = stepEnv(env, draw_pile, discard_pile, action)
        if checkEnded(env) != -1:
            # Read the survivor from the flags instead of relying on the
            # value checkEnded uses to signal "game over".
            winner = int(np.where(env[62:67] == 1)[0][0])
            break
    return winner, perData


In [323]:
def main(listAgent, times, perData):
    """Play `times` games with randomly re-seated agents and tally the wins.

    Returns `(numWin, perData)`.  `numWin` has six integer counters: slot i
    counts the wins of `listAgent[i]`, the last slot counts games for which
    `run` reported winner -1.
    """
    win_counts = np.zeros(6, dtype=int)
    seat_order = np.arange(5)
    for _game in range(times):
        np.random.shuffle(seat_order)
        seated_agents = [listAgent[agent_idx] for agent_idx in seat_order]
        winner, perData = run(seated_agents, perData)
        # Map the winning seat back to the agent's position in listAgent.
        slot = -1 if winner == -1 else seat_order[winner]
        win_counts[slot] += 1
    return win_counts, perData

In [324]:
@njit
def random_player(state,temp,per):
    """Pick one of the currently valid actions uniformly at random.

    `temp` and `per` are passed through unchanged.
    """
    action_mask = getValidActions(state)
    candidates = np.where(action_mask == 1)[0]
    chosen = np.random.choice(candidates)
    return chosen, temp, per

In [352]:
@njit
def drawCard(env,draw_pile,discard_pile):
    """Let player env[57] draw env[68] cards from the top (index 0) of the draw pile.

    For each card drawn:
      * Exploding Kitten (ids 52-55) and the player has a Defuse: the Defuse
        is spent (checkDefuse) and the kitten is put back at a random
        position of the pile.
      * Exploding Kitten without a Defuse: the player is eliminated
        (env[62 + player] = 0), the kitten is marked discarded and counted in
        discard_pile[12], and drawing stops.
      * any other card: the card becomes the player's (env[card] = player).

    Returns
    -------
    (draw_pile, discard_pile)
        The shortened draw pile is a new array; callers must use the returned
        value because the array passed in cannot shrink in place.  Previously
        nothing was returned, so every change to the piles was lost.

    Also fixed: drawn cards were appended to the discard pile (they go to the
    player's hand), an eliminated player kept drawing, and an empty pile was
    indexed without a check.
    """
    player = int(env[57])
    for _ in range(int(env[68])):
        n_cards = draw_pile.shape[0]
        if n_cards == 0:  # nothing left to draw
            break
        top = draw_pile[0]
        if checkExploding(top):
            if checkDefuse(env,discard_pile):
                # re-insert the kitten at a random position 0 .. n_cards-1
                idx = np.random.randint(n_cards)
                reordered = draw_pile.copy()
                reordered[0:idx] = draw_pile[1:idx+1]
                reordered[idx] = top
                draw_pile = reordered
            else:
                env[62 + player] = 0
                env[int(top)] = 6
                discard_pile[12] += 1
                draw_pile = draw_pile[1:]
                break  # an eliminated player draws nothing more
        else:
            env[int(top)] = player
            draw_pile = draw_pile[1:]
    return draw_pile, discard_pile
    

In [353]:
# Smoke test: draw for the starting player on a freshly initialised game.
# (`drawCard2` is not defined anywhere in this notebook; the function is `drawCard`.)
drawCard(*initEnv())

In [356]:
# Scratch check: index a zero vector with the card ids returned by getCardRange(2).
a = np.zeros(15)
a[getCardRange(2)]

array([0., 0., 0., 0.])