In [1]:
!pip install bitstring

Collecting bitstring
[?25l  Downloading https://files.pythonhosted.org/packages/c3/fc/ffac2c199d2efe1ec5111f55efeb78f5f2972456df6939fea849f103f9f5/bitstring-3.1.7.tar.gz (195kB)
[K     |█▊                              | 10kB 22.4MB/s eta 0:00:01[K     |███▍                            | 20kB 18.8MB/s eta 0:00:01[K     |█████                           | 30kB 11.2MB/s eta 0:00:01[K     |██████▊                         | 40kB 9.3MB/s eta 0:00:01[K     |████████▍                       | 51kB 7.7MB/s eta 0:00:01[K     |██████████                      | 61kB 7.6MB/s eta 0:00:01[K     |███████████▊                    | 71kB 8.6MB/s eta 0:00:01[K     |█████████████▍                  | 81kB 9.1MB/s eta 0:00:01[K     |███████████████                 | 92kB 8.7MB/s eta 0:00:01[K     |████████████████▊               | 102kB 7.5MB/s eta 0:00:01[K     |██████████████████▍             | 112kB 7.5MB/s eta 0:00:01[K     |████████████████████            | 122kB 7.5MB/s eta 0:00:0

## ML ASSIGNMENT BATCH 8
- Deepthishree GS
- Harshini S
- Iswarya GP
- Janani R
- Swetha M

### ***TOPIC*** - REINFORCEMENT LEARNING
### ***Problem Taken*** : Playing Dots and Boxes using a Q learning Agent

![image](https://user-images.githubusercontent.com/43994542/112790879-f9220480-907d-11eb-8dae-c041684a9523.png)



## Importing Libraries 

In [2]:
from bitstring import BitArray
# This library is used to store and convert the board state. 
# Board state is stored as bitarray with one indicating a line and zero indicating no line
import numpy as np
import random
# To make random moves in initial exploration phase
import json
# Save the Q-table as a JSON file so training can continue incrementally after the kernel restarts

## Global Variables declaration


In [3]:
'''
3x3 dots and boxes 
4 boxes can be drawn
12 lines to choose from for each move
2 power 12 game states for the agent to learn from
DRAW / TIE is available in this version of the game (in original version in case of tie, person who starts loses)
Reward 100 and Penalty -100 (for each box drawn)
Reward 200 and Penalty -200 (long term finally at the end)
Reward 50 for a tie game at end
'''
QTable=dict()
# The Q-table learnt by the agent: maps a board state (12-character bit string)
# to a dict of {action (line number): Q-value}.
'''
Q TABLE usually has states as rows and actions as columns and the entries are the corresponding Q values

Since in a game, only legal moves are the actions, the number of columns(moves) for each state is different

So, a hash table is constructed with keys as states and values as action:qvalue pairs
'''
VISIT_TABLE=dict()
# Visit count per state. It lets us check that all states are visited often
# (an assumption for Q-learning convergence in Tom Mitchell's book).
DISCOUNT_RATE=0.8
# Factor by which the best Q-value of the next state is scaled in each update.
LEARNING_RATE=0.2
# Weight of the new estimate in each update; the existing Q-value keeps weight (1-LEARNING_RATE).
# Small values ensure the table is not drastically changed by a single update.


### Main Function for LEARNER
- The algorithm plays games repeatedly and updates the Q-table after each one
- Calls subfunction playGame

#### Main Functions:
- Decide the choice of move for agent as random, simple or qlearner 
- Determines whether the learner is exploring or exploiting
- Random and simple for exploration
  - Random makes random moves
  - Simple makes the move whose next state has the lowest visit count (so the same states are not repeated often and unvisited states get explored)
- Qlearner exploits
  - This makes the move where the Q value for the next state is maximum
  

In [4]:
def learner(gameCount=1000): #default no of games if the trainer doesn't specify the games
    """Train the Q-table by self-play over ``gameCount`` games.

    The move strategy depends on how far training has progressed:
      - games before the 10% mark use 'random' moves (exploration),
      - games before the 50% mark use 'simple' moves (least-visited next state),
      - the remaining games use 'qlearner' moves (exploitation).

    After every game the current QTable is written to 'Qtable.json' so the
    values survive a kernel restart.

    Parameters:
        gameCount: number of games to play.
    """
    # range(1, gameCount+1) plays exactly gameCount games; the previous
    # `count=1; while count<gameCount` loop stopped one game short.
    for count in range(1,gameCount+1):
        print("GAME ",count)
        #print the game number to know the progress of learner
        if count<gameCount/10:
            choice='random'
        elif count<gameCount/2:
            choice='simple'
        else:
            choice='qlearner'
        playGame(choiceOfMove=choice,firstMove=1,withHuman=False)
        # Back up the table after each game in case the kernel restarts
        with open('Qtable.json','w') as jsonfile:
            json.dump(QTable,jsonfile)



    
        
        

### Initialises the Main Q Table
- Q TABLE usually has states as rows and actions as columns and the entries are the corresponding Q values

- Since in a game, only legal moves are the actions, the number of columns(moves) for each state is different

- So, a hash table is constructed with keys as states and values as action:qvalue pairs

- All values are init to 0 now


In [5]:

def initialiseQTable():
    """Fill the global QTable and VISIT_TABLE for all 2**12 = 4096 board states.

    Each state key is a 12-character string of '0'/'1' (one character per
    line; '1' means the line is drawn). Every '0' is a legal action, stored
    under its 1-based line number with an initial Q-value of 0. For example
    '000011110000' gets the actions 1,2,3,4 and 9,10,11,12.

    The finished board '111111111111' has no legal move, so it gets the single
    placeholder action 0 (not a valid line number) with Q-value 0.
    Every state's visit count starts at 0.
    """
    global QTable
    global VISIT_TABLE
    for i in range(4096):
        # Zero-padded 12-bit binary string; same key format as BitArray(...).bin
        stateKey=format(i,'012b')
        # index+1 because line numbers start from 1 for ease of the human player
        QTable[stateKey]={index+1:0 for index,bit in enumerate(stateKey) if bit=='0'}
        VISIT_TABLE[stateKey]=0
    # Set once, after the loop (it used to be re-assigned on every iteration)
    QTable['111111111111']={0:0}
      # FINAL BOARD STATE HAS ONLY ONE POSSIBLE ACTION - NO MOVE - 0 is set as action which is not a valid line number (line numbers start from 1 to 12)


                
        
    

### Best action given a state
Function used by the QLearner to select the action (move) with the maximum Q-value among all possible actions in the given state

In [6]:
#This is the function to choice best action for the  given state using Qvalue in QTable
def bestActionForState(state):
    """Return the action with the highest Q-value recorded for ``state``.

    Parameters:
        state: key into the global QTable.

    Returns:
        The action whose Q-value is largest; the first such action in the
        table's iteration order wins ties. Returns 0 when the state has no
        recorded actions.
    """
    actionValues=QTable[state]
    if not actionValues:
        return 0
    # max() has no lower bound, unlike the former -2000 sentinel, which
    # returned the invalid action 0 when every Q-value was <= -2000.
    return max(actionValues,key=actionValues.get)



    
    

### UPDATE QTABLE
- This function uses the formula for QLearning and updates the moves in reverse chronological order
- This is called after each game is over
- This finetuning is what makes the agent learn the game
- According to Tom Mitchell book, update for QTable values is as 
  `Q(s,a) = (1-LR)*Q(s,a)+LR*(rwd+DR*MAX(Q(ns,all a's)))`
  - First term is existing term
  - Second term is update term
  - LR and DR are learning and discount rate respectively

In [7]:

def updateQTable(memoryOfPlayerMoves):
    """Apply the Q-learning update to every move one player made in a game.

    Each memory entry is (state, action, nextState, reward). Entries are
    processed from the last move back to the first, so a reward earned late in
    the game is already reflected in the next state's Q-values when the
    earlier moves are updated. For every entry:

        Q(s,a) = (1-LEARNING_RATE)*Q(s,a)
                 + LEARNING_RATE*(reward + DISCOUNT_RATE*Q(nextState, best action))

    The visit count of ``state`` is incremented once per entry.
    """
    keepFraction=1-LEARNING_RATE
    for state,action,nextState,reward in reversed(memoryOfPlayerMoves):
        # Q-value of the best action available from the next state
        futureValue=QTable[nextState][bestActionForState(nextState)]
        # Blend the old estimate with the discounted target
        QTable[state][action]=keepFraction*QTable[state][action]+LEARNING_RATE*(reward+DISCOUNT_RATE*futureValue)
        # This state was visited, so count it
        VISIT_TABLE[state]+=1
        #Since this state is visited, its visit count is incremented
        

### Print board state
- Print the board state so that the human can understand what are the boxes formed and what could be the potential next move during a game

In [8]:
#Printing 3*3 table
def printBoardState(currentState):
    """Print the 3x3-dot board described by ``currentState``.

    ``currentState`` is indexable by line number minus one; a '0' entry means
    the line is still free and its number is printed, anything else prints the
    drawn line ("--" for horizontal, "|" for vertical). Lines 1-6 are the
    horizontal lines (two per dot row), lines 7-12 the vertical lines (three
    per gap between dot rows).
    """
    for row in range(3):
        # Dot row: three dots with the two horizontal lines between them
        for col in range(3):
            print(".\t", end=" ")
            if col==2:
                continue
            lineNo=2*row+col+1
            label=str(lineNo) if currentState[lineNo-1]=='0' else "--"
            print(label+"\t",end=" ")

        print("\n")
        # Vertical lines exist only below the first two dot rows
        if row<2:
            for col in range(3):
                lineNo=7+3*row+col
                label=str(lineNo) if currentState[lineNo-1]=='0' else "|"
                print(label+"\t\t",end=" ")
        print("\n")
printBoardState(12*'0') # 12 zeros are initial board state 

.	 1	 .	 2	 .	 

7		 8		 9		 

.	 3	 .	 4	 .	 

10		 11		 12		 

.	 5	 .	 6	 .	 





### MakeMove or ChooseAction
- First few games, learner uses random moves
- Next, it selects the move leading to the state with the lowest visit count.
- These are for exploration
- After that, the game is carried forward by the QLearner

- Human can choose any of the three for choice of making move for his/her opponent

In [9]:
def MakeMove(currentState,choice):
    """Pick the agent's next move for ``currentState`` using strategy ``choice``.

    Strategies:
      - 'random'   : a uniformly random action among those listed for the
                     state in QTable (exploration).
      - 'simple'   : the action whose resulting state has the smallest count
                     in VISIT_TABLE, so rarely visited states get explored;
                     the first such action wins ties.
      - 'qlearner' : the action with the highest Q-value (exploitation),
                     via bestActionForState.

    Parameters:
        currentState: board state key into QTable.
        choice: one of 'random', 'simple', 'qlearner'.

    Returns:
        The chosen action (line number).

    Raises:
        ValueError: if ``choice`` is not one of the three strategies
            (previously None was returned silently).
    """
    possibleActions=list(QTable[currentState].keys())
    if choice=='random':
        return random.choice(possibleActions)

    if choice=='simple':
        # min() always yields an action; the manual loop left `bestAction`
        # unbound whenever no visit count was below its 1000000 sentinel,
        # and evaluated transition() twice per action.
        return min(possibleActions,key=lambda action: VISIT_TABLE[transition(currentState,action)])

    if choice=='qlearner':
        return bestActionForState(currentState)

    raise ValueError("Unknown choice of move: "+repr(choice))

    
 

### Checks if the game is over

In [10]:
def isBoardStateFinal(boardState):
    """Tell whether the game is over, i.e. all 12 lines are drawn.

    ``boardState`` must expose the board as a bit string through its ``.bin``
    attribute; the game is over exactly when that string is '111111111111'.
    """
    finishedBoard='1'*12
    currentBits=boardState.bin
    return currentBits==finishedBoard

### Transition function
- Takes input as current state and move and gives the next state as output

In [11]:
def transition(currentState,currentMove):
    """Return the board state reached by drawing line ``currentMove``.

    Example: transition('100000000000', 12) returns '100000000001'.

    Parameters:
        currentState: board as a string with one character per line.
        currentMove: 1-based line number to draw.

    Returns:
        A new string equal to ``currentState`` with position ``currentMove``
        set to '1'. Drawing an already drawn line returns an equal state.

    Raises:
        ValueError: if ``currentMove`` is not between 1 and the number of
            lines. The slicing below would otherwise silently return a string
            of the wrong length (e.g. move 0 doubles the board).
    """
    if not 1<=currentMove<=len(currentState):
        raise ValueError("Line number must be between 1 and "+str(len(currentState))+", got "+repr(currentMove))
    # Keep everything before and after the chosen line, put '1' in its place
    return currentState[:currentMove-1]+'1'+currentState[currentMove:]


### Counts the number of boxes formed during the move to update points

In [12]:
def numberOfNewBoxFormed(box,newState):
    """Count the boxes that became complete in ``newState`` and mark them in ``box``.

    Board layout (line numbers):

      .    1   .    2     .

      7        8          9

      .    3   .     4    .

      10       11         12

      .    5    .   6      .

    Lines 1,3,7,8 enclose the first box, 2,4,8,9 the second, 3,5,10,11 the
    third and 4,6,11,12 the fourth.

    Parameters:
        box: list of four flags, 0 for a box that was not complete before this
            move. It is updated in place: newly completed boxes are set to 1.
        newState: board after the move, indexable by line number minus one,
            where '1' means the line is drawn.

    Returns:
        How many boxes were newly completed (can be more than one for a single
        line). The caller uses it to update the score and the rewards.
    """
    sidesOfBox=([1,3,7,8],[2,4,8,9],[3,5,10,11],[4,6,11,12])
    newlyFormed=0
    for position,sides in enumerate(sidesOfBox):
        # Boxes completed by an earlier move are not counted again
        if box[position]!=0:
            continue
        # A box is complete only when all four of its sides are drawn
        if all(newState[side-1]=='1' for side in sides):
            box[position]=1
            newlyFormed+=1
    return newlyFormed
        
                

### Main function where the game is played

Algo
- init board state
- init players as p1, p2 or p1, human
- init current player = p1
- init box [0 0 0 0]
- while not final state: 
    - if cp != 'human' : 
        currentplayer.make move // simple, q learner , random .. 3 boxes..
    - else:
        accept input
    - update currentplayer.memory
    - Check if new box formed, update player score
    - else 
    -   toggle the currentplayer
    - update board state with current move
- update QTABLE with rewards and penalties


In [13]:
def _readHumanMove(possibleActions):
    """Prompt on stdin until the human enters one of ``possibleActions``.

    Non-numeric input is treated like any other invalid entry and re-prompted
    instead of aborting the game with a ValueError.
    """
    prompt="Enter the line number:"
    while True:
        try:
            move=int(input(prompt))
        except ValueError:
            move=None
        if move in possibleActions:
            return move
        prompt="INVALID! Enter the line number:"


def playGame(choiceOfMove,firstMove,withHuman=False):
    """Play one complete game and update the Q-table from both players' moves.

    Parameters:
        choiceOfMove: strategy handed to MakeMove for every non-human turn
            ('random', 'simple' or 'qlearner').
        firstMove: 2 lets the second player start (the human when
            ``withHuman`` is True, otherwise agent 'p2'); any other value lets
            'p1' start.
        withHuman: when True the second player is a human who types the moves,
            and the board and scores are printed after every move.

    Returns:
        'TIE' when both scores are equal, otherwise the name of the player
        with the higher score ('p1', 'p2' or 'human').

    Rewards stored with each remembered move [state, move, nextState, reward]:
        - completing a box: 100 on the mover's move and -100 on the opponent's
          latest move; the mover also keeps the turn;
        - end of game: 50 for both on a tie, else 200 for the winner and -200
          for the loser, written onto each player's last move.
    """
    #Game init
    player=["p1","p2"]
    if withHuman:
        player[1]='human'
    # The second player starts when firstMove==2. Using player[1] instead of
    # the literal 'human' keeps this working when no human is playing
    # (previously that combination failed with KeyError on memory['human']).
    currentPlayer=player[1] if firstMove==2 else player[0]
    # Which boxes are completed; updated in place to keep score
    box=[0,0,0,0]
    # Initial board state: no line drawn
    currentState=BitArray(uint=0, length=12)
    memory={player[0]:[],player[1]:[]}
    score={player[0]:0,player[1]:0}

    while not isBoardStateFinal(currentState):
        if currentPlayer!="human":
            currentMove=MakeMove(currentState.bin,choiceOfMove)
        else:
            currentMove=_readHumanMove(list(QTable[currentState.bin].keys()))

        newState=transition(currentState.bin,currentMove)
        # Remember the move; the reward starts at 0 and may be overwritten below
        memory[currentPlayer].append([currentState.bin,currentMove,newState,0])
        opponent=player[1-player.index(currentPlayer)]
        numberOfBoxes=numberOfNewBoxFormed(box,newState)
        if numberOfBoxes>0:
            # Short-term reward for the mover, penalty for the opponent
            score[currentPlayer]+=numberOfBoxes*2
            memory[currentPlayer][-1][-1]=100
            memory[opponent][-1][-1]=-100
        else:
            # No box formed: the turn passes to the other player
            currentPlayer=opponent

        currentState=BitArray(uint=int(newState,2),length=12)

        if withHuman:
            # Show every move while a human is playing
            printBoardState(currentState.bin)
            print("Score of ",player[0]," is ",score[player[0]])
            print("Score of ",player[1]," is ",score[player[1]])
            if not isBoardStateFinal(currentState):
                print("Next turn for ",currentPlayer)

    # Game over: find the result and put the final rewards on the last moves
    if score[player[0]]==score[player[1]]:
        result='TIE'
        print(result)
        memory[player[0]][-1][-1]=50
        memory[player[1]][-1][-1]=50
    else:
        result=max(score,key=score.get)
        print("Winner is ",result)
        memory[result][-1][-1]=200
        memory[player[1-player.index(result)]][-1][-1]=-200

    # Learn from both players' moves
    updateQTable(memory[player[0]])
    updateQTable(memory[player[1]])
    return result




### Learn and test performance
- Step 1 : INITIALISE QTABLE
- Step 2 : Learn by playing games 
- Step 3 : Play against human ( can specify who makes first move)

In [14]:
initialiseQTable()
# Create the Q-table entries for every state with all Q-values set to 0
learner(gameCount=10000)
# Train the agent through self-play; gameCount is the number of games requested


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
GAME  7500
Winner is  p1
GAME  7501
Winner is  p1
GAME  7502
Winner is  p1
GAME  7503
Winner is  p1
GAME  7504
Winner is  p1
GAME  7505
Winner is  p1
GAME  7506
Winner is  p1
GAME  7507
Winner is  p1
GAME  7508
Winner is  p1
GAME  7509
Winner is  p1
GAME  7510
Winner is  p1
GAME  7511
Winner is  p1
GAME  7512
Winner is  p1
GAME  7513
Winner is  p1
GAME  7514
Winner is  p1
GAME  7515
Winner is  p1
GAME  7516
Winner is  p1
GAME  7517
Winner is  p1
GAME  7518
Winner is  p1
GAME  7519
Winner is  p1
GAME  7520
Winner is  p1
GAME  7521
Winner is  p1
GAME  7522
Winner is  p1
GAME  7523
Winner is  p1
GAME  7524
Winner is  p1
GAME  7525
Winner is  p1
GAME  7526
Winner is  p1
GAME  7527
Winner is  p1
GAME  7528
Winner is  p1
GAME  7529
Winner is  p1
GAME  7530
Winner is  p1
GAME  7531
Winner is  p1
GAME  7532
Winner is  p1
GAME  7533
Winner is  p1
GAME  7534
Winner is  p1
GAME  7535
Winner is  p1
GAME  7536
Winner is  p1
GAME  7537

In [15]:


def playAgainstHuman():
    """Ask who moves first, then play one game of the Q-learner against a human.

    The prompt is repeated until the user enters 1 (agent first) or 2 (human
    first); non-numeric input no longer raises ValueError and other numbers
    are no longer silently treated as "agent first".

    Returns:
        The result of playGame: 'p1' when the agent wins, 'human' when the
        human wins, or 'TIE'.
    """
    print("Playing with HUMAN")
    print("Enter 1 -> The agent makes the first move")
    print("Enter 2 -> Human makes the first move")
    firstMove=None
    while firstMove not in (1,2):
        try:
            firstMove=int(input())
        except ValueError:
            firstMove=None
        if firstMove not in (1,2):
            print("INVALID! Enter 1 or 2")
    return playGame(choiceOfMove='qlearner',firstMove=firstMove,withHuman=True)

# Tally the results of 10 interactive games between the agent (p1) and a human.
wc=0 # games won by the agent
lc=0 # games lost by the agent
tc=0 # tied games
for i in range(10):
  r=playAgainstHuman() # 'p1', 'TIE' or the other player's name
  if r=='p1': 
    wc+=1 # our agent p1 won
  elif r=='TIE':
    tc+=1 # the game was tied
  else:
    lc+=1 # any other result means our agent lost
print("OUT OF 10 games, win rate = ", wc/10,", tie rate= ",tc/10, ", lose rate= ",lc/10)
# Rates are the counts divided by the 10 games played

Playing with HUMAN
Enter 1 -> The agent makes the first move
Enter 2 -> Human makes the first move
1
.	 --	 .	 2	 .	 

7		 8		 9		 

.	 3	 .	 4	 .	 

10		 11		 12		 

.	 5	 .	 6	 .	 



Score of  p1  is  0
Score of  human  is  0
Next turn for  human
Enter the line number:11
.	 --	 .	 2	 .	 

7		 8		 9		 

.	 3	 .	 4	 .	 

10		 |		 12		 

.	 5	 .	 6	 .	 



Score of  p1  is  0
Score of  human  is  0
Next turn for  p1
.	 --	 .	 2	 .	 

7		 8		 9		 

.	 3	 .	 --	 .	 

10		 |		 12		 

.	 5	 .	 6	 .	 



Score of  p1  is  0
Score of  human  is  0
Next turn for  human
Enter the line number:8
.	 --	 .	 2	 .	 

7		 |		 9		 

.	 3	 .	 --	 .	 

10		 |		 12		 

.	 5	 .	 6	 .	 



Score of  p1  is  0
Score of  human  is  0
Next turn for  p1
.	 --	 .	 --	 .	 

7		 |		 9		 

.	 3	 .	 --	 .	 

10		 |		 12		 

.	 5	 .	 6	 .	 



Score of  p1  is  0
Score of  human  is  0
Next turn for  human
Enter the line number:12
.	 --	 .	 --	 .	 

7		 |		 9		 

.	 3	 .	 --	 .	 

10		 |		 |		 

.	 5	 .	 6	 .	 



S

### Save the QTABLE and print visit table


In [16]:
# Save the learnt Q-table to a JSON file.
# NOTE: JSON object keys are always strings, so the integer action keys are
# written as strings; convert them back to int if the file is ever reloaded.
with open('Qtable.json','w') as jsonfile:
  json.dump(QTable,jsonfile)

# print(VISIT_TABLE)
# Optional: uncomment the line above to check whether all states were visited

## Check the performance of our machine by letting it play N games with random agent and find its winrate



In [17]:
def playGame_AgentVsAgent(opponent='random'):
    """Play one game of the Q-learner (p1, always first) against another agent (p2).

    Works like playGame but without a human: p1 always moves with the
    'qlearner' strategy and p2 with the strategy named by ``opponent``
    (e.g. 'random', or 'qlearner' to play against itself).

    Rewards follow the same scheme: 100 / -100 when a box is completed, and at
    the end 50 each for a tie or 200 / -200 for winner / loser, written onto
    each player's last remembered move. Both memories are then passed to
    updateQTable, so the Q-table keeps learning during these games.

    Returns:
        'TIE', or the name of the player with the higher score ('p1' or 'p2').
    """
    player=["p1","p2"]
    # Strategy used by each player, and who each player's opponent is
    strategy={"p1":'qlearner',"p2":opponent}
    other={"p1":"p2","p2":"p1"}

    currentPlayer="p1"
    # Flags for the four boxes; updated in place to keep score
    box=[0]*4
    memory={name:[] for name in player}
    score={name:0 for name in player}
    # Initial board state: no line drawn
    currentState=BitArray(uint=0, length=12)

    while not isBoardStateFinal(currentState):
        stateBits=currentState.bin
        currentMove=MakeMove(stateBits,strategy[currentPlayer])
        newState=transition(stateBits,currentMove)
        # Remember the move; the reward starts at 0 and may be overwritten below
        memory[currentPlayer].append([stateBits,currentMove,newState,0])
        numberOfBoxes=numberOfNewBoxFormed(box,newState)
        if numberOfBoxes>0:
            # Box completed: score it, reward the mover, penalise the opponent
            score[currentPlayer]+=numberOfBoxes*2
            memory[currentPlayer][-1][-1]=100
            memory[other[currentPlayer]][-1][-1]=-100
        else:
            # No box formed: the turn passes to the other player
            currentPlayer=other[currentPlayer]
        currentState=BitArray(uint=int(newState,2),length=12)

    # Game over: decide the result and write the final rewards on the last moves
    if score["p1"]==score["p2"]:
        result='TIE'
        print(result)
        memory["p1"][-1][-1]=50
        memory["p2"][-1][-1]=50
    else:
        result=max(score,key=score.get)
        print("Winner is ",result)
        memory[result][-1][-1]=200
        memory[other[result]][-1][-1]=-200

    # Learn from both players' moves
    updateQTable(memory["p1"])
    updateQTable(memory["p2"])
    return result

In [18]:
# Evaluate the Q-learner (p1) over N games against a random opponent (p2).
winCount=0 # games won by the Q-learner (p1)
loseCount=0 # games won by the opponent (p2)
N=1000
for i in range(1,N+1):
  # Play Q-learner vs random; the function prints the result itself and also
  # updates the Q-table after the game
  winner = playGame_AgentVsAgent(opponent='random')
  
  print('Game ',i," : Winner is ",winner)
  # winner is 'p1', 'p2' or 'TIE'; ties are counted in neither counter
  if winner =='p1':
    winCount+=1
  if winner =='p2':
    loseCount+=1

# Print the win, lose and tie rates
print("Of the ",N," games, our Q Learner  has winrate =",winCount/N," and loserate =",loseCount/N,". It has tie rate of  ", (N-winCount-loseCount)/N ) #N-win-lose gives tie count



Winner is  p1
Game  1  : Winner is  p1
TIE
Game  2  : Winner is  TIE
Winner is  p1
Game  3  : Winner is  p1
Winner is  p1
Game  4  : Winner is  p1
Winner is  p1
Game  5  : Winner is  p1
Winner is  p1
Game  6  : Winner is  p1
TIE
Game  7  : Winner is  TIE
Winner is  p1
Game  8  : Winner is  p1
Winner is  p1
Game  9  : Winner is  p1
Winner is  p1
Game  10  : Winner is  p1
Winner is  p1
Game  11  : Winner is  p1
Winner is  p1
Game  12  : Winner is  p1
Winner is  p1
Game  13  : Winner is  p1
Winner is  p1
Game  14  : Winner is  p1
Winner is  p1
Game  15  : Winner is  p1
Winner is  p1
Game  16  : Winner is  p1
Winner is  p1
Game  17  : Winner is  p1
Winner is  p1
Game  18  : Winner is  p1
Winner is  p1
Game  19  : Winner is  p1
Winner is  p1
Game  20  : Winner is  p1
Winner is  p1
Game  21  : Winner is  p1
Winner is  p1
Game  22  : Winner is  p1
Winner is  p1
Game  23  : Winner is  p1
Winner is  p1
Game  24  : Winner is  p1
Winner is  p1
Game  25  : Winner is  p1
Winner is  p1
Game  26  : W