In [7]:
import numpy as np
from IPython.display import clear_output
import time
import random
# https://www.youtube.com/watch?v=UXW2yZndl7U

In [9]:
def update_board(board_temp,color,column):
    """Return a copy of the board with one checker dropped into `column`.

    board_temp : 6-row numpy board whose cells are +1, -1 or 0; it is not modified.
    color      : 'plus' places +1; any other value places -1.
    column     : index of the column to drop into.

    If the column already holds six checkers the returned copy is unchanged;
    callers are expected to avoid picking a full column.
    """
    new_board = board_temp.copy()

    # Count the checkers already in the column. The sum is written out by hand
    # on purpose: it runs faster than a numpy sum because of call overhead.
    occupied = (abs(new_board[0,column]) + abs(new_board[1,column])
                + abs(new_board[2,column]) + abs(new_board[3,column])
                + abs(new_board[4,column]) + abs(new_board[5,column]))

    # Checkers stack upward from row 5, so the next free row is 5 - occupied.
    landing_row = int(5 - occupied)
    if landing_row < 0:
        # Column is full: hand back the untouched copy.
        return new_board

    new_board[landing_row,column] = 1 if color == 'plus' else -1
    return new_board

# in this code the board is a 6x7 numpy array.  Each entry is +1, -1 or 0.  You WILL be able to do a better
# job training your neural network if you rearrange this to be a 6x7x2 numpy array.  If the i'th row and j'th
# column is +1, this can be represented by board[i,j,0]=1.  If it is -1, this can be represented by
# board[i,j,1]=1. It's up to you how you represent your board.


In [None]:
# Fill column 3 with six alternating checkers (plus first), then show that a
# seventh drop into the now-full column leaves the board unchanged.
board = np.zeros((6,7))
for drop in range(6):
    board = update_board(board,'plus' if drop % 2 == 0 else 'minus',3)
print(board)
board = update_board(board,'plus',3)
print(board)

[[ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]]
[[ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]]


In [10]:
def check_for_win_slow(board):
    """Scan the whole board for four in a row.

    Returns 'v-plus'/'v-minus' (vertical), 'h-plus'/'h-minus' (horizontal),
    'd-plus'/'d-minus' (either diagonal), or 'nobody' when no line of four exists.
    """
    n_rows = board.shape[0]
    n_cols = board.shape[1]

    # (row step, column step, label prefix), tried in this order for every cell:
    # down, right, down-right diagonal, down-left diagonal.
    directions = ((1, 0, 'v'), (0, 1, 'h'), (1, 1, 'd'), (1, -1, 'd'))

    for c in range(n_cols):
        # Walk each column from the bottom row upward.
        for r in range(n_rows - 1, -1, -1):
            if abs(board[r, c]) < 0.1:
                # Empty cell: everything above it in this column is empty too.
                break
            for d_row, d_col, tag in directions:
                last_r = r + 3 * d_row
                last_c = c + 3 * d_col
                # Skip windows whose far end would fall off the board.
                if last_r > n_rows - 1 or last_c > n_cols - 1 or last_c < 0:
                    continue
                # Hand-written sum: much faster than np.sum for four cells.
                total = (board[r, c]
                         + board[r + d_row, c + d_col]
                         + board[r + 2 * d_row, c + 2 * d_col]
                         + board[last_r, last_c])
                if total == 4:
                    return tag + '-plus'
                if total == -4:
                    return tag + '-minus'
    return 'nobody'

In [11]:
def check_for_win(board,col):
    """Check whether the top checker in column `col` completes four in a row.

    This is the fast counterpart of check_for_win_slow: it only inspects lines
    that pass through the highest occupied cell of `col`, so `col` should be
    the column that was played last. The code is deliberately unrolled: in
    MCTS this function is called more than anything else, so every
    microsecond is worth saving.

    board : 6x7 numpy board with cells +1, -1 or 0.
    col   : column index of the last move.

    Returns 'v-plus'/'v-minus', 'h-plus'/'h-minus', 'd-plus'/'d-minus',
    or 'nobody'. An empty column also yields 'nobody'.
    """
    # Number of checkers in the column; the top checker sits at row 6 - colsum.
    colsum = abs(board[0,col])+abs(board[1,col])+abs(board[2,col])+abs(board[3,col])+abs(board[4,col])+abs(board[5,col])
    row = int(6-colsum)
    if row > 5:
        # Empty column: there is no checker to anchor a line on. Without this
        # guard the lookups below would index row 6 and raise IndexError.
        return 'nobody'
    # vertical: only the window hanging down from the top checker can be new
    if row+3<6:
        vert = board[row,col] + board[row+1,col] + board[row+2,col] + board[row+3,col]
        if vert == 4:
            return 'v-plus'
        elif vert == -4:
            return 'v-minus'
    # horizontal: the four windows of length 4 that contain (row, col)
    if col+3<7:
        hor = board[row,col] + board[row,col+1] + board[row,col+2] + board[row,col+3]
        if hor == 4:
            return 'h-plus'
        elif hor == -4:
            return 'h-minus'
    if col-1>=0 and col+2<7:
        hor = board[row,col-1] + board[row,col] + board[row,col+1] + board[row,col+2]
        if hor == 4:
            return 'h-plus'
        elif hor == -4:
            return 'h-minus'
    if col-2>=0 and col+1<7:
        hor = board[row,col-2] + board[row,col-1] + board[row,col] + board[row,col+1]
        if hor == 4:
            return 'h-plus'
        elif hor == -4:
            return 'h-minus'
    if col-3>=0:
        hor = board[row,col-3] + board[row,col-2] + board[row,col-1] + board[row,col]
        if hor == 4:
            return 'h-plus'
        elif hor == -4:
            return 'h-minus'
    # diagonal running down-right (row and column increase together)
    if row < 3 and col < 4:
        DR = board[row,col] + board[row+1,col+1] + board[row+2,col+2] + board[row+3,col+3]
        if DR == 4:
            return 'd-plus'
        elif DR == -4:
            return 'd-minus'
    if row-1>=0 and col-1>=0 and row+2<6 and col+2<7:
        DR = board[row-1,col-1] + board[row,col] + board[row+1,col+1] + board[row+2,col+2]
        if DR == 4:
            return 'd-plus'
        elif DR == -4:
            return 'd-minus'
    if row-2>=0 and col-2>=0 and row+1<6 and col+1<7:
        DR = board[row-2,col-2] + board[row-1,col-1] + board[row,col] + board[row+1,col+1]
        if DR == 4:
            return 'd-plus'
        elif DR == -4:
            return 'd-minus'
    if row-3>=0 and col-3>=0:
        DR = board[row-3,col-3] + board[row-2,col-2] + board[row-1,col-1] + board[row,col]
        if DR == 4:
            return 'd-plus'
        elif DR == -4:
            return 'd-minus'
    # diagonal running down-left (row increases while column decreases)
    if row+3<6 and col-3>=0:
        DL = board[row,col] + board[row+1,col-1] + board[row+2,col-2] + board[row+3,col-3]
        if DL == 4:
            return 'd-plus'
        elif DL == -4:
            return 'd-minus'
    if row-1 >= 0 and col+1 < 7 and row+2<6 and col-2>=0:
        DL = board[row-1,col+1] + board[row,col] + board[row+1,col-1] + board[row+2,col-2]
        if DL == 4:
            return 'd-plus'
        elif DL == -4:
            return 'd-minus'
    if row-2 >=0 and col+2<7 and row+1<6 and col-1>=0:
        DL = board[row-2,col+2] + board[row-1,col+1] + board[row,col] + board[row+1,col-1]
        if DL == 4:
            return 'd-plus'
        elif DL == -4:
            return 'd-minus'
    if row-3>=0 and col+3<7:
        DL = board[row-3,col+3] + board[row-2,col+2] + board[row-1,col+1] + board[row,col]
        if DL == 4:
            return 'd-plus'
        elif DL == -4:
            return 'd-minus'
    return 'nobody'

In [12]:
def find_legal(board):
    """Return the list of column indices that can still take a checker.

    A column is playable while its top cell (row 0) is empty. The number of
    columns is read from the board itself rather than assumed to be 7, so the
    function also works on boards of other widths.
    """
    legal = [i for i in range(board.shape[1]) if abs(board[0,i]) < 0.1]
    return legal

In [13]:
def look_for_win(board_,color):
    """Return the first legal column where `color` wins immediately, or -1.

    Each legal column is tried on a scratch copy of the board; the input board
    is left untouched.
    """
    position = board_.copy()
    for candidate in find_legal(position):
        trial = update_board(position.copy(),color,candidate)
        # check_for_win returns e.g. 'v-plus'; dropping the 2-character
        # direction prefix leaves the winning colour name.
        if check_for_win(trial,candidate)[2:] == color:
            return candidate
    return -1

In [14]:
def find_all_nonlosers(board,color):
    """Return the legal columns for `color` that give the opponent no immediate win.

    For every legal move, each opponent reply is tried on the resulting board;
    the move is kept only when no reply makes check_for_win report a winner.
    """
    opp = 'minus' if color == 'plus' else 'plus'

    allowed = []
    for my_move in find_legal(board):
        after_mine = update_board(board,color,my_move)
        # Count opponent replies that end the game on the spot.
        losing_replies = sum(
            1
            for reply in find_legal(after_mine)
            if check_for_win(update_board(after_mine,opp,reply),reply) != 'nobody'
        )
        if losing_replies == 0:
            allowed.append(my_move)
    return allowed

In [15]:
def back_prop(winner,path,color0,md):
    """Push a finished game's result back along the visited path.

    winner : result string such as 'v-plus', 'h-minus' or 'tie'; only its third
             character is inspected ('p', 'm', or 'e' for a tie).
    path   : sequence of keys into `md`, root position first.
    color0 : colour of the player the search is run for ('plus' or 'minus').
    md     : dict mapping key -> [visit count, score]; updated in place.

    Every node on the path gets one more visit. When color0 won, odd-depth
    nodes gain a point and even-depth nodes lose one; when the other side won
    the signs are reversed; a tie leaves the scores alone.
    """
    for depth, key in enumerate(path):
        stats = md[key]
        stats[0] += 1

        outcome = winner[2]
        if outcome == color0[0]:
            reward = 1
        elif outcome == 'e': # tie
            continue
        else:
            reward = -1

        if depth % 2 == 1:
            stats[1] += reward
        else:
            stats[1] -= reward

In [16]:
def rollout(board,next_player):
    """Play uniformly random moves from `board` until the game ends.

    board       : starting position (update_board returns copies, so it is not modified).
    next_player : 'plus' or 'minus', the side to move first.

    Returns the check_for_win result for the move that ended the game, or
    'tie' when no legal move is left.
    """
    mover = next_player
    while True:
        options = find_legal(board)
        if len(options) == 0:
            return 'tie'
        choice = random.choice(options)
        board = update_board(board,mover,choice)
        result = check_for_win(board,choice)
        if result != 'nobody':
            return result
        mover = 'minus' if mover == 'plus' else 'plus'


In [17]:
def mcts(board_temp,color0,nsteps):
    """Choose a column for `color0` using Monte Carlo tree search.

    board_temp : current 6x7 board (not modified).
    color0     : 'plus' or 'minus', the side to move.
    nsteps     : number of search iterations. Bigger values make the player
                 better, but also slower to figure out a move.

    Returns a column index. Returns -1 only when the board has no legal move.
    """
    board = board_temp.copy()
    ##############################################
    winColumn = look_for_win(board,color0) # check to find a winning column
    if winColumn > -0.5:
        return winColumn # if there is one - play that!
    legal0 = find_all_nonlosers(board,color0) # find all moves that won't immediately lead to your opponent winning
    if len(legal0) == 0: # if you can't block your opponent - just find the 'best' losing move
        legal0 = find_legal(board)
    ##############################################
    # the code above, in between the hash rows, is not part of traditional MCTS
    # but it makes it better and faster - so I included it!
    # MCTS occasionally makes stupid mistakes
    # like not dropping the checker on a winning column, or not blocking an obvious opponent win
    # this avoids a little bit of that stupidity!
    # we could also add this logic to the rest of the MCTS and rollout functions - I just haven't done that yet...
    # feel free to experiment!

    # statistics table: flattened board tuple -> [visit count, score]
    mcts_dict = {tuple(board.ravel()):[0,0]}
    for ijk in range(nsteps):
        color = color0
        winner = 'nobody'
        board_mcts = board.copy()
        path = [tuple(board_mcts.ravel())]
        while winner == 'nobody':
            legal = find_legal(board_mcts)
            if len(legal) == 0:
                winner = 'tie'
                back_prop(winner,path,color0,mcts_dict)
                break
            # register every child position of the current node
            board_list = []
            for col in legal:
                board_list.append(tuple(update_board(board_mcts,color,col).ravel()))
            for bl in board_list:
                if bl not in mcts_dict:
                    mcts_dict[bl] = [0,0]
            # UCB1 score per child; unvisited children get a huge score so they are tried first
            ucb1 = np.zeros(len(legal))
            for i in range(len(legal)):
                num_denom = mcts_dict[board_list[i]]
                if num_denom[0] == 0:
                    ucb1[i] = 10*nsteps
                else:
                    ucb1[i] = num_denom[1]/num_denom[0] + 2*np.sqrt(np.log(mcts_dict[path[-1]][0])/mcts_dict[board_list[i]][0])
            chosen = np.argmax(ucb1)

            board_mcts = update_board(board_mcts,color,legal[chosen])
            path.append(tuple(board_mcts.ravel()))
            winner = check_for_win(board_mcts,legal[chosen])
            if winner[2]==color[0]:
                # the move just played won the game: record it and start the next iteration
                back_prop(winner,path,color0,mcts_dict)
                break
            if color == 'plus':
                color = 'minus'
            else:
                color = 'plus'
            if mcts_dict[tuple(board_mcts.ravel())][0] == 0:
                # first visit to this position: estimate it with one random playout
                winner = rollout(board_mcts,color)
                back_prop(winner,path,color0,mcts_dict)
                break

    # Pick the candidate with the best average score. Start from the first
    # candidate instead of -1: when nsteps is too small for any candidate to
    # have been visited, returning -1 would make callers index column -1,
    # which numpy treats as the last column.
    maxval = -np.inf
    best_col = legal0[0] if len(legal0) > 0 else -1
    for col in legal0:
        child_key = tuple(update_board(board,color0,col).ravel())
        # .get: with nsteps == 0 the children were never added to the table
        num_denom = mcts_dict.get(child_key)
        if num_denom is None or num_denom[0] == 0:
            continue
        compare = num_denom[1] / num_denom[0]
        if compare > maxval:
            maxval = compare
            best_col = col
    return (best_col)


In [18]:
# Ask mcts for plus's move on an empty 6x7 board using 5000 search iterations;
# the cell displays the returned column index.
mcts(np.zeros((6,7)),'plus',5000)

3

In [19]:
# Self-play demo: plus searches with 1500 iterations per move, minus with 300.
board = np.zeros((6,7))
winner = 'nobody'
color = 'plus'
while winner == 'nobody':
    # A full board means a draw. Without this check mcts would return -1 and
    # the loop would keep "playing" column -1 forever.
    if len(find_legal(board)) == 0:
        winner = 'tie'
        break
    if color == 'minus':
        col = mcts(board,color,300)
    else:
        col = mcts(board,color,1500)
    board = update_board(board,color,col)
    winner = check_for_win(board,col)
    if color == 'plus':
        color = 'minus'
    else:
        color = 'plus'
    print(board)
    print('=========================')
print(winner)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]]
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]]
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]]
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]]
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  1.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.]]
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. 

In [20]:
def display_board(board):
    """Clear the cell output and print the 6x7 board as an ASCII grid.

    Cells equal to 0 are blank, cells equal to 1 are drawn as X and every
    other value is drawn as O. Column numbers are printed above and below.
    For the project, this should be a better picture of the board...
    """
    clear_output()
    column_labels = '   0     1     2     3     4     5     6'
    rule = '-' * 43                       # 7 cells * 5 chars + 8 separators
    padding_row = ('|' + ' ' * 5) * 7 + '|'

    print(column_labels)
    print(rule)
    for r in range(6):
        cells = []
        for c in range(7):
            value = board[r,c]
            if value == 0:
                cells.append(' ' * 5)
            elif value == 1:
                cells.append('  X  ')
            else:
                cells.append('  O  ')
        print(padding_row)
        print('|' + '|'.join(cells) + '|')
        print(padding_row)
        print(rule)
    print(column_labels)



In [21]:
# this is how you can play a game
# Two humans alternate typing a column number; plus moves first.
winner = 'nobody'
board = np.zeros((6,7))

display_board(board)

player = 'plus'

while winner == 'nobody':
    legal = find_legal(board)
    if len(legal) == 0:
        # board is full and nobody connected four
        winner = 'tie'
        break
    move = input('Pick a move (0-6) for player '+player+': ')
    try:
        move = int(move)
    except ValueError:
        # not a number: ask the same player again instead of crashing
        print('Please type a whole number.')
        continue
    if move not in legal:
        # off the board, negative, or a full column: the turn is not consumed
        print('Column '+str(move)+' is not available. Open columns: '+str(legal))
        continue
    board = update_board(board,player,move)
    display_board(board)
    winner = check_for_win(board,move)
    if player == 'plus':
        player = 'minus'
    else:
        player = 'plus'
if winner == 'tie':
    print('The game is a tie')
else:
    print('The winner is '+winner)


   0     1     2     3     4     5     6
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |  X  |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |  X  |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |    

In [22]:
def convert_to_onehot(board):
    """Split a +1/-1/0 board into two binary channels.

    For a 6x7 board the result is 6x7x2, which is better for neural networks.
    Channel 0 is 1.0 where the board holds +1, channel 1 is 1.0 where it
    holds -1, and both are 0.0 on empty cells. The output shape is the input
    shape plus a trailing axis of length 2, so other board sizes work too.
    """
    board = np.asarray(board)
    onehot = np.zeros(board.shape + (2,))
    onehot[..., 0] = (board == 1)
    onehot[..., 1] = (board == -1)
    return onehot

## Generate MCTS Dataset


In [23]:
# Configuration
# These settings are read by the data-generation loop in the next cell.
num_games = 2  #change this number for each batch (used 200-500 at a time)
mcts_skill = 1500  # passed to mcts() as nsteps (search iterations per move)
use_onehot = True  # True: store positions as 6x7x2 one-hot; False: store the raw 6x7 board
max_random_opening = 3  # Add diversity to opening positions

# Echo the settings so the saved output records how this batch was produced.
print(f"  Games: {num_games}")
print(f"  MCTS skill: {mcts_skill}")
print(f"  Board encoding: {'6x7x2' if use_onehot else '6x7'}")
print(f"  Random opening: 0-{max_random_opening} moves")

  Games: 2
  MCTS skill: 1500
  Board encoding: 6x7x2
  Random opening: 0-3 moves


In [24]:
# Self-play data generation: every position the MCTS player sees is stored in
# `boards` and the column it chose is stored at the same index in `moves`.
boards = []
moves = []

for game_num in range(num_games):

    board = np.zeros((6,7))
    winner = 'nobody'
    color = 'plus'

    # Random opening moves - not recorded
    num_random = random.randint(0, max_random_opening)
    for i in range(num_random):
        legal = find_legal(board)
        if len(legal) == 0:
            break
        col = random.choice(legal)
        board = update_board(board, color, col)
        winner = check_for_win(board, col)
        if winner != 'nobody':
            break
        color = 'minus' if color == 'plus' else 'plus'

    # Skip if game ended during opening
    if winner != 'nobody':
        continue

    # Dan's game loop with move counter
    move_num = 0
    while winner == 'nobody':
        # A full board is a draw; label it so the summary line below does not
        # report the misleading "Winner: nobody".
        legal = find_legal(board)
        if len(legal) == 0:
            winner = 'tie'
            break

        move_num += 1
        print(f"  Game {game_num+1}, Move {move_num}")

        # Get MCTS move
        col = mcts(board, color, mcts_skill)

        # Record position (always from +1 perspective): when minus is to move
        # the two colours are swapped so the mover always appears as +1.
        if use_onehot:
            position = convert_to_onehot(board)
            if color != 'plus':
                position = position[:,:,[1,0]]  # swap the plus/minus channels
        else:
            position = board.copy() if color == 'plus' else -board.copy()
        boards.append(position)
        moves.append(col)

        # Make move
        board = update_board(board, color, col)
        winner = check_for_win(board, col)

        # Switch players
        color = 'minus' if color == 'plus' else 'plus'

    print(f'Game {game_num+1}/{num_games} - Winner: {winner}\n')

print(f"\nDone! Collected {len(boards)} positions")

  Game 1, Move 1
  Game 1, Move 2
  Game 1, Move 3
  Game 1, Move 4
  Game 1, Move 5
  Game 1, Move 6
  Game 1, Move 7
  Game 1, Move 8
  Game 1, Move 9
  Game 1, Move 10
  Game 1, Move 11
  Game 1, Move 12
  Game 1, Move 13
  Game 1, Move 14
  Game 1, Move 15
  Game 1, Move 16
  Game 1, Move 17
  Game 1, Move 18
  Game 1, Move 19
  Game 1, Move 20
  Game 1, Move 21
  Game 1, Move 22
  Game 1, Move 23
  Game 1, Move 24
  Game 1, Move 25
  Game 1, Move 26
  Game 1, Move 27
  Game 1, Move 28
  Game 1, Move 29
  Game 1, Move 30
  Game 1, Move 31
  Game 1, Move 32
  Game 1, Move 33
  Game 1, Move 34
  Game 1, Move 35
  Game 1, Move 36
  Game 1, Move 37
  Game 1, Move 38
  Game 1, Move 39
  Game 1, Move 40
Game 1/2 - Winner: h-minus

  Game 2, Move 1
  Game 2, Move 2
  Game 2, Move 3
  Game 2, Move 4
  Game 2, Move 5
  Game 2, Move 6
  Game 2, Move 7
  Game 2, Move 8
  Game 2, Move 9
  Game 2, Move 10
  Game 2, Move 11
  Game 2, Move 12
  Game 2, Move 13
  Game 2, Move 14
  Game 2, Move 15


In [25]:
# Write the positions collected by the generation loop to two .npy files whose
# names carry the batch number, so successive runs do not overwrite each other.
batch_number = 9  # change this number based on which batch you are running

# boards -> X (positions), moves -> Y (the column chosen for each position)
X_batch = np.array(boards)
Y_batch = np.array(moves)

# Save with batch number in filename
np.save(f"connect4_X_batch{batch_number}.npy", X_batch)
np.save(f"connect4_Y_batch{batch_number}.npy", Y_batch)

print(f" batch {batch_number} saved!")
print(f"   Positions: {len(X_batch):,}")
print(f"   File: connect4_X_batch{batch_number}.npy")

 batch 9 saved!
   Positions: 64
   File: connect4_X_batch9.npy


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Files downloading to your computer!


In [27]:
#convert to arrays and analyze
# X holds the recorded positions, Y the column chosen for each of them.
X = np.array(boards)
Y = np.array(moves)

print("Dataset Statistics:")
print(f"  Total positions: {len(X)}")
print(f"  Board shape: {X.shape}")
print(f"  Moves shape: {Y.shape}")
# num_games is the configured game count from the configuration cell.
print(f"  Average positions per game: {len(X)/num_games:.1f}")

Dataset Statistics:
  Total positions: 64
  Board shape: (64, 6, 7, 2)
  Moves shape: (64,)
  Average positions per game: 32.0


In [29]:
#check for duplicates
# One row per position, so that np.unique can compare whole boards
boards_flat = X.reshape(len(X), -1)

# Distinct rows are the distinct boards
unique_boards = np.unique(boards_flat, axis=0)

n_positions = len(X)
n_unique = len(unique_boards)
n_duplicates = n_positions - n_unique

print(f"\nUnique boards: {n_unique}")
print(f"Duplicate boards: {n_duplicates}")
print(f"Duplicate rate: {n_duplicates/n_positions*100:.1f}%")


Unique boards: 64
Duplicate boards: 0
Duplicate rate: 0.0%


In [None]:
#upload all batches to combine
# Colab-only: google.colab is not importable outside a Colab runtime.
# `uploaded` keeps whatever files.upload() returns for the chosen files.
from google.colab import files
uploaded = files.upload()


Saving connect4_X_Frank.npy to connect4_X_Frank.npy
Saving connect4_X.npy to connect4_X.npy
Saving connect4_Y_Frank.npy to connect4_Y_Frank.npy
Saving connect4_Y.npy to connect4_Y.npy


In [None]:
#combine all batches into a final dataset

# Load all X batches
X1 = np.load('connect4_X_batch1.npy')
# NOTE(review): this X file name carries a " (1)" suffix while its Y partner
# does not - confirm both come from the same run. The length check below
# catches a mismatch in size, but not a same-sized wrong file.
X2 = np.load('connect4_X_batch2 (1).npy')
X3 = np.load('connect4_X_batch3.npy')
X4 = np.load('connect4_X_batch4.npy')
X5 = np.load('connect4_X_batch5.npy')
X6 = np.load('connect4_X_batch6.npy')

# Load all Y batches
Y1 = np.load('connect4_Y_batch1.npy')
Y2 = np.load('connect4_Y_batch2.npy')
Y3 = np.load('connect4_Y_batch3.npy')
Y4 = np.load('connect4_Y_batch4.npy')
Y5 = np.load('connect4_Y_batch5.npy')
Y6 = np.load('connect4_Y_batch6.npy')

print("All batches loaded!")

X_batches = [X1, X2, X3, X4, X5, X6]
Y_batches = [Y1, Y2, Y3, Y4, Y5, Y6]

# Each position must line up with its move; stop before writing a dataset
# whose boards and labels are silently out of step.
for batch_idx, (X_part, Y_part) in enumerate(zip(X_batches, Y_batches), start=1):
    if len(X_part) != len(Y_part):
        raise ValueError(
            f"batch {batch_idx}: {len(X_part)} boards but {len(Y_part)} moves"
        )

# Combine them
X_final = np.concatenate(X_batches)
Y_final = np.concatenate(Y_batches)

print("\nFinal Combined Dataset")
print(f" Final shapes:")
print(f"   X: {X_final.shape}")
print(f"   Y: {Y_final.shape}")

print(f"\n Statistics:")
total_positions = len(X_final)
total_games = len(X_batches) * 500  # number of batches × 500 games per batch
positions_per_game = total_positions / total_games

print(f"   Total positions: {total_positions:,}")
print(f"   Total games: {total_games:,}")
print(f"   Avg positions/game: {positions_per_game:.1f}")

print(f"\n Move distribution:")
for col in range(7):
    count = np.sum(Y_final == col)
    pct = count / len(Y_final) * 100
    bar = '#' * int(pct / 2)  # one '#' per 2 percentage points
    print(f"   Col {col}: {bar:15s} {count:5d} ({pct:5.1f}%)")

# Save final dataset
np.save('connect4_X.npy', X_final)
np.save('connect4_Y.npy', Y_final)

# Download them
from google.colab import files
files.download('connect4_X.npy')
files.download('connect4_Y.npy')

print("\nDONE!")

Loading all batches...
✅ All batches loaded!

🎉 FINAL COMBINED DATASET

📊 Final shapes:
   X: (58608, 6, 7, 2)
   Y: (58608,)

📈 Statistics:
   Total positions: 58,608
   Total games: 3,000
   Avg positions/game: 19.5

✅ Move distribution:
   Col 0: #######          8371 ( 14.3%)
   Col 1: ######           7981 ( 13.6%)
   Col 2: #######          8335 ( 14.2%)
   Col 3: ########         9946 ( 17.0%)
   Col 4: #######          8365 ( 14.3%)
   Col 5: ######           7597 ( 13.0%)
   Col 6: ######           8013 ( 13.7%)

✅ FINAL FILES SAVED!
   connect4_X.npy
   connect4_Y.npy


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


🎉 DONE! Your dataset is ready for training!


In [None]:
#Mirror data and verify it's correct
# Doubles the dataset by adding the left-right mirror image of every position
# together with the correspondingly mirrored move.
import numpy as np

# Load original data
X_yours = np.load('connect4_X.npy')
Y_yours = np.load('connect4_Y.npy')

print(f"   Original positions: {len(X_yours):,}")
print(f"   Original shape: {X_yours.shape}")

# Mirror the boards (flip left-right)
X_yours_mirrored = np.flip(X_yours, axis=2)  # Flip columns
Y_yours_mirrored = 6 - Y_yours  # Mirror the moves: 0→6, 1→5, 2→4, 3→3

# Combine original + mirrored
X_yours_doubled = np.concatenate([X_yours, X_yours_mirrored])
Y_yours_doubled = np.concatenate([Y_yours, Y_yours_mirrored])

print(f"\n   Doubled dataset: {len(X_yours_doubled):,} positions")

# Verify mirroring worked correctly
print("\n[3/3] Verification checks...")

# Check: Size doubled
print(f"\n Check 1 - Size:")
print(f"   Original: {len(X_yours):,}")
print(f"   Doubled: {len(X_yours_doubled):,}")
print(f"   Correctly doubled: {len(X_yours_doubled) == len(X_yours) * 2}")


# Check symmetry
# NOTE(review): Y_yours_doubled is Y_yours followed by 6 - Y_yours, so the
# count for column c always equals the count for column 6 - c. These
# differences are zero by construction; the check says nothing about whether
# the boards in X were flipped along the right axis.
col_counts = [np.sum(Y_yours_doubled == col) for col in range(7)]
sym_0_6 = abs(col_counts[0] - col_counts[6])
sym_1_5 = abs(col_counts[1] - col_counts[5])
sym_2_4 = abs(col_counts[2] - col_counts[4])

print(f"\n Check 2 - Symmetry verification:")
print(f"   Col 0 vs Col 6: difference = {sym_0_6}")
print(f"   Col 1 vs Col 5: difference = {sym_1_5}")
print(f"   Col 2 vs Col 4: difference = {sym_2_4}")

if sym_0_6 == 0 and sym_1_5 == 0 and sym_2_4 == 0:
    print(f"  PERFECT SYMMETRY - Mirroring done correctly!")
else:
    print(f"  Something's wrong with mirroring")


# Save doubled data
np.save('connect4_X_yours_doubled.npy', X_yours_doubled)
np.save('connect4_Y_yours_doubled.npy', Y_yours_doubled)

print(f"\n Saved:")
print(f"   connect4_X_yours_doubled.npy ({len(X_yours_doubled):,} positions)")
print(f"   connect4_Y_yours_doubled.npy")

🪞 MIRRORING YOUR DATA

[1/3] Loading your original data...
   Original positions: 72,689
   Original shape: (72689, 6, 7, 2)

[2/3] Mirroring boards and moves...
   ✅ Boards mirrored (flipped left-right)
   ✅ Moves mirrored (0↔6, 1↔5, 2↔4, 3→3)

   Doubled dataset: 145,378 positions

[3/3] Verification checks...

✅ Check 1 - Size:
   Original: 72,689
   Doubled: 145,378
   Correctly doubled: True

✅ Check 2 - Move distribution (should be symmetric):
   Col 0: #####           20250 ( 13.9%)
   Col 1: #####           19418 ( 13.4%)
   Col 2: #####           20630 ( 14.2%)
   Col 3: ######          24782 ( 17.0%)
   Col 4: #####           20630 ( 14.2%)
   Col 5: #####           19418 ( 13.4%)
   Col 6: #####           20250 ( 13.9%)

✅ Check 3 - Symmetry verification:
   Col 0 vs Col 6: difference = 0
   Col 1 vs Col 5: difference = 0
   Col 2 vs Col 4: difference = 0
   ✅✅✅ PERFECT SYMMETRY - Mirroring done correctly!

✅ Check 4 - Sample board comparison:

 

In [None]:
# Colab-only upload step; `uploaded` keeps whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()

In [None]:
#Remove duplicates with majority vote
import numpy as np
from collections import defaultdict, Counter

# NOTE(review): X_all, Y_all and X_flat are not created in this cell; they must
# already exist in the kernel. X_flat is presumably X_all reshaped to one row
# per board - confirm before running on a fresh kernel.

# Group boards and their moves
board_to_moves = defaultdict(list)
board_to_first_idx = {}

for i in range(len(X_all)):
    # Use the row's raw bytes as the dictionary key: far cheaper to build and
    # hash than a tuple of numpy scalars. Adding 0.0 turns any -0.0 into 0.0
    # so numerically equal boards still share one key.
    board_key = (X_flat[i] + 0.0).tobytes()
    board_to_moves[board_key].append(Y_all[i])
    if board_key not in board_to_first_idx:
        board_to_first_idx[board_key] = i

# For each unique board, pick the most common move
print(" Computing majority votes...")
X_final = []
Y_final = []

# The loop variable is called `votes`, not `moves`, so it does not overwrite
# the notebook-level `moves` list built by the data-generation cell.
for board_key, votes in board_to_moves.items():
    # Get the most common move (majority vote)
    move_counts = Counter(votes)
    majority_move = move_counts.most_common(1)[0][0]

    # Get the board using first occurrence index
    idx = board_to_first_idx[board_key]
    X_final.append(X_all[idx])
    Y_final.append(majority_move)

X_final = np.array(X_final)
Y_final = np.array(Y_final)

print(f"\n Results:")
print(f"   Original: {len(X_all):,} positions")
print(f"   Unique boards: {len(X_final):,}")
print(f"   Removed: {len(X_all) - len(X_final):,} duplicates")

print(f"\nMove distribution:")
for col in range(7):
    count = np.sum(Y_final == col)
    pct = count / len(Y_final) * 100
    bar = '#' * int(pct / 3)  # one '#' per 3 percentage points
    print(f"   Col {col}: {bar:15s} {count:5d} ({pct:5.1f}%)")

# Save
np.save('connect4_X_FINAL.npy', X_final)
np.save('connect4_Y_FINAL.npy', Y_final)

print("Saved with majority vote!")
print(f"   connect4_X_FINAL.npy ({len(X_final):,} positions)")
print(f"   connect4_Y_FINAL.npy")

# Download
from google.colab import files
files.download('connect4_X_FINAL.npy')
files.download('connect4_Y_FINAL.npy')

🗳️ REMOVING DUPLICATES WITH MAJORITY VOTE

📊 Original: 222,539 positions

⏳ Grouping duplicate boards and finding majority votes...
⏳ Computing majority votes...


KeyboardInterrupt: 