# Connect4 Data Generation Script
This notebook generates datasets for Connect4 using Monte Carlo Tree Search (MCTS).

## Import Required Libraries
The following libraries are used for data generation, MCTS, and file handling.

In [9]:
import random
import pandas as pd
from copy import deepcopy
from mcts_ucb import MctsAlgo, Node
from connect4API import Connect4
import math
import time
import os
from multiprocess import Process, Queue, Manager
from tqdm import tqdm  # Add tqdm for the main progress bar

## Helper Functions
These functions assist in creating and manipulating the Connect4 board, as well as generating valid game states.

In [None]:
def create_empty_board() -> list[list[str]]:
    return [['-' for _ in range(COLS)] for _ in range(ROWS)]

def make_random_move(board: list[list[str]], col: int, player: str) -> bool:
    for row in reversed(range(ROWS)):
        if board[row][col] == '-':
            board[row][col] = player
            return True
    return False

def generate_valid_board(num_moves: int) -> (list[list[str]], str):
    board = create_empty_board()
    current_player = 'O'
    move_count = 0
    while move_count < num_moves:
        col = random.randint(0, COLS - 1)
        if make_random_move(board, col, current_player):
            move_count += 1
            current_player = 'X' if current_player == 'O' else 'O'
    return board, current_player

def flatten_board(board: list[list[str]]) -> list[str]:
    return [cell for row in board for cell in row]

def display_progress_bar(progress: int, total: int, bar_length: int = 100):
    completed = int(bar_length * progress / total) + 1
    bar = '#' * completed + '-' * (bar_length - completed)
    print(f"\r/{bar}/ {round(progress / total * 100, 1)}%", end='', flush=True)

def generate_dataset(createFor: str, iterations: int) -> list[list[str]]:
    print("hello")
    dataset = []
    mcts = MctsAlgo(C=C0, reset=resetTree0, drawValue=drawValue0, speed="fast")
    connect4 = Connect4(6, 7)
    count = 0
    min_moves, max_moves = GAMESTATES[createFor]

    while count < iterations:
        display_progress_bar(count, iterations)
        num_moves = random.randint(min_moves, max_moves)
        board, turn = generate_valid_board(num_moves)
        connect4.getIntoDesiredState(turn, gameState=board)

        if connect4.checkGameOver(speed="slow"):  # IGNORA estados terminados
            continue

        # MCTS para determinar resultado provável
        mcts.run_mcts(iterations0, connect4)
        bestMove = str(mcts.choose_best_move())

        flat_board = flatten_board(board)
        flat_board.append(bestMove)
        dataset.append(flat_board)
        count += 1

    return dataset

def save_to_excel(dataset: list[list[str]], saveFor: str):
    col_names = [f"cell_{i}" for i in range(42)] + ["result"]
    df = pd.DataFrame(dataset, columns=col_names)
    filename = f"connect4_{saveFor}.xlsx"
    df.to_excel(f'../datasets/{filename}', index=False)
    print(f"Dataset saved as {filename}")

## Configuration Parameters
Define constants and parameters for MCTS and game state distribution.

In [11]:
# Constants
C0 = math.sqrt(2)
iterations0 = 15000
resetTree0 = True
drawValue0 = 0

ROWS = 6
COLS = 7

GAMESTATES = {
    "early": (4, 14),
    "mid": (15, 28),
    "late": (29, 41)
}

ITERATIONS_PER_GAMESTATE = 3333

## Generate Dataset for Early Game States
This section generates datasets with multiprocessing for early game states and saves them to Excel.

In [None]:
dataset = generate_dataset(createFor="early", iterations=ITERATIONS_PER_GAMESTATE)
save_to_excel(dataset=dataset, saveFor="early")


## Generate Dataset for Mid Game States
This section generates datasets for mid game states and saves them to Excel.

In [None]:
def generate_save_mid():
    datasets = generate_dataset(createFor=["mid"])
    save_to_excel(datasets)

In [None]:
generate_save_mid()

TypeError: unhashable type: 'list'

## Generate Dataset for Late Game States
This section generates datasets for late game states and saves them to Excel.

In [None]:
def generate_save_late():
    datasets = generate_dataset(createFor=["late"])
    save_to_excel(datasets)

In [None]:
generate_save_late()

## Check for Duplicate Rows in the Dataset
This cell loads the generated dataset from an Excel file, checks for duplicate rows (excluding the 'result' column), and displays the count and a sample of duplicate rows.

In [None]:
# Load the Excel file
file_path_early = r"other_version\datasets\connect4_early.xlsx"
file_path_mid = r"other_version\datasets\connect4_mid.xlsx"
file_path_late = r"other_version\datasets\connect4_late.xlsx"

for file_path in [file_path_early,file_path_mid,file_path_late]:
    df = pd.read_excel(file_path)

    # Check for duplicates excluding the 'result' column
    duplicate_rows = df[df.duplicated(subset=df.columns[:-1], keep=False)]

    # Count and display the number of duplicate rows and show a few examples
    duplicate_count = len(duplicate_rows)
    duplicate_rows_sample = duplicate_rows.head()

    duplicate_count, duplicate_rows_sample