<a href="https://colab.research.google.com/github/Ulrike13/KrestikiNoliki/blob/main/KrestikiNoliki.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import random
import numpy as np

# Number of cells on the 3x3 board; cells are indexed 0..8, row by row.
BOARD_SIZE = 9

# Every triple of cell indices that forms a winning line.
WINNING_COMBINATIONS = [
    # Rows
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    # Columns
    [0, 3, 6],
    [1, 4, 7],
    [2, 5, 8],
    # Diagonals
    [0, 4, 8],
    [2, 4, 6],
]

def initialize_board():
    """Return a fresh board: a list of BOARD_SIZE cells, each holding ' '."""
    return [' '] * BOARD_SIZE

def draw_board(board):
    """Print the board as three rows of cells framed by dashed borders.

    Args:
        board: sequence of BOARD_SIZE cell values, laid out row by row.
    """
    border = '-' * 13
    print(border)
    for row_start in range(0, BOARD_SIZE, 3):
        left = board[row_start]
        middle = board[row_start + 1]
        right = board[row_start + 2]
        print(f'| {left!s} | {middle!s} | {right!s} |')
    print(border)

def check_winner(board, symbol):
    """Return True if `symbol` fills every cell of some winning combination.

    Args:
        board: sequence of cell values indexed 0..8.
        symbol: the mark to look for (e.g. 'X' or 'O').
    """
    for combo in WINNING_COMBINATIONS:
        for cell in combo:
            if board[cell] != symbol:
                break
        else:
            # No cell in this combination broke the streak.
            return True
    return False

def player_move(board, available_actions):
    """Prompt the human until they enter the index of a free cell.

    Args:
        board: current board (not inspected here).
        available_actions: collection of cell indices that are still free.

    Returns:
        The chosen cell index, guaranteed to be in `available_actions`.
    """
    prompt = "Введите номер ячейки (от 0 до 8): "
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            # Input was not an integer; ask again.
            print("Введите корректное число.")
            continue
        if choice in available_actions:
            return choice
        print("Неправильный ход, попробуйте еще раз.")

def agent_move(board, available_actions, exploration_rate, Q):
    """Pick the agent's move with an epsilon-greedy rule.

    Args:
        board: current board (not inspected here).
        available_actions: list of free cell indices to choose from.
        exploration_rate: probability of choosing a random free cell.
        Q: per-cell values, indexable by cell index.

    Returns:
        A cell index taken from `available_actions`.
    """
    explore = random.uniform(0, 1) < exploration_rate
    if explore:
        return random.choice(available_actions)
    # Greedy choice: the free cell with the highest value (first one on ties).
    return max(available_actions, key=Q.__getitem__)

def update_q_values(Q, action, reward, alpha):
    """Move Q[action] a fraction `alpha` of the way toward `reward`, in place.

    Args:
        Q: mutable per-cell value table.
        action: index of the entry to update.
        reward: target value for this update.
        alpha: step size.

    Returns:
        The same `Q` object that was passed in.
    """
    current = Q[action]
    Q[action] = current + alpha * (reward - current)
    return Q

def train_agent(episodes, alpha, gamma, exploration_rate):
    """Train the module-level table ``Q`` on games against a random opponent.

    ``Q`` holds one value per board cell (it is indexed by action only). Each
    episode follows the turn order used in ``play_game``: 'X' moves first
    (here chosen uniformly at random) and the agent answers with 'O'.

    Updates applied through ``update_q_values``:
      * +1 for the agent move that completes a line of 'O';
      * -1 for the agent's latest move when 'X' then completes a line;
      * ``gamma * Q[next_action]`` for any other agent move, where
        ``next_action`` is what ``agent_move`` picks from the remaining cells.

    An episode ends only on a real terminal state (a win for either side or
    a full board), never on the numeric value of a bootstrapped reward.

    Args:
        episodes: number of games to simulate.
        alpha: step size passed to ``update_q_values``.
        gamma: discount applied to the bootstrapped value.
        exploration_rate: probability that ``agent_move`` plays randomly.
    """
    global Q
    for _ in range(episodes):
        board = initialize_board()
        available_actions = list(range(BOARD_SIZE))
        last_agent_action = None  # most recent cell played by the agent

        while available_actions:
            # Opponent's move: without it no 'X' is ever on the board and the
            # losing reward could never be observed.
            opponent_action = random.choice(available_actions)
            available_actions.remove(opponent_action)
            board[opponent_action] = 'X'

            if check_winner(board, 'X'):
                # Attribute the loss to the agent's latest move.
                if last_agent_action is not None:
                    Q = update_q_values(Q, last_agent_action, -1, alpha)
                break

            if not available_actions:
                # Board is full with no winner: a draw, the episode is over.
                break

            # Agent's move
            action = agent_move(board, available_actions, exploration_rate, Q)
            available_actions.remove(action)
            board[action] = 'O'
            last_agent_action = action

            if check_winner(board, 'O'):
                Q = update_q_values(Q, action, 1, alpha)
                break

            # Non-terminal position: bootstrap from the value of the move the
            # agent would choose next. Skipped if no cell is left to pick.
            if available_actions:
                next_action = agent_move(board, available_actions, exploration_rate, Q)
                Q = update_q_values(Q, action, gamma * Q[next_action], alpha)

def play_game(exploration_rate):
    """Play one interactive game: the human is 'X' and moves first, the agent is 'O'.

    The agent chooses its moves with ``agent_move`` using the module-level
    table ``Q``. The board is printed before every human move and once more
    when the game ends, followed by the result message.

    Args:
        exploration_rate: probability that the agent plays a random free cell.
    """
    board = initialize_board()
    available_actions = list(range(BOARD_SIZE))

    while ' ' in board:
        # Player's turn
        draw_board(board)
        action = player_move(board, available_actions)
        available_actions.remove(action)
        board[action] = 'X'

        if check_winner(board, 'X'):
            draw_board(board)
            print("Игрок победил!")
            return

        # The player makes the ninth move. If it did not win, the board is
        # full and the agent has nothing to choose from; asking it to move
        # would raise on the empty list, so stop here and report the draw.
        if not available_actions:
            break

        # Agent's turn
        action = agent_move(board, available_actions, exploration_rate, Q)
        available_actions.remove(action)
        board[action] = 'O'

        if check_winner(board, 'O'):
            draw_board(board)
            print("Агент победил!")
            return

    draw_board(board)
    print("Ничья!")

# Training hyperparameters
alpha = 0.1             # step size used when updating Q
gamma = 0.6             # discount applied to the bootstrapped value
exploration_rate = 0.1  # probability that the agent plays a random cell
episodes = 10000        # number of training games

# Q-values start at zero, one entry per board cell
Q = np.zeros(BOARD_SIZE)

# Train the agent
train_agent(
    episodes=episodes,
    alpha=alpha,
    gamma=gamma,
    exploration_rate=exploration_rate,
)

# Play against the trained agent
play_game(exploration_rate=exploration_rate)


-------------
|   |   |   |
|   |   |   |
|   |   |   |
-------------
Введите номер ячейки (от 0 до 8): 4
-------------
| O |   |   |
|   | X |   |
|   |   |   |
-------------
Введите номер ячейки (от 0 до 8): 2
-------------
| O |   | X |
|   | X |   |
|   |   | O |
-------------
Введите номер ячейки (от 0 до 8): 6
-------------
| O |   | X |
|   | X |   |
| X |   | O |
-------------
Игрок победил!
