In [None]:
import numpy as np
import random
import pickle

# Default hyperparameters (epsilon and num_episodes may be overridden by set_difficulty)
alpha = 0.1    # learning rate
gamma = 0.9    # discount factor
epsilon = 0.5  # exploration rate; starts high
epsilon_decay = 0.999  # multiplicative factor applied to epsilon after each training episode
epsilon_min = 0.1  # epsilon is no longer decayed once it is at or below this value
num_episodes = 10000

# Q-table: maps a 9-character board string to a list of 9 action values
Q = {}

# Populate the Q-table with one entry per possible board encoding
def init_q_table():
    """Fill the global ``Q`` with small random action values for all 3**9 states.

    Each key is the zero-padded, 9-character base-3 representation of the
    state index; each value is a list of 9 floats drawn uniformly from
    [-0.01, 0.01]. Entries already stored under the same keys are overwritten.
    """
    for index in range(3 ** 9):
        key = np.base_repr(index, base=3).zfill(9)
        values = []
        for _ in range(9):
            values.append(random.uniform(-0.01, 0.01))
        Q[key] = values

# Epsilon-greedy action selection
def choose_action(state):
    """Return a cell index in 0-8 for ``state`` using the epsilon-greedy rule.

    With probability ``epsilon`` a uniformly random index is returned;
    otherwise the index of the largest Q-value stored for ``state``.
    Occupied cells are not excluded here, so the result may be an illegal move.
    """
    explore = random.uniform(0, 1) < epsilon
    if not explore:
        return np.argmax(Q[state])
    return random.randint(0, 8)

# One-step Q-learning update
def update_q_table(state, action, reward, next_state):
    """Move ``Q[state][action]`` toward the one-step TD target.

    The target is ``reward + gamma * max(Q[next_state])`` and the stored value
    is shifted by ``alpha`` times the TD error. The target always bootstraps
    from ``next_state``, whether or not that state is terminal.
    """
    next_values = Q[next_state]
    bootstrap = next_values[np.argmax(next_values)]
    current = Q[state][action]
    target = reward + gamma * bootstrap
    Q[state][action] = current + alpha * (target - current)

# Winner detection
def check_winner(board):
    """Return the non-zero mark that owns a complete line, or 0 if there is none.

    ``board`` is a flat sequence of 9 cells in row-major order where 0 means
    empty. Lines are examined as column 0, row 0, column 1, row 1, column 2,
    row 2 and finally the two diagonals; the first complete line decides.
    """
    straight_lines = (
        (0, 3, 6), (0, 1, 2),
        (1, 4, 7), (3, 4, 5),
        (2, 5, 8), (6, 7, 8),
    )
    for first, second, third in straight_lines:
        if board[first] == board[second] == board[third] != 0:
            return board[first]

    # Both diagonals pass through the centre cell, so it identifies the winner.
    main_diagonal = board[0] == board[4] == board[8] != 0
    if main_diagonal:
        return board[4]
    anti_diagonal = board[2] == board[4] == board[6] != 0
    if anti_diagonal:
        return board[4]
    return 0

# Run one training episode
def play_game():
    """Play one training episode against a random opponent, updating ``Q``.

    The agent plays mark 1 and moves first; after each non-terminal agent
    move a uniformly random opponent places mark 2 on an empty cell. Every
    agent transition is fed to ``update_q_table`` with reward +1 when the
    agent wins, -1 when the opponent wins and 0 otherwise (draws and
    non-terminal moves). ``next_state`` is the board after the opponent's
    reply, i.e. the position in which the agent acts next.
    """
    board = [0] * 9
    state = ''.join(map(str, board))
    # Each pass fills at least one cell and a full board ends the episode,
    # so the loop runs at most five times.
    while True:
        # choose_action does not exclude occupied cells, so re-draw until legal.
        action = choose_action(state)
        while board[action] != 0:
            action = choose_action(state)
        board[action] = 1
        winner = check_winner(board)

        # Opponent reply. Without it only mark 1 is ever placed, so every
        # episode ends in an agent win and the loss branch is unreachable.
        if winner == 0 and 0 in board:
            free_cells = [cell for cell, mark in enumerate(board) if mark == 0]
            board[random.choice(free_cells)] = 2
            winner = check_winner(board)

        next_state = ''.join(map(str, board))
        if winner == 1:
            reward = 1
        elif winner == 2:
            reward = -1
        else:
            reward = 0

        # Update on every transition, not just the last one, so terminal
        # rewards can propagate back to earlier positions.
        update_q_table(state, action, reward, next_state)

        if winner != 0 or 0 not in board:
            break
        state = next_state

# Render the board as a 3x3 grid on stdout
def print_board(board):
    """Print ``board`` (9 cells, row-major) with 0 shown blank, 1 as X, 2 as O."""
    symbols = {0: ' ', 1: 'X', 2: 'O'}
    for row_start in (0, 3, 6):
        if row_start:
            # Separator goes between rows only, not above the first one.
            print("--+---+--")
        left = symbols[board[row_start]]
        middle = symbols[board[row_start + 1]]
        right = symbols[board[row_start + 2]]
        print(" | ".join((left, middle, right)))

# Read a legal move from the user, re-prompting on any invalid entry
def _read_user_move(board):
    """Prompt until the user enters the index (0-8) of an empty cell.

    Non-numeric text, out-of-range numbers (including negatives, which would
    otherwise wrap around as list indices) and occupied cells all trigger the
    "invalid position" prompt instead of raising or corrupting the board.
    """
    prompt = "Pilih posisi (0-8): "
    while True:
        try:
            position = int(input(prompt))
        except ValueError:
            position = -1  # treated as out of range below
        if 0 <= position < len(board) and board[position] == 0:
            return position
        prompt = "Posisi tidak valid, pilih posisi lain (0-8): "


# Interactive game: the agent (X, mark 1) versus the user (O, mark 2)
def play_interactive_game():
    """Play one game on the console between the agent and the user.

    The agent moves on even turns using ``choose_action``; the user moves on
    odd turns via console input. When the game ends, a single Q-update with
    reward +1 (agent win), -1 (user win) or 0 (draw) is applied to the
    agent's most recent state/action pair.
    """
    board = [0] * 9
    state = ''.join(map(str, board))
    # The agent's latest decision. It moves first, so both are set before
    # any terminal position can occur.
    agent_state = None
    agent_action = None
    print("Mulai permainan! Anda adalah 'O'.")
    for turn in range(9):
        print_board(board)
        if turn % 2 == 0:
            # Agent's turn: re-draw until the chosen cell is empty.
            action = choose_action(state)
            while board[action] != 0:
                action = choose_action(state)
            board[action] = 1
            agent_state, agent_action = state, action
            print(f"Agen memilih posisi {action}")
        else:
            # User's turn
            action = _read_user_move(board)
            board[action] = 2
        next_state = ''.join(map(str, board))
        winner = check_winner(board)
        if winner == 1:
            print_board(board)
            print("Agen menang!")
            update_q_table(agent_state, agent_action, 1, next_state)
            break
        elif winner == 2:
            print_board(board)
            print("Anda menang!")
            # Penalise the agent's last move, not the user's winning move.
            update_q_table(agent_state, agent_action, -1, next_state)
            break
        else:
            if 0 not in board:
                print_board(board)
                print("Permainan seri!")
                update_q_table(agent_state, agent_action, 0, next_state)
                break
        state = next_state

# Persist the Q-table to disk
def save_q_table(filename):
    """Pickle the global ``Q`` into ``filename``, overwriting any existing file."""
    with open(filename, mode='wb') as out_file:
        pickle.dump(Q, out_file)

# Restore the Q-table from disk
def load_q_table(filename):
    """Replace the global ``Q`` with the object unpickled from ``filename``."""
    global Q
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(filename, mode='rb') as in_file:
        loaded = pickle.load(in_file)
    Q = loaded

# Configure agent difficulty
def set_difficulty(level):
    """Set the global ``epsilon`` and ``num_episodes`` from a difficulty name.

    Recognised names are 'mudah' (easy), 'sedang' (medium) and 'sulit'
    (hard). Any other value leaves both globals untouched.
    """
    global epsilon, num_episodes
    presets = (
        ('mudah', 0.9, 1000),
        ('sedang', 0.5, 5000),
        ('sulit', 0.1, 10000),
    )
    for name, exploration, episodes in presets:
        if level == name:
            epsilon, num_episodes = exploration, episodes
            return

# Training entry point: build the Q-table, train, persist, then play
init_q_table()

# Ask for the agent difficulty; an unrecognised answer keeps the defaults
difficulty = input("Pilih kesulitan agen (mudah/sedang/sulit): ").strip().lower()
set_difficulty(difficulty)

for episode in range(num_episodes):
    play_game()
    # No `global` statement here: this is already module scope, and declaring
    # `global epsilon` after the module-level assignment is a SyntaxError when
    # the file runs as a plain script.
    if epsilon > epsilon_min:
        # Clamp so the decay never steps below the configured minimum.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

print("Pembelajaran selesai!")

# Save the Q-table
save_q_table('q_table.pkl')

# Load the Q-table back (a file written just above by this same run)
load_q_table('q_table.pkl')

# Play an interactive game against the user
print("Mulai permainan interaktif:")
play_interactive_game()


Pilih kesulitan agen (mudah/sedang/sulit): mudah
Pembelajaran selesai!
Mulai permainan interaktif:
Mulai permainan! Anda adalah 'O'.
  |   |  
--+---+--
  |   |  
--+---+--
  |   |  
Agen memilih posisi 7
  |   |  
--+---+--
  |   |  
--+---+--
  | X |  
Pilih posisi (0-8): 0
O |   |  
--+---+--
  |   |  
--+---+--
  | X |  
Agen memilih posisi 5
O |   |  
--+---+--
  |   | X
--+---+--
  | X |  
Pilih posisi (0-8): 1
O | O |  
--+---+--
  |   | X
--+---+--
  | X |  
Agen memilih posisi 2
O | O | X
--+---+--
  |   | X
--+---+--
  | X |  
Pilih posisi (0-8): 4
O | O | X
--+---+--
  | O | X
--+---+--
  | X |  
Agen memilih posisi 8
O | O | X
--+---+--
  | O | X
--+---+--
  | X | X
Agen menang!
