import numpy as np import tensorflow as tf
class Connect4Environment: def init(self): self.board = np.zeros((6, 7), dtype=np.int) # 6x7 game board self.current_player = 1 # Player 1 starts
def reset(self):
self.board = np.zeros((6, 7), dtype=np.int)
self.current_player = 1
def is_valid_move(self, action):
# Check if the selected column is not full
return self.board[0][action] == 0
def make_move(self, action):
# Place a player's piece in the selected column
for row in range(5, -1, -1):
if self.board[row][action] == 0:
self.board[row][action] = self.current_player
break
self.current_player = 3 - self.current_player # Switch player
def get_game_state(self):
# Encode the current state of the game board
return self.board
def is_game_over(self):
# Check for a win condition or a full board
# Implement win-checking logic here
return False
class QLearningAgent: def init(self, state_space_size, action_space_size, learning_rate, discount_factor): self.q_table = np.zeros((state_space_size, action_space_size)) self.learning_rate = learning_rate self.discount_factor = discount_factor self.epsilon = 0.1 # Exploration rate
def choose_action(self, state):
if np.random.rand() < self.epsilon:
return np.random.choice(len(self.q_table[state]))
else:
return np.argmax(self.q_table[state])
def update_q_table(self, state, action, reward, next_state):
best_next_action = np.argmax(self.q_table[next_state])
self.q_table[state][action] += self.learning_rate * \
(reward + self.discount_factor * self.q_table[next_state][best_next_action] - self.q_table[state][action])
num_episodes = 1000 learning_rate = 0.1 discount_factor = 0.99
env = Connect4Environment()
state_space_size = 6 * 7 # 6 rows x 7 columns action_space_size = 7 # 7 possible column choices agent = QLearningAgent(state_space_size, action_space_size, learning_rate, discount_factor)
for episode in range(num_episodes): state = env.get_game_state() done = False while not done: action = agent.choose_action(state) if env.is_valid_move(action): env.make_move(action) next_state = env.get_game_state() if env.is_game_over(): reward = 1 # The agent wins done = True else: reward = 0 agent.update_q_table(state, action, reward, next_state) state = next_state else: continue
env.reset()