In [None]:
pip install gym box2d-py tensorflow pygame

In [None]:
import gym
import pygame
import numpy as np
import random
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers.legacy import Adam

# --- Pygame setup ---
# pygame must be initialised before the window, the images and the font
# below are created.
pygame.init()

# Window geometry; every drawing helper scales its coordinates from these.
SCREEN_WIDTH, SCREEN_HEIGHT = 600, 400
screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("Lunar Lander DQN Simulation")

# Colours and the font used for the on-screen labels (default font, size 24).
WHITE, GREEN = (255, 255, 255), (0, 255, 0)
iteration_font = pygame.font.Font(None, 24)

# Sprites: the backdrop is stretched to fill the window, the rocket is
# scaled to a fixed 150x150 pixel square.
background_image = pygame.transform.scale(
    pygame.image.load("moon.png"),
    (SCREEN_WIDTH, SCREEN_HEIGHT),
)
rocket_image = pygame.transform.scale(
    pygame.image.load("rocket.png"),
    (150, 150),
)

def draw_lander(x, y, angle):
    """Blit the rocket sprite onto the window at the given pose.

    Args:
        x: Horizontal position; -1 maps to the left edge of the window and
            +1 to the right edge.
        y: Vertical position; 0 maps to the bottom edge of the window and
            2 to the top edge.
        angle: Lander rotation in radians.
    """
    # Map simulation coordinates to integer pixel coordinates.
    center_px = (
        int((x + 1) * SCREEN_WIDTH / 2),
        int(SCREEN_HEIGHT - y * SCREEN_HEIGHT / 2),
    )

    # The sprite is rotated by the negated angle, converted to degrees.
    rotation_deg = -np.degrees(angle)
    sprite = pygame.transform.rotate(rocket_image, rotation_deg)

    # Rotation changes the sprite's bounding box, so re-centre it on the
    # target pixel before blitting.
    sprite_rect = sprite.get_rect(center=center_px)
    screen.blit(sprite, sprite_rect.topleft)

def draw_ui_text(episodeIndex, success_percentage):
    """Render the episode counter and success-rate labels.

    Both labels sit on the same row, 90% of the way down the window.

    Args:
        episodeIndex: Zero-based episode index; displayed one-based.
        success_percentage: Success rate in percent, shown with two decimals.
    """
    row_y = int(SCREEN_HEIGHT * 0.9)
    labels = (
        (f"Episode: {episodeIndex+1}", int(SCREEN_WIDTH * 0.07)),
        (f"Success Rate: {success_percentage:.2f}%", int(SCREEN_WIDTH * 0.7)),
    )
    for caption, column_x in labels:
        rendered = iteration_font.render(caption, True, WHITE)
        screen.blit(rendered, (column_x, row_y))

def draw_landing_pad(successful_landing):
    """Draw the landing pad as a rectangle centred on the bottom edge.

    Args:
        successful_landing: When truthy the pad is drawn in green,
            otherwise in white.
    """
    width, height = 100, 15
    left = (SCREEN_WIDTH - width) // 2
    top = SCREEN_HEIGHT - height

    if successful_landing:
        fill = GREEN
    else:
        fill = WHITE

    pygame.draw.rect(screen, fill, pygame.Rect(left, top, width, height))

def draw_lunar_ui(x, y, angle, episodeIndex, success_percentage, successful_landing):
    """Redraw one complete frame and present it.

    Layers are painted back to front: background, rocket, text labels,
    landing pad. The finished frame is then flipped to the display.

    Args:
        x: Lander horizontal position, forwarded to ``draw_lander``.
        y: Lander vertical position, forwarded to ``draw_lander``.
        angle: Lander rotation in radians, forwarded to ``draw_lander``.
        episodeIndex: Zero-based episode index for the episode label.
        success_percentage: Success rate in percent for the rate label.
        successful_landing: Whether the pad is drawn highlighted.
    """
    # The background covers the whole window, wiping the previous frame.
    origin = (0, 0)
    screen.blit(background_image, origin)

    draw_lander(x=x, y=y, angle=angle)
    draw_ui_text(
        episodeIndex=episodeIndex,
        success_percentage=success_percentage,
    )
    draw_landing_pad(successful_landing=successful_landing)

    # Present the finished frame.
    pygame.display.flip()

class DQN:
    """Deep Q-Network agent with experience replay and a target network.

    The agent keeps two identically shaped Keras models: ``model`` is trained
    on replayed transitions, while ``target_model`` supplies the bootstrapped
    next-state values and is only refreshed via ``update_target_model``.
    Actions are chosen epsilon-greedily; epsilon is decayed once per
    successful ``replay`` call until it reaches ``epsilon_min``.
    """

    def __init__(self, env):
        """Create the agent for a Gym-style environment.

        Args:
            env: Environment exposing ``observation_space.shape`` and a
                discrete ``action_space`` with an ``n`` attribute.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.memory = deque(maxlen=2000)  # replay buffer; oldest entries drop off
        self.gamma = 0.95  # discount factor
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.target_model = self._build_model()
        # Start both networks from the same weights.
        self.update_target_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled ``Sequential`` model with two 24-unit ReLU hidden
            layers and one linear output per action, trained with MSE loss
            and the Adam optimizer.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online model's weights into the target model."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Args:
            state: Observation before the action, shaped ``(1, state_size)``.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Observation after the action, shaped
                ``(1, state_size)``.
            done: Whether the transition ended the episode; such transitions
                are not bootstrapped during replay.
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Select an action epsilon-greedily.

        Args:
            state: Current observation, shaped ``(1, state_size)``.

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: this runs once per environment step, so the default
        # Keras progress output would flood the console.
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the online model on a random minibatch of stored transitions.

        Does nothing when ``batch_size`` is not positive or the buffer holds
        fewer than ``batch_size`` transitions, instead of letting
        ``random.sample`` raise ``ValueError``.

        Args:
            batch_size: Number of transitions to sample.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        # Stored states are (1, state_size); take row 0 so the stacked
        # batches are (batch_size, state_size).
        states = np.array([transition[0][0] for transition in minibatch])
        next_states = np.array([transition[3][0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=float)
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        targets = self.model.predict(states, verbose=0)
        next_state_targets = self.target_model.predict(next_states, verbose=0)

        # Q-learning target: the plain reward for terminal transitions,
        # otherwise reward plus the discounted best next-state value.
        bootstrapped = rewards + self.gamma * np.amax(next_state_targets, axis=1)
        # Only the Q-value of the action actually taken is changed; the other
        # outputs keep the model's own prediction and add no error.
        targets[np.arange(batch_size), actions] = np.where(dones, rewards, bootstrapped)

        self.model.fit(states, targets, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load weights for the online model from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the online model's weights to the file ``name``."""
        self.model.save_weights(name)

env = gym.make('LunarLander-v2')
agent = DQN(env)
batch_size = 32
num_episodes = 1000
successful_landings = 0

# Tunables for the training loop.
MAX_STEPS_PER_EPISODE = 500      # hard cap on steps simulated per episode
SUCCESS_REWARD_THRESHOLD = 50    # episode return above which a landing counts
LANDED_MAX_HEIGHT = 0.1          # y below this is treated as "on the pad"
                                 # NOTE(review): assumes observation y is
                                 # measured relative to the pad - confirm
TRAIN_EPISODE_INTERVAL = 10      # replay training runs only in these episodes
EPISODE_PAUSE_MS = 500           # pause between episodes, in milliseconds

# Loop over all episodes for training
for e in range(num_episodes):
    total_reward = 0
    # Pad highlight flag; only becomes True on the final frame of an episode.
    successful_landing = False
    state, _ = env.reset()
    # The agent expects states shaped (1, state_size).
    state = np.reshape(state, [1, agent.state_size])
    # The success counter only changes when an episode ends, so the
    # displayed rate is constant for the whole episode.
    success_percentage = (successful_landings / (e + 1)) * 100
    # Replay training happens on every step, but only during every
    # TRAIN_EPISODE_INTERVAL-th episode (episodes 0, 10, 20, ...).
    train_this_episode = e % TRAIN_EPISODE_INTERVAL == 0
    done = False

    for step in range(MAX_STEPS_PER_EPISODE):
        # Position and angle of the lander for rendering.
        x, y, angle = state[0][0], state[0][1], state[0][4]
        draw_lunar_ui(x, y, angle, e, success_percentage, successful_landing)

        action = agent.act(state)
        next_state, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        next_state = np.reshape(next_state, [1, agent.state_size])
        # Store the environment's own reward: overriding the terminal reward
        # with a fixed penalty would make a landing indistinguishable from a
        # crash. Only true termination (not time-limit truncation) marks the
        # transition as terminal for bootstrapping.
        agent.remember(state, action, reward, next_state, terminated)
        state = next_state

        # Check for user exit request
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                env.close()
                pygame.quit()
                raise SystemExit

        done = terminated or truncated
        if done:
            break

        if train_this_episode and len(agent.memory) > batch_size:
            agent.replay(batch_size)

    if done:
        # Count the landing and refresh the target network once per
        # finished episode.
        if total_reward > SUCCESS_REWARD_THRESHOLD:
            successful_landings += 1
        agent.update_target_model()

    # Evaluate the landing on the final state and draw one last frame, so
    # the highlighted pad and updated success rate are visible during the
    # pause instead of being reset unseen at the start of the next episode.
    x, y, angle = state[0][0], state[0][1], state[0][4]
    successful_landing = bool(
        total_reward > SUCCESS_REWARD_THRESHOLD and y < LANDED_MAX_HEIGHT
    )
    success_percentage = (successful_landings / (e + 1)) * 100
    draw_lunar_ui(x, y, angle, e, success_percentage, successful_landing)

    # Wait for a short duration before starting the next episode
    pygame.time.wait(EPISODE_PAUSE_MS)

print("Training completed!")
env.close()
pygame.quit()
