# In [4]:
import gym
import pygame
import numpy as np
import random
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers.legacy import Adam

# Initialize Pygame
pygame.init()

# Set up display dimensions
SCREEN_WIDTH = 600
SCREEN_HEIGHT = 400
screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("Lunar Lander DQN Simulation")

# Load the background image and scale it to fit the screen.
# convert() re-encodes the surface in the display's pixel format once, here,
# instead of on every per-frame blit; it must run after set_mode() above.
# The background is drawn as a full-screen backdrop, so no alpha is kept.
background_image = pygame.image.load("moon.png").convert()
background_image = pygame.transform.scale(background_image, (SCREEN_WIDTH, SCREEN_HEIGHT))

# Load the rocket image.
# convert_alpha() keeps per-pixel transparency so the sprite can still be
# rotated and drawn over the background without an opaque box around it.
rocket_image = pygame.image.load("rocket.png").convert_alpha()
rocket_image = pygame.transform.scale(rocket_image, (150, 150))

# Colors (RGB tuples) and fonts (default pygame font at two sizes)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
BROWN = (139, 69, 19)
GREEN = (0, 255, 0)
RED = (255, 0, 0)
font = pygame.font.Font(None, 36)
iteration_font = pygame.font.Font(None, 24)

def draw_lander(x, y, angle):
    """Blit the rocket sprite onto the screen at the given simulation pose.

    Args:
        x: Horizontal position; -1 maps to the left screen edge and +1 to
            the right edge.
        y: Vertical position; 0 maps to the bottom of the screen and 2 to
            the top.
        angle: Rotation in radians. The sprite is rotated by the negated
            angle, converted to degrees.
    """
    # Simulation coordinates -> pixel coordinates of the sprite's centre.
    center = (
        int((x + 1) * SCREEN_WIDTH / 2),
        int(SCREEN_HEIGHT - y * SCREEN_HEIGHT / 2),
    )

    # Rotating changes the surface's bounding box, so re-centre the rotated
    # sprite on the target point before taking its top-left blit position.
    sprite = pygame.transform.rotate(rocket_image, -np.degrees(angle))
    screen.blit(sprite, sprite.get_rect(center=center).topleft)

def draw_ui_text(success, episodeIndex, success_percentage):
    """Draw the episode counter and success-rate labels near the bottom edge.

    Args:
        success: Unused by this function; kept so existing callers still work.
        episodeIndex: Zero-based episode index; displayed as ``index + 1``.
        success_percentage: Success rate in percent, shown with two decimals.
    """
    label_y = int(SCREEN_HEIGHT * 0.9)
    # (text, horizontal position as a fraction of the screen width)
    labels = (
        (f"Episode: {episodeIndex+1}", 0.07),
        (f"Success Rate: {success_percentage:.2f}%", 0.7),
    )
    for text, x_fraction in labels:
        rendered = iteration_font.render(text, True, WHITE)
        screen.blit(rendered, (int(SCREEN_WIDTH * x_fraction), label_y))

def draw_landing_pad(successful_landing):
    """Draw the landing pad, horizontally centred along the bottom edge.

    Args:
        successful_landing: When truthy the pad is filled green, otherwise
            white.
    """
    width, height = 100, 15

    if successful_landing:
        fill = GREEN
    else:
        fill = WHITE

    left = (SCREEN_WIDTH - width) // 2
    top = SCREEN_HEIGHT - height
    pygame.draw.rect(screen, fill, (left, top, width, height))

def draw_lunar_ui(x, y, angle, episodeIndex, success_percentage, successful_landing):
    """Render one complete frame and present it on the display.

    Layers are drawn back to front: background, lander, status text,
    landing pad. The display is flipped once at the end.

    Args:
        x, y, angle: Lander pose, forwarded to ``draw_lander``.
        episodeIndex: Zero-based episode index shown in the status text.
        success_percentage: Success rate in percent shown in the status text.
        successful_landing: Forwarded to ``draw_landing_pad`` to pick its color.
    """
    origin = (0, 0)
    screen.blit(background_image, origin)
    draw_lander(x, y, angle)
    # The first argument of draw_ui_text is always passed as True here.
    draw_ui_text(
        success=True,
        episodeIndex=episodeIndex,
        success_percentage=success_percentage,
    )
    draw_landing_pad(successful_landing)
    pygame.display.flip()

class DQN:
    """Deep Q-Network agent with a replay memory and a separate target network.

    The online network (``model``) selects actions and is trained; the target
    network (``target_model``) supplies the bootstrap values during replay and
    is only changed by ``update_target_model``.
    """

    def __init__(self, env):
        """Build both networks sized from the environment's spaces.

        Args:
            env: Environment exposing ``observation_space.shape`` and a
                discrete ``action_space`` with an ``n`` attribute.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # discount factor
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        """Return a compiled MLP mapping a state to one Q-value per action.

        Two hidden ReLU layers of 24 units, a linear output layer of
        ``action_size`` units, MSE loss and the Adam optimizer.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Batch of one state, as accepted by ``model.predict``.

        Returns:
            int: A uniformly random action with probability ``epsilon``,
            otherwise the index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: predict() otherwise prints a progress bar on every step.
        act_values = self.model.predict(state, verbose=0)
        # Plain int rather than a NumPy integer, matching the random branch.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the online network on a random minibatch from memory.

        Does nothing when fewer than ``batch_size`` transitions are stored
        (sampling would otherwise raise ``ValueError``). After a training
        step, ``epsilon`` is decayed once while it is above ``epsilon_min``.

        Args:
            batch_size: Number of transitions to sample without replacement.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        batch_states, actions, rewards, batch_next_states, dones = zip(*minibatch)

        # Each stored state is a batch of one; take its first row so the
        # stacked arrays have one row per sampled transition.
        states = np.array([s[0] for s in batch_states])
        next_states = np.array([s[0] for s in batch_next_states])
        actions = np.asarray(actions, dtype=np.intp)
        rewards = np.asarray(rewards, dtype=np.float64)
        dones = np.asarray(dones, dtype=bool)

        # Start from the current predictions so only the taken action's
        # Q-value contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        next_state_targets = self.target_model.predict(next_states, verbose=0)

        best_next = next_state_targets.max(axis=1).astype(np.float64)
        bootstrapped = rewards + self.gamma * best_next
        # Transitions flagged done get the bare reward, with no bootstrap term.
        targets[np.arange(batch_size), actions] = np.where(dones, rewards, bootstrapped)

        self.model.fit(states, targets, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load online-network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save online-network weights to ``name``."""
        self.model.save_weights(name)

# --- Main Execution ---
# Runs the training loop: each step is drawn with pygame, stored in the
# agent's replay memory and (during every TRAIN_EPISODE_INTERVAL-th episode)
# followed by one replay training step.

SUCCESS_REWARD_THRESHOLD = 50  # episode return above which a run counts as a success
MAX_STEPS_PER_EPISODE = 500
TRAIN_EPISODE_INTERVAL = 10  # replay training only runs when e % interval == 0

env = gym.make('LunarLander-v2')
agent = DQN(env)
batch_size = 32
num_episodes = 1000
successful_landings = 0

for e in range(num_episodes):
    total_reward = 0
    state, _ = env.reset()
    state = np.reshape(state, [1, agent.state_size])
    # Only changes when an episode ends, so compute it once per episode
    # instead of once per rendered frame.
    success_percentage = (successful_landings / (e + 1)) * 100

    for step in range(MAX_STEPS_PER_EPISODE):
        x, y, angle = state[0][0], state[0][1], state[0][4]
        # The pad stays white while the episode is still running.
        draw_lunar_ui(x, y, angle, e, success_percentage, False)

        action = agent.act(state)
        next_state, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        next_state = np.reshape(next_state, [1, agent.state_size])
        # Store the environment's own reward: replacing the terminal reward
        # with a fixed penalty would make a landing and a crash look the same
        # to the agent. Only true termination is stored as `done`, so a
        # time-limit truncation still bootstraps from the next state.
        agent.remember(state, action, reward, next_state, terminated)
        state = next_state

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                env.close()
                pygame.quit()
                # SystemExit works in scripts and notebooks alike, unlike the
                # interactive-only exit() helper.
                raise SystemExit

        if terminated or truncated:
            if total_reward > SUCCESS_REWARD_THRESHOLD:
                successful_landings += 1
            agent.update_target_model()
            break

        if e % TRAIN_EPISODE_INTERVAL == 0 and len(agent.memory) > batch_size:
            agent.replay(batch_size)

    # Judge the landing from the final state (after the last step) and show
    # the result on a last frame, so the green pad is actually visible during
    # the pause below.
    x, y, angle = state[0][0], state[0][1], state[0][4]
    successful_landing = bool(total_reward > SUCCESS_REWARD_THRESHOLD and y < 0.1)
    success_percentage = (successful_landings / (e + 1)) * 100
    draw_lunar_ui(x, y, angle, e, success_percentage, successful_landing)

    pygame.time.wait(500)

print("Training completed!")
env.close()
pygame.quit()

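# Captured notebook output (not part of the program); it appears to be a
# library warning line followed by a manual interrupt of the cell:
#   if not isinstance(terminated, (bool, np.bool8)):
#
# KeyboardInterrupt: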