In [1]:
import pygame
import numpy as np
import pickle
import os
import time
from datetime import datetime

class QLearningAI:
    def __init__(self, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.q_table = {}
        self.actions = [0, 1, 2]  # stay, up, down
        
        # Load existing Q-table if available
        self.load_qtable()
    
    def get_state(self, paddle_y, ball_x, ball_y, ball_dx, ball_dy):
        """Simplified state space"""
        # Reduce precision to limit state space
        paddle_y = paddle_y // 60
        ball_x = ball_x // 100
        ball_y = ball_y // 60
        ball_direction = 0 if ball_dx > 0 else 1
        
        return (paddle_y, ball_x, ball_y, ball_direction)
    
    def choose_action(self, state):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(len(self.actions))
        
        if np.random.random() < self.epsilon:
            return np.random.choice(self.actions)
        return np.argmax(self.q_table[state])
    
    def learn(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(len(self.actions))
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(len(self.actions))
        
        old_value = self.q_table[state][action]
        next_max = np.max(self.q_table[next_state])
        new_value = (1 - self.lr) * old_value + self.lr * (reward + self.gamma * next_max)
        self.q_table[state][action] = new_value
    
    def save_qtable(self):
        with open('pong_qtable.pkl', 'wb') as f:
            pickle.dump(self.q_table, f)
    
    def load_qtable(self):
        if os.path.exists('pong_qtable.pkl'):
            try:
                with open('pong_qtable.pkl', 'rb') as f:
                    self.q_table = pickle.load(f)
                print("Loaded existing Q-table")
            except:
                print("Created new Q-table")

def train_ai(episodes=100, display_training=False):
    """Separate training function"""
    print(f"Starting AI training for {episodes} episodes...")
    start_time = time.time()
    
    # Initialize pygame with minimal setup for training
    pygame.init()
    if display_training:
        screen = pygame.display.set_mode((800, 600))
    else:
        screen = pygame.display.set_mode((800, 600), flags=pygame.HIDDEN)
    
    # Initialize AI
    ai = QLearningAI()
    
    # Training parameters
    paddle_height = 90
    ball_size = 15
    paddle_speed = 5
    ball_speed = 7
    
    for episode in range(episodes):
        # Reset positions
        paddle_y = 300
        ball_x, ball_y = 400, 300
        ball_dx = ball_speed * (1 if np.random.random() > 0.5 else -1)
        ball_dy = ball_speed * (1 if np.random.random() > 0.5 else -1)
        
        episode_reward = 0
        max_steps = 1000
        steps = 0
        
        while steps < max_steps:
            steps += 1
            
            # Get current state
            state = ai.get_state(paddle_y, ball_x, ball_y, ball_dx, ball_dy)
            
            # Choose and perform action
            action = ai.choose_action(state)
            if action == 1:  # up
                paddle_y = max(0, paddle_y - paddle_speed)
            elif action == 2:  # down
                paddle_y = min(600 - paddle_height, paddle_y + paddle_speed)
            
            # Update ball position
            ball_x += ball_dx
            ball_y += ball_dy
            
            # Ball collisions
            if ball_y <= 0 or ball_y >= 600 - ball_size:
                ball_dy *= -1
            
            # Paddle collision
            if (750 <= ball_x <= 760 and
                paddle_y <= ball_y <= paddle_y + paddle_height):
                ball_dx *= -1
                reward = 1
            elif ball_x >= 800:
                reward = -1
                break
            else:
                reward = 0
            
            # Get next state and learn
            next_state = ai.get_state(paddle_y, ball_x, ball_y, ball_dx, ball_dy)
            ai.learn(state, action, reward, next_state)
            episode_reward += reward
            
            if display_training and steps % 3 == 0:  # Update display less frequently
                screen.fill((0, 0, 0))
                pygame.draw.rect(screen, (255, 255, 255), (760, paddle_y, 10, paddle_height))
                pygame.draw.rect(screen, (255, 255, 255), (ball_x, ball_y, ball_size, ball_size))
                pygame.display.flip()
        
        # Save progress periodically
        if (episode + 1) % 10 == 0:
            ai.save_qtable()
            elapsed_time = time.time() - start_time
            print(f"Episode {episode + 1}/{episodes} - Reward: {episode_reward:.2f} - Time: {elapsed_time:.1f}s")
    
    pygame.quit()
    print("Training completed!")
    return ai

class PongGame:
    def __init__(self):
        pygame.init()
        self.screen = pygame.display.set_mode((800, 600))
        pygame.display.set_caption("Pong with Q-Learning AI")
        
        self.paddle_height = 90
        self.ball_size = 15
        self.paddle_speed = 5
        self.ball_speed = 7
        
        self.reset_game()
        self.ai = QLearningAI()
        self.clock = pygame.time.Clock()
    
    def reset_game(self):
        self.player_y = 300
        self.ai_y = 300
        self.ball_x, self.ball_y = 400, 300
        self.ball_dx = self.ball_speed * (1 if np.random.random() > 0.5 else -1)
        self.ball_dy = self.ball_speed * (1 if np.random.random() > 0.5 else -1)
        self.score = [0, 0]
    
    def run(self):
        running = True
        while running:
            # Event handling
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
            
            # Player movement
            keys = pygame.key.get_pressed()
            if keys[pygame.K_UP]:
                self.player_y = max(0, self.player_y - self.paddle_speed)
            if keys[pygame.K_DOWN]:
                self.player_y = min(600 - self.paddle_height, self.player_y + self.paddle_speed)
            
            # AI movement
            state = self.ai.get_state(self.ai_y, self.ball_x, self.ball_y, self.ball_dx, self.ball_dy)
            action = self.ai.choose_action(state)
            if action == 1:
                self.ai_y = max(0, self.ai_y - self.paddle_speed)
            elif action == 2:
                self.ai_y = min(600 - self.paddle_height, self.ai_y + self.paddle_speed)
            
            # Ball movement
            self.ball_x += self.ball_dx
            self.ball_y += self.ball_dy
            
            # Ball collisions
            if self.ball_y <= 0 or self.ball_y >= 600 - self.ball_size:
                self.ball_dy *= -1
            
            # Paddle collisions
            if (30 <= self.ball_x <= 40 and
                self.player_y <= self.ball_y <= self.player_y + self.paddle_height):
                self.ball_dx *= -1
            elif (750 <= self.ball_x <= 760 and
                  self.ai_y <= self.ball_y <= self.ai_y + self.paddle_height):
                self.ball_dx *= -1
            
            # Scoring
            if self.ball_x < 0:
                self.score[1] += 1
                self.reset_game()
            elif self.ball_x > 800:
                self.score[0] += 1
                self.reset_game()
            
            # Drawing
            self.screen.fill((0, 0, 0))
            pygame.draw.rect(self.screen, (255, 255, 255), (30, self.player_y, 10, self.paddle_height))
            pygame.draw.rect(self.screen, (255, 255, 255), (760, self.ai_y, 10, self.paddle_height))
            pygame.draw.rect(self.screen, (255, 255, 255), (self.ball_x, self.ball_y, self.ball_size, self.ball_size))
            
            # Draw score
            font = pygame.font.Font(None, 74)
            score_text = font.render(f"{self.score[0]} - {self.score[1]}", True, (255, 255, 255))
            self.screen.blit(score_text, (350, 50))
            
            pygame.display.flip()
            self.clock.tick(60)
        
        pygame.quit()

if __name__ == "__main__":
    try:
        # Ask user if they want to train or play
        choice = input("Do you want to (t)rain the AI or (p)lay the game? (t/p): ").lower()
        
        if choice == 't':
            episodes = int(input("Enter number of training episodes (recommended 100-1000): "))
            display = input("Display training process? (y/n): ").lower() == 'y'
            train_ai(episodes, display_training=display)
        elif choice == 'p':
            game = PongGame()
            game.run()
        else:
            print("Invalid choice!")
            
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        input("Press Enter to exit...")






pygame 2.6.0 (SDL 2.28.4, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html
Do you want to (t)rain the AI or (p)lay the game? (t/p): t
Enter number of training episodes (recommended 100-1000): 1000
Display training process? (y/n): y
Starting AI training for 1000 episodes...
Loaded existing Q-table
Episode 10/1000 - Reward: 0.00 - Time: 2.5s
Episode 20/1000 - Reward: 0.00 - Time: 6.4s
Episode 30/1000 - Reward: 0.00 - Time: 10.5s
Episode 40/1000 - Reward: 0.00 - Time: 13.7s
Episode 50/1000 - Reward: 0.00 - Time: 20.6s
Episode 60/1000 - Reward: 0.00 - Time: 25.6s
Episode 70/1000 - Reward: 0.00 - Time: 30.4s
Episode 80/1000 - Reward: 0.00 - Time: 33.5s
Episode 90/1000 - Reward: 0.00 - Time: 36.1s
Episode 100/1000 - Reward: 0.00 - Time: 39.3s
Episode 110/1000 - Reward: 0.00 - Time: 40.8s
Episode 120/1000 - Reward: 0.00 - Time: 45.3s
Episode 130/1000 - Reward: 0.00 - Time: 48.1s
Episode 140/1000 - Reward: 0.00 - Time: 49.2s
Episode 150/1000 - Reward: 0.00

In [3]:
import pygame
import random
import numpy as np

class PongGame:
    def __init__(self, width=800, height=600):
        # Initialize Pygame
        pygame.init()
        
        # Game constants
        self.WIDTH = width
        self.HEIGHT = height
        self.PADDLE_WIDTH = 15
        self.PADDLE_HEIGHT = 90
        self.BALL_SIZE = 15
        self.PADDLE_SPEED = 5
        self.BALL_SPEED = 7
        
        # Colors
        self.WHITE = (255, 255, 255)
        self.BLACK = (0, 0, 0)
        
        # Create game window
        self.screen = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Pong with AI")
        
        # Initialize game objects
        self.player_paddle = pygame.Rect(50, self.HEIGHT//2 - self.PADDLE_HEIGHT//2,
                                       self.PADDLE_WIDTH, self.PADDLE_HEIGHT)
        self.ai_paddle = pygame.Rect(self.WIDTH - 50 - self.PADDLE_WIDTH, 
                                   self.HEIGHT//2 - self.PADDLE_HEIGHT//2,
                                   self.PADDLE_WIDTH, self.PADDLE_HEIGHT)
        self.ball = pygame.Rect(self.WIDTH//2 - self.BALL_SIZE//2,
                              self.HEIGHT//2 - self.BALL_SIZE//2,
                              self.BALL_SIZE, self.BALL_SIZE)
        
        # Initial ball velocity
        self.ball_speed_x = self.BALL_SPEED
        self.ball_speed_y = self.BALL_SPEED
        
        # Score
        self.player_score = 0
        self.ai_score = 0
        self.font = pygame.font.Font(None, 74)
        
        # Clock for controlling game speed
        self.clock = pygame.time.Clock()
        
    def reset_ball(self):
        """Reset ball to center with random direction"""
        self.ball.center = (self.WIDTH//2, self.HEIGHT//2)
        self.ball_speed_x = self.BALL_SPEED * random.choice((1, -1))
        self.ball_speed_y = self.BALL_SPEED * random.choice((1, -1))
    
    def ai_move(self):
        """AI paddle movement logic"""
        # Predictive AI: Calculate where ball will intersect with AI paddle's x-position
        if self.ball_speed_x > 0:  # Only move if ball is moving towards AI
            # Calculate time to intersection
            time_to_intersect = (self.ai_paddle.left - self.ball.centerx) / self.ball_speed_x
            # Predict y position
            predicted_y = self.ball.centery + self.ball_speed_y * time_to_intersect
            
            # Add some randomness to make AI imperfect
            predicted_y += random.randint(-30, 30)
            
            # Move paddle towards predicted position
            if self.ai_paddle.centery < predicted_y:
                self.ai_paddle.y += self.PADDLE_SPEED
            if self.ai_paddle.centery > predicted_y:
                self.ai_paddle.y -= self.PADDLE_SPEED
        
        # Keep paddle within screen bounds
        if self.ai_paddle.top <= 0:
            self.ai_paddle.top = 0
        if self.ai_paddle.bottom >= self.HEIGHT:
            self.ai_paddle.bottom = self.HEIGHT
    
    def update_ball(self):
        """Update ball position and handle collisions"""
        # Move ball
        self.ball.x += self.ball_speed_x
        self.ball.y += self.ball_speed_y
        
        # Ball collision with top and bottom
        if self.ball.top <= 0 or self.ball.bottom >= self.HEIGHT:
            self.ball_speed_y *= -1
        
        # Ball collision with paddles
        if self.ball.colliderect(self.player_paddle) or self.ball.colliderect(self.ai_paddle):
            self.ball_speed_x *= -1
            # Add some randomness to y speed after paddle hits
            self.ball_speed_y += random.uniform(-1, 1)
        
        # Score points
        if self.ball.left <= 0:
            self.ai_score += 1
            self.reset_ball()
        if self.ball.right >= self.WIDTH:
            self.player_score += 1
            self.reset_ball()
    
    def run(self):
        """Main game loop"""
        running = True
        while running:
            # Event handling
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
            
            # Player paddle movement
            keys = pygame.key.get_pressed()
            if keys[pygame.K_UP] and self.player_paddle.top > 0:
                self.player_paddle.y -= self.PADDLE_SPEED
            if keys[pygame.K_DOWN] and self.player_paddle.bottom < self.HEIGHT:
                self.player_paddle.y += self.PADDLE_SPEED
            
            # AI movement
            self.ai_move()
            
            # Update ball
            self.update_ball()
            
            # Drawing
            self.screen.fill(self.BLACK)
            pygame.draw.rect(self.screen, self.WHITE, self.player_paddle)
            pygame.draw.rect(self.screen, self.WHITE, self.ai_paddle)
            pygame.draw.ellipse(self.screen, self.WHITE, self.ball)
            pygame.draw.aaline(self.screen, self.WHITE, 
                             (self.WIDTH//2, 0), (self.WIDTH//2, self.HEIGHT))
            
            # Draw scores
            player_text = self.font.render(str(self.player_score), True, self.WHITE)
            ai_text = self.font.render(str(self.ai_score), True, self.WHITE)
            self.screen.blit(player_text, (self.WIDTH//4, 20))
            self.screen.blit(ai_text, (3*self.WIDTH//4, 20))
            
            # Update display
            pygame.display.flip()
            
            # Control game speed
            self.clock.tick(60)
        
        pygame.quit()

# Create and run the game
if __name__ == "__main__":
    game = PongGame()
    game.run()
