In [None]:
import pygame

import pong
import printing

# Detect whether this notebook is running on Google Colab by probing for the
# `google.colab` package. Only a failed import means "not on Colab"; any other
# error (e.g. KeyboardInterrupt) is no longer silently swallowed.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    import os

    # Select SDL's "dummy" video driver so pygame can initialise without a
    # real display.
    os.environ['SDL_VIDEODRIVER'] = 'dummy'

from pong import Pong
from pong_observer import Player
from main import train
import strategy_pattern
from typing import List
from strategy_pattern import Strategy
from play import play
from pong_action import PongAction
# NOTE(review): presumably shuts pygame down again in case one of the imports
# above initialised it -- confirm against the imported project modules.
pygame.quit()

# Getting to know the environment

Play a game of pong!
W/S: move left paddle up/down
UP/DOWN: move right paddle up/down

In [None]:
# Start a game with default settings; the key bindings are listed in the text above.
play()

# Training the neural network

How long do you think it will take until you can see a learning effect?
Run the training and find out!

In [None]:
# Drop every registered strategy override; train() presumably falls back to its
# built-in defaults when nothing is registered -- TODO confirm in strategy_pattern.
strategy_pattern.strategies = {}
# NOTE(review): training_time is presumably given in seconds -- confirm in main.train.
train(training_time=60)

Next you will implement the relevant methods for a successful training yourself.

# Defining the action space

Teach the neural network which actions exist by assigning a number to each action.

In [None]:
def get_action_map():
    """
    Assign a number to every action the neural network can choose from.
    :return: dict mapping each action number to its PongAction
    """
    action_map = {}
    action_map[0] = PongAction(True, False)  # move paddle up
    # action_map[1] = ...
    # action_map[2] = ...
    return action_map

# Register the function under the ACTION_MAP strategy key.
strategy_pattern.strategies[Strategy.ACTION_MAP] = get_action_map

# Defining the state space

The input to a neural network is always a number or a tuple of numbers; e.g. `(12, 3, 4, 5)`.
We can't just throw a `Pong` object into the neural network and say "learn from this".
Extract the relevant information from the `Pong` object and return it as a tuple of numbers.
Note: Press `CTRL` and click on `Pong` in the code to go to the definition.

In [None]:
def get_state(observation: Pong) -> tuple:
    """
    Extract the numbers that are fed into the neural network from the game.
    :param observation: what the game state looks like
    :return: a tuple of numbers describing the game state
    """
    state = (
        observation.ticks_this_ball_exchange,
        42,
        99,
        # observation.somethingElse, ...
    )
    return state

# Register the function under the STATE strategy key.
strategy_pattern.strategies[Strategy.STATE] = get_state

Test if you give the neural network the right information:

In [None]:
# Train with whatever strategies are registered at this point (including get_state).
# NOTE(review): training_time is presumably given in seconds -- confirm in main.train.
train(training_time=60)

# Defining the structure of the neural network

Let's not rely on the code to automatically create a neural network for us.
Which of the things we did earlier do you think influence how the neural network must be structured?

Remember, we use a list notation to define the structure of the neural network.
E.g. `[4, 2, 3, 1]` creates the following neural network:

In [None]:
from PIL import Image
# Open the illustration of the example network; the relative path is resolved
# against the current working directory.
im = Image.open("nn_structure.png")
# NOTE(review): `display` is not imported in this notebook; presumably the
# built-in provided by IPython/Jupyter -- confirm when running elsewhere.
display(im)

Implement your own network structure!

In [None]:
def get_network_structure() -> List[int]:
    """
    Describe the neural network in list notation.
    :return: the layer sizes, starting with the input layer and ending with the output layer
    """
    # a neural network with 1 input, two hidden layers of size 2 and 1 output
    input_size, hidden_size, output_size = 1, 2, 1
    return [input_size, hidden_size, hidden_size, output_size]

# Register the function under the NETWORK_STRUCTURE strategy key.
strategy_pattern.strategies[Strategy.NETWORK_STRUCTURE] = get_network_structure

Let's see if it works!

In [None]:
# Train with whatever strategies are registered at this point (including the network structure).
# NOTE(review): training_time is presumably given in seconds -- confirm in main.train.
train(training_time=60)

# Defining a reward function

Now we will define a reward function, i.e. tell the neural network what it did right and what it did wrong.
But first, let's define a few helper functions:

In [None]:
def enemy_scored(observation: Pong, next_observation: Pong) -> bool:
    """
    Tell whether the enemy scored a point between the two observations.
    :param observation: what the game state looks like
    :param next_observation: what the game state looks like shortly after
    :return: True if the enemy scored a point, False otherwise
    :note: The enemy is the right side player!
    """
    scored = True  # replace with your own logic
    return scored

In [None]:
def how_far_is_ball_from_left_paddle(observation: Pong) -> float:
    """
    Measure the horizontal distance between the ball and the left paddle.
    :param observation: what the game state looks like
    :return: The horizontal distance between ball and the left paddle.
    """
    distance = 0.0  # replace with your own logic
    return distance

Now let's define the reward function:

In [None]:
def get_reward(observation: Pong, next_observation: Pong) -> float:
    """
    Rate the action the neural network took between the two observations.
    :param observation: what the game state looks like
    :param next_observation: what the game state looks like shortly after
    :return: A number indicating how good the action was (positive: good, negative: bad, 0: neutral).
    """
    reward = 0  # your reward strategy goes here
    return reward

# Start from an empty registry, then register only the reward function.
# NOTE(review): this also drops the ACTION_MAP, STATE and NETWORK_STRUCTURE
# strategies registered in earlier cells -- confirm that this is intended.
strategy_pattern.strategies = {}
strategy_pattern.strategies[Strategy.REWARD] = get_reward

Before we actually train the neural network, test if the reward function works as expected.
When running the following code, you can control the left paddle while observing the reward printed to the console:

In [None]:
# Don't bother with the details of this code, but ask if you are interested
import copy
# Keep a shallow copy of the current print flags so they can be restored later.
flags = copy.copy(printing.print_flags)
# Additionally enable the REWARD flag for this play session.
printing.print_flags.append(printing.PrintFlag.REWARD)
try:
    play(invincible_enemy=True, debug=True)
finally:
    # Restore the saved flags even if play() raises or is interrupted.
    printing.print_flags = flags

Let's see if it works!

In [None]:
# Train with whatever strategies are registered at this point (including the reward function).
# NOTE(review): training_time is presumably given in seconds -- confirm in main.train.
train(training_time=60)

# Transforming observations

Now that we have trained our agent on the left-hand side, let's try it for the other side too!

In [None]:
# NOTE(review): swap_players presumably puts the trained agent on the other
# (right-hand) side, as the text above suggests -- confirm in play.play.
play(ai_enemy=True, swap_players=True)

What do you think? Great?!

Implement a function which "transforms" the observations,
such that we can leverage what we already learned.

In [None]:
def transform_observation(observation: Pong) -> Pong:
    """
    Transform an observation so that what was already learned can be reused.
    :param observation: what the game state looks like
    :return: the transformed game state; until you add your own logic this is
        the observation itself, unchanged
    """
    # Not used yet, but available for your transformation
    # (presumably the dimensions of the playing field -- see the pong module).
    width = pong.width
    height = pong.height
    # Return a Pong object as the signature promises; without a return
    # statement the function would hand None to its caller.
    return observation  # replace with your own logic

# Register the function under the TRANSFORM_OBSERVATION strategy key.
strategy_pattern.strategies[Strategy.TRANSFORM_OBSERVATION] = transform_observation

Let's try again!

In [None]:
# Same call as before; transform_observation is registered by now.
play(ai_enemy=True, swap_players=True)