In [23]:
import gym 
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

import numpy as np
import random
import os

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

import pygame
from time import sleep
import Functions_and_classes
from Functions_and_classes import Star
from Functions_and_classes import star_position
from Functions_and_classes import Snake_tail, Snake, Snake_head
from Functions_and_classes import blit

Buliding an Enviroment

In [24]:
#Settting constatnts

WIDTH = HEIGHT = 900
WINDOW = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Snake")
BACKGROUND = (186, 255, 255)
FPS = WIDTH/20
CLOCK = pygame.time.Clock()

SNAKE_HEAD_IMG = pygame.image.load(os.path.join('Assets', 'Snake_head.png'))
SNAKE_TAIL_IMG = pygame.image.load(os.path.join('Assets', 'Snake_tail.png'))
SNAKE_HEAD = SNAKE_HEAD_IMG
STAR_IMG = pygame.image.load(os.path.join('Assets', 'Star.png'))

SPEED = 15

pygame.font.init() 
normal_size = pygame.font.SysFont('Comic Sans MS', round(WIDTH/30))
huge_size = pygame.font.SysFont('Comic Sans MS', round(HEIGHT/18))

WINDOW.fill(BACKGROUND)
pygame.display.update()
label = huge_size.render("Welcome to Snake in Python!", 12, (0,0,0))
blit(label, (WIDTH/2)-(WIDTH/4), (HEIGHT/2)-(HEIGHT/9), window=WINDOW)
pygame.display.update()



In [25]:
def Make_board(star_list, head, tail_list):
    Table = np.zeros(shape=(int(WIDTH), int(HEIGHT+2)))
    
    #Creating a board imagine
    for star in star_list: #Setting stars
        Table[int(star.Rectangle.x), int(star.Rectangle.y)] = 3 
    
    Table[int(head.Rectangle.x), int(head.Rectangle.y)] = 1 #Creating Head
    for tail in tail_list:
        Table[int(tail.Rectangle.x), int(tail.Rectangle.y)] = 2 #Creating tail
    
    return Table
    

In [26]:
class SnakeEnv(Env):
    def __init__(self):
        #Things for stable_baselines3
        self.action_space = Discrete(4) # --> 0 - free space, 1 - Head, 2 - Tail, 3 - Star
        self.observation_space = Box(low=0, high=5, shape=(WIDTH, HEIGHT+2), dtype=int)
        self.Hero = Snake(window=WINDOW)
        self.last_score = self.Hero.score  #Setting init score
        
        #Creating a star list
        star_list = [Star(star_position(WIDTH), star_position(HEIGHT), window=WINDOW)]
        
        for i in range(int(WIDTH/30)):
            star_list.append(Star(star_position(WIDTH), star_position(HEIGHT), window=WINDOW))
        
        self.star_list = star_list

        #star_list_np = np.array([star_list.Rectangle.x, star_list.Rectangle.y], dtype=int) / 30 #Creating a np.array of position of stars in game space

        self.state = Make_board(star_list=self.star_list, head=self.Hero, tail_list=self.Hero.list_of_tails)
        print(type(self.state))
        self.learing_time = 200
        pass
    
    def step(self, action):
        # INIT A NEXT STEP
        
        global FPS 
        local_FPS = self.Hero.score + FPS
        CLOCK.tick(local_FPS) 
        WINDOW.fill(BACKGROUND)

        if (self.Hero.score > self.last_score) and (len(self.star_list) != WIDTH/60):
            self.star_list.pop(-1)
            self.last_score = self.Hero.score

        # Score sign
        label = normal_size.render("Score: {0}".format(self.Hero.score), 12, (0,0,0))
        WINDOW.blit(label, (0, 0))
        for i in self.star_list:
            i.draw()
        
        reward = self.Hero.score
        # MAKING ACTIONS
        
        #Difrent action changing state
        # Left = 0, Up = 1, Right = 2, Down =3
        if action == 0:
            self.Hero.rotate(90)
        if action == 2:
            self.Hero.rotate(270)
        if action == 3:
            self.Hero.rotate(180)
        if action == 1:
            self.Hero.rotate(0)  


        self.Hero.move() #Moving and colecting stars
        for i in self.star_list:
            self.Hero.collect_star(i)

        # ENDING A STEP PROCESS
        self.state = Make_board(star_list=self.star_list, head=self.Hero, tail_list=self.Hero.list_of_tails)

        info = {}

        if self.learing_time <= 0:
            done = True
        else:
            self.learing_time -= 1
            done = False

        pygame.display.update() #Update at very end!

        WINDOW.fill(BACKGROUND)    

        return self.state, reward, done, info
    
    def render(self):
        pass
    
    def reset(self):
        self.Hero = Snake(window=WINDOW)
        self.last_score = self.Hero.score  #Setting init score
        
        #Creating a star list
        star_list = [Star(star_position(WIDTH), star_position(HEIGHT), window=WINDOW)]
        
        for i in range(int(WIDTH/30)):
            star_list.append(Star(star_position(WIDTH), star_position(HEIGHT), window=WINDOW))
        
        self.star_list = star_list

        #star_list_np = np.array(star_list, dtype=int) / 30 #Creating a np.array of position of stars in game space

        self.state = Make_board(star_list=self.star_list, head=self.Hero, tail_list=self.Hero.list_of_tails)

        self.learing_time = 200
        
        return self.state

    def close(self):
        pygame.quit()
        pass

In [27]:
env = SnakeEnv()
env = DummyVecEnv([lambda: env])

<class 'numpy.ndarray'>


In [28]:
log_path = os.path.join('Training', 'Logs')
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device


In [29]:
model.learn(total_timesteps=10)

Logging to Training/Logs/PPO_10


In [None]:
model.save(os.path.join('Training', 'Saved_models', 'Snake_PPO_100'))

In [None]:
env.close()