In [None]:
# Environment setup (Colab): fetch the game sources, copy them next to the
# notebook so `core`, `danmaku` and `enemies` are importable, install pygame
# and create the directory the episode loops write replay files into.
# Every step is safe to re-run: the clone is skipped when the checkout already
# exists and `mkdir -p` does not fail on an existing directory.
![ -d project-nmnbns ] || git clone https://github.com/Yasgant/project-nmnbns.git
!cp -rf ./project-nmnbns/game/* ./
# %pip installs into the environment of the running kernel.
%pip install pygame
!mkdir -p replay

Cloning into 'project-nmnbns'...
remote: Enumerating objects: 107, done.[K
remote: Counting objects: 100% (107/107), done.[K
remote: Compressing objects: 100% (75/75), done.[K
remote: Total 107 (delta 58), reused 76 (delta 30), pack-reused 0[K
Receiving objects: 100% (107/107), 73.29 KiB | 6.66 MiB/s, done.
Resolving deltas: 100% (58/58), done.
Collecting pygame
  Downloading pygame-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 540 kB/s 
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.1.2


In [None]:
# Mount Google Drive under /content/drive; the save cells further down write
# model checkpoints below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Hyper-parameters and shared constants read by the cells below.
action_size = 9  # number of discrete actions; width of the policy output layer
stack_size = 4  # number of frames stacked along the last axis of a network input
learning_rate = 0.0025  # default learning rate handed to the Adam optimizer
# presumably (rows, cols) of the raw game frame plus the stack depth -- TODO confirm against G.get_img()
state_size = (101, 121, stack_size)
total_episodes = 10000  # number of episodes run by the train / test loops
batch_size = 64  # default number of items drawn by Memory.sample()
gamma = 0.95  # default discount factor of discount_reward()
remember_size = 100000  # default maximum length of the Memory deque
pre_train = 5  # NOTE(review): not referenced by any visible cell
epsilon = 0.1  # default probability of a uniformly random action in MyModel.predict()
new_img_size = (50, 30)  # shape of the cropped frame returned by proc()

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import deque
import random
import sys
sys.path.append('/content')
from core import *
from danmaku import *
from enemies import *
from tensorflow.keras import layers, optimizers, models
import copy
from collections import deque

pygame 2.1.2 (SDL 2.0.16, Python 3.7.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:
def discount_reward(rewards, gamma = gamma):
    """Compute discounted returns scaled by their standard deviation.

    The reward sequence is walked backwards, accumulating
    ``G[i] = rewards[i] + gamma * G[i + 1]``; every return is then divided by
    the standard deviation of all returns.

    Args:
        rewards: 1-D sequence (list or ndarray) of per-step rewards.
        gamma: discount factor applied once per step.

    Returns:
        Floating-point ndarray with one entry per reward. The returns are only
        rescaled, not mean-centred. When the standard deviation is zero or not
        finite (empty input, a single step, constant returns) the unscaled
        returns are handed back instead of NaN/inf values.
    """
    rewards = np.asarray(rewards)
    # Integer rewards would make zeros_like() allocate an integer buffer and
    # silently truncate every discounted value, so promote them to float.
    if not np.issubdtype(rewards.dtype, np.floating):
        rewards = rewards.astype(float)

    out = np.zeros_like(rewards)
    prior = 0
    for i in reversed(range(len(rewards))):
        prior = prior * gamma + rewards[i]
        out[i] = prior

    if out.size == 0:
        return out
    spread = np.std(out - np.mean(out))
    # Dividing by a zero spread would turn every sample weight into NaN/inf.
    if not np.isfinite(spread) or spread == 0:
        return out
    return out / spread

class MyModel:
    """Convolutional policy network plus its action-sampling and update logic.

    Attributes:
        state_size: stored as given; the network input shape is built from the
            module-level ``new_img_size`` and ``stack_size`` instead.
        action_size: number of discrete actions (width of the softmax output).
        learning_rate: learning rate handed to the Adam optimizer.
        model: compiled Keras ``Sequential`` network.
    """

    def __init__(self, state_size = state_size, action_size = action_size, learning_rate = learning_rate):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.model = models.Sequential([
            layers.Input(shape = (*new_img_size, stack_size)),
            layers.Conv2D(filters = 32, kernel_size = 4),
            layers.Conv2D(filters = 64, kernel_size = 2),
            layers.Flatten(),
            layers.Dense(512, activation = 'relu'),
            layers.Dense(64, activation = 'relu'),
            layers.Dense(action_size, activation = 'softmax')
        ])
        self.model.compile(loss = 'mean_squared_error', optimizer = optimizers.Adam(learning_rate))

    def predict(self, img, eps = epsilon):
        """Pick an action for one stacked frame.

        With probability ``eps`` a uniformly random action is returned;
        otherwise an action is sampled from the network's softmax output.

        Args:
            img: one stacked input frame (no batch axis).
            eps: probability of taking a uniformly random action.

        Returns:
            Integer action index in ``[0, self.action_size)``.
        """
        if np.random.random() < eps:
            return np.random.randint(self.action_size)
        prob = np.array(self.model(np.array([img])))[0].astype(np.float64)
        # The float32 softmax output can miss a sum of 1 by more than
        # np.random.choice tolerates; renormalise in float64 before sampling.
        prob = prob / prob.sum()
        return np.random.choice(len(prob), p=prob)

    def predicts(self, imgs):
        """Return the network output for a batch of stacked frames as an ndarray."""
        return np.array(self.model(imgs))

    def train(self, rec):
        """Fit the network once on a sequence of recorded transitions.

        Args:
            rec: sequence of tuples whose first three items are
                ``(stacked_frame, action, reward)``; later items are ignored.
                The order is used as the time order for discounting.
        """
        if len(rec) == 0:
            # Nothing recorded: there is no batch to build.
            return
        imgs_batch = np.array([r[0] for r in rec])
        # One-hot mask of the action taken at every step.
        action_batch = np.array([[1 if r[1] == j else 0 for j in range(self.action_size)] for r in rec])
        # Target = current output masked to the taken action; each sample is
        # weighted by its std-scaled discounted return.
        prob_batch = self.model.predict(imgs_batch) * action_batch
        r_batch = discount_reward(np.array([r[2] for r in rec]))
        self.model.fit(imgs_batch, prob_batch, sample_weight = r_batch, verbose = 0)

In [None]:
# Rolling window holding the most recent `stack_size` frames; it starts out
# filled with all-zero frames of shape `new_img_size`.
stacked_imgs = deque((np.zeros(new_img_size) for _ in range(stack_size)), maxlen = stack_size)

def stack_img(stacked_imgs, img, fir = False):
    """Push a frame into the rolling window and return the stacked array.

    Args:
        stacked_imgs: container of recent frames; it is modified in place.
        img: the new frame to append.
        fir: when true the frame is appended ``stack_size`` times in total
            (at least once) instead of a single time.

    Returns:
        ``(stacked, stacked_imgs)`` where ``stacked`` joins all frames in the
        window along a new axis 2 and ``stacked_imgs`` is the same container
        that was passed in.
    """
    copies = max(stack_size, 1) if fir else 1
    stacked_imgs.extend([img] * copies)
    return np.stack(stacked_imgs, axis = 2), stacked_imgs

In [None]:
class Memory:
    """Bounded FIFO buffer of recorded items backed by a deque."""

    def __init__(self, maxlen = remember_size):
        # Once `maxlen` items are stored, appending drops the oldest one.
        self.queue = deque(maxlen = maxlen)

    def remember(self, img):
        """Append one item to the buffer."""
        self.queue.append(img)

    def sample(self, size = batch_size):
        """Return up to ``size`` distinct items drawn at random.

        When the buffer holds fewer than ``size`` items, all of them are
        returned (in random order) instead of raising ``ValueError``.
        """
        size = min(size, len(self.queue))
        return random.sample(self.queue, size)

    def sample_all(self):
        """Return the underlying deque itself (not a copy), oldest item first."""
        return self.queue
    

In [None]:
# Stage script for 'S1'. The first argument of data.insert() is presumably the
# frame at which the enemy is scheduled -- TODO confirm against StageData.
data = StageData.empty('S1',10*60*60)

# 0 ~ 1000
# Ten AimmingEnemy3 instances, inserted 9 apart, each shifted by 10 along the
# first constructor argument.
for _wave_idx in range(10):
    data.insert(
        9 * _wave_idx,
        AimmingEnemy3(100 + 10 * _wave_idx, 100, shoot_time = 10, bullet_speed=10, arc = np.pi / 24, alive_time=10*60),
    )
del _wave_idx

# 1000 ~ 1500
# Two groups (first argument 100, then 300) of six AimmingEnemy instances each,
# inserted 20 apart from 1000 to 1100.
for _spawn_x in (100, 300):
    for _spawn_at in range(1000, 1101, 20):
        data.insert(
            _spawn_at,
            AimmingEnemy(_spawn_x, 10, shoot_time = 2, bullet_speed=10, alive_time=3*60, vy = 3),
        )
del _spawn_x, _spawn_at

# 1500 ~ 2500
data.insert(1500, RandomEnemy(200, 100, shoot_time = 5, bullet_speed=5, alive_time=7*60))

# 2500 ~ 3500
data.insert(2500, RandomEnemyWithFall(200, 100, shoot_time = 5, bullet_speed=5, alive_time=7*60))

In [None]:
def proc(img):
    """Crop a fixed window of the frame around the first 0.3-valued pixel.

    The frame is searched in row-major order for the first pixel whose value
    is 0.3 (presumably the player marker -- TODO confirm against the game's
    ``get_img``). If there is none, position (0, 0) is used. Around that
    position ``(x, y)`` the window covering indices ``x-25 .. x+24`` on axis 0
    and ``y-25 .. y+4`` on axis 1 is copied into the top-left 50 x 30 region
    of a zero array of shape ``new_img_size``. Window cells lying outside the
    frame are set to 1.

    Args:
        img: 2-D array-like frame.

    Returns:
        ndarray of shape ``new_img_size`` holding the cropped window.
    """
    img = np.asarray(img)
    # Bounds come from the frame itself so a frame whose shape differs from
    # the configured state size can never be indexed out of range.
    n_rows, n_cols = img.shape

    # isclose() instead of ==: a float32 frame stores 0.3 inexactly, and an
    # exact comparison against the float64 literal can then never match.
    hits = np.argwhere(np.isclose(img, 0.3))
    if len(hits):
        x, y = int(hits[0][0]), int(hits[0][1])
    else:
        x, y = 0, 0

    back, fwd_rows, fwd_cols = 25, 25, 5
    new_img = np.zeros(new_img_size)
    # Start with the whole window marked as "outside the frame" ...
    new_img[:back + fwd_rows, :back + fwd_cols] = 1

    # ... then overwrite the part of the window that overlaps the frame.
    top, left = x - back, y - back
    r0, r1 = max(top, 0), min(x + fwd_rows, n_rows)
    c0, c1 = max(left, 0), min(y + fwd_cols, n_cols)
    if r0 < r1 and c0 < c1:
        new_img[r0 - top:r1 - top, c0 - left:c1 - left] = img[r0:r1, c0:c1]
    return new_img

In [None]:
#train
# Play `total_episodes` episodes. Each episode writes the chosen actions to
# ./replay/<episode>.txt, records every transition, and once the game reports
# `done` fits the model on that episode's transitions.
model = MyModel()
for j in range(total_episodes):
    memory = Memory()
    with open('./replay/'+str(j)+'.txt', "w") as f:
        G = game_with_op(Player(320, 400), copy.deepcopy(data))
        total_reward = 0
        # Fill the whole frame stack with the first processed frame.
        img, stacked_imgs = stack_img(stacked_imgs, proc(G.get_img()), True)
        while True:
            action = model.predict(img)
            f.write(str(action)+'\n')
            reward, done = G.op(action)
            total_reward += reward

            # A finished episode gets an all-zero frame as its successor.
            frame = np.zeros(new_img_size) if done else proc(G.get_img())
            next_img, stacked_imgs = stack_img(stacked_imgs, frame)
            memory.remember((img, action, reward, next_img))

            if not done:
                img = next_img
                continue

            model.train(memory.sample_all())
            print("Episode {} finished! Frames: {} Reward: {}".format(j, G.frame, total_reward))
            # Checkpoint after every 100th episode.
            if j % 100 == 99:
                model.model.save('./drive/MyDrive/models/')
            break



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Episode 5050 finished! Frames: 84 Reward: -84600.39428555664
Episode 5051 finished! Frames: 83 Reward: -84666.82478174537
Episode 5052 finished! Frames: 92 Reward: -83324.1533227618
Episode 5053 finished! Frames: 78 Reward: -85373.08912520726
Episode 5054 finished! Frames: 122 Reward: -79016.52390684433
Episode 5055 finished! Frames: 46 Reward: -91020.05399457768
Episode 5056 finished! Frames: 83 Reward: -84773.18963450073
Episode 5057 finished! Frames: 56 Reward: -89065.92482948345
Episode 5058 finished! Frames: 67 Reward: -87208.8902450295
Episode 5059 finished! Frames: 56 Reward: -89061.36177661896
Episode 5060 finished! Frames: 83 Reward: -84759.88536812356
Episode 5061 finished! Frames: 83 Reward: -84541.03923303289
Episode 5062 finished! Frames: 83 Reward: -84626.86218484044
Episode 5063 finished! Frames: 92 Reward: -83404.08950664895
Episode 5064 finished! Frames: 101 Reward: -82008.08721079948
Episode 5065 finishe

In [None]:
# Save the Keras network wrapped by `model` to the mounted Google Drive.
model.model.save('/content/drive/MyDrive/copy2')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/copy2/assets


In [None]:
#test
# Run episodes with eps = 0, i.e. every action is sampled from the network
# output. Frames go through proc() exactly as in the training loop so the
# stacked input has the (*new_img_size, stack_size) shape the network was
# built for.
# NOTE(review): like the training cell this loop still calls model.train(),
# overwrites ./replay/<episode>.txt and saves a checkpoint every 100 episodes
# -- confirm whether the model is meant to keep updating during testing.
for j in range(total_episodes):
    memory = Memory()
    with open('./replay/'+str(j)+'.txt', "w") as f:
        G = game_with_op(Player(320, 400), copy.deepcopy(data))
        total_reward = 0
        flag = True
        while True:
            if flag:
                # First frame of the episode: fill the whole stack with it.
                img = G.get_img()
                img = proc(img)
                img, stacked_imgs = stack_img(stacked_imgs, img, True)
                flag = False
            action = model.predict(img, 0)
            f.write(str(action)+'\n')
            reward, done = G.op(action)
            total_reward += reward
            if done:
                # Terminal successor: an all-zero frame in the cropped size.
                next_img = np.zeros(new_img_size)
                next_img, stacked_imgs = stack_img(stacked_imgs, next_img)
                memory.remember((img, action, reward, next_img))
                model.train(memory.sample_all())
                print("Episode {} finished! Frames: {} Reward: {}".format(j, G.frame, total_reward))
            else:
                next_img = G.get_img()
                next_img = proc(next_img)
                next_img, stacked_imgs = stack_img(stacked_imgs, next_img)
                memory.remember((img, action, reward, next_img))
                img = next_img

            if done:
                if j % 100 == 99:
                    model.model.save('./drive/MyDrive/models/')
                break

