In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
import numpy as np
import math
import copy
import torch
import torch.nn as nn
import torch.nn.functional as f

from game import *
from player import *

In [21]:
# Prefix for saved brain files: save_brains/load_brains append `_brain_<i>.pt` to it.
# NOTE(review): hardcoded absolute, machine-specific path — consider a configurable
# directory relative to the project so the notebook runs elsewhere.
PATH = 'D:/New folder/project/Snake AI/models/'

In [22]:
def generate_individual_brain(input_size, num_hidden_layers, hidden_layers_size, output_size):
    """Build one randomly initialised, gradient-free network ("brain").

    Layout: LayerNorm(input_size) -> `num_hidden_layers` Linear layers whose
    widths come from `hidden_layers_size` -> Linear(output_size) -> Softmax.
    There is no non-linearity between the Linear layers.

    Args:
        input_size: number of input features.
        num_hidden_layers: how many hidden Linear layers to stack.
        hidden_layers_size: sequence of hidden widths; entries
            0 .. num_hidden_layers-1 are read, and the last entry feeds the
            output layer.
        output_size: number of output probabilities.

    Returns:
        An `nn.Sequential` whose parameters all have `requires_grad = False`.
    """
    layers = [
        nn.LayerNorm(input_size),
        nn.Linear(input_size, hidden_layers_size[0]),
    ]
    for layer in range(num_hidden_layers - 1):
        layers.append(nn.Linear(hidden_layers_size[layer], hidden_layers_size[layer + 1]))
    layers.append(nn.Linear(hidden_layers_size[-1], output_size))
    # Normalise over the feature axis explicitly. Without `dim`, Softmax falls
    # back to the deprecated implicit choice: it warns on every forward pass
    # and picks axis 0 for 3-D input.
    layers.append(nn.Softmax(dim=-1))
    AI_bc = nn.Sequential(*layers)
    # Brains are evolved rather than trained by back-propagation, so no
    # parameter needs gradient tracking.
    for parameter in AI_bc.parameters():
        parameter.requires_grad = False
    return AI_bc

In [23]:
def get_move(input, i, brains):
    """Pick the direction that brain `i` scores highest for `input`.

    Args:
        input: tensor fed to the selected brain.
        i: index of the brain inside `brains`.
        brains: indexable collection of callables (the population).

    Returns:
        The entry of `DIRECTIONS` at the arg-max of the brain's output.
    """
    chosen_brain = brains[i]
    scores = chosen_brain(input)
    best = scores.argmax()
    return DIRECTIONS[best]

In [24]:
def generate_input(game, window_size):
    """Encode the current game state as a single-row float tensor.

    The vector holds one value per position returned by
    `game.get_all_eligible_neighbors(head, borders=True, window_size=...)`,
    followed by six positional features: head row/col, first food row/col,
    and the head-minus-food difference on each axis.

    Cell encoding: food -> 1, snake body -> -1, empty board -> 0,
    out of board -> -1. Any other raw value is passed through unchanged.

    Args:
        game: game object exposing `snake_heads`, `food_positions`,
            `get_all_eligible_neighbors`, `extract_position` and the
            `food_val` / `body_val` / `default_board_val` / `out_of_board`
            markers.
        window_size: forwarded to `get_all_eligible_neighbors`.

    Returns:
        Float tensor of shape (1, n_features).
    """
    head = game.snake_heads[0]
    positions = game.get_all_eligible_neighbors(head, borders=True, window_size=window_size)

    input_vector = []
    for position in positions:
        cell = game.extract_position(position)
        if cell == game.food_val:
            cell = 1
        elif cell == game.body_val:
            cell = -1
        elif cell == game.default_board_val:
            cell = 0
        elif cell == game.out_of_board:
            cell = -1
        input_vector.append(cell)

    food = game.food_positions[0]
    input_vector.extend([
        head[0],
        head[1],
        food[0],
        food[1],
        head[0] - food[0],
        head[1] - food[1],
    ])
    # Shape the row from the vector actually built instead of the module-level
    # `input_size`, so the result always matches the `window_size` argument and
    # the function does not depend on a global defined in a later cell.
    return torch.tensor(input_vector).float().reshape(1, -1)

In [25]:
def run_one_trial(board_size, set_seed, i, window_size, brains, print=False):
    """Play one game with brain `i` and report how it went.

    The loop continues while the game reports the snake alive
    (`game.zinda_hai_ki_nahi`) and `moves <= move_limit`, so at most
    `move_limit + 1` moves are played.

    Args:
        board_size: forwarded to `Game`.
        set_seed: forwarded to `Game` as `set_seed`.
        i: index of the brain to use.
        window_size: forwarded to `generate_input`.
        brains: the population to pick brain `i` from.
        print: flag handed to `game.print_board` before the first move and
            after each move.

    Returns:
        Tuple `(moves, len(game.snakes[0]))`.
    """
    game = Game(board_size, set_seed=set_seed)
    game.print_board(print)
    moves = 0
    while True:
        if not game.zinda_hai_ki_nahi or moves > move_limit:
            break
        state = generate_input(game, window_size)
        direction = get_move(state, i, brains)
        game.move(direction)
        game.print_board(print)
        moves += 1
    snake_length = len(game.snakes[0])
    return moves, snake_length

In [26]:
def one_gen(board_size, set_seed, gen=0, window_size=3, brains=None):
    """Evaluate every brain in the population over `num_trials` games.

    Args:
        board_size: forwarded to `run_one_trial`.
        set_seed: forwarded to `run_one_trial`.
        gen: generation number; not used in the computation.
        window_size: forwarded to `run_one_trial`.
        brains: the population; brain `i` plays the trials for individual `i`.

    Returns:
        Four NumPy arrays of length `pop_size`, in this order: best move count,
        best score, mean move count and mean score of each individual.
    """
    max_moves = [0] * pop_size
    max_scores = [0] * pop_size
    # Plain per-individual slots. The earlier `[[]] * pop_size` made every slot
    # refer to the SAME list, so each individual's average also included the
    # trials of all individuals evaluated before it.
    avg_moves = [0.0] * pop_size
    avg_scores = [0.0] * pop_size
    for i in range(pop_size):
        individual_moves = []
        individual_scores = []
        for _ in range(num_trials):
            moves, score = run_one_trial(board_size, set_seed, i, window_size, brains)
            individual_moves.append(moves)
            individual_scores.append(score)
            if moves > max_moves[i]:
                max_moves[i] = moves
            if score > max_scores[i]:
                max_scores[i] = score
        # With zero trials there is nothing to average; keep the 0.0 default
        # instead of dividing by zero.
        if individual_moves:
            avg_moves[i] = sum(individual_moves) / len(individual_moves)
            avg_scores[i] = sum(individual_scores) / len(individual_scores)
    return np.array(max_moves), np.array(max_scores), np.array(avg_moves), np.array(avg_scores)

In [27]:
def save_brains(PATH, brains):
    """Write each brain to its own file with `torch.save`.

    Brain number `k` goes to `<PATH>_brain_<k>.pt`; `PATH` is used as a plain
    string prefix, with no separator inserted.

    Args:
        PATH: string prefix for the output files.
        brains: iterable of objects to serialise.
    """
    for index, brain in enumerate(brains):
        destination = f'{PATH}_brain_{index}.pt'
        torch.save(brain, destination)

def load_brains(PATH, brains):
    """Fill `brains` in place from files written by `save_brains`.

    Slot `k` is replaced by the object stored in `<PATH>_brain_<k>.pt`, which
    is then switched to evaluation mode.

    Args:
        PATH: string prefix the files were saved under.
        brains: pre-sized mutable list; every slot is overwritten.
    """
    # Iterate over the slots we were actually given rather than the global
    # `pop_size`: that global is rebound to a tuple by a later cell, and using
    # it could over- or under-run `brains`.
    for pop in range(len(brains)):
        # SECURITY: torch.load unpickles the file, which can execute arbitrary
        # code — only load files you produced yourself.
        # NOTE(review): recent PyTorch releases are reported to default
        # `weights_only=True`, which would reject whole pickled modules —
        # confirm against the installed version.
        brains[pop] = torch.load(f'{PATH}_brain_{pop}.pt')
        brains[pop].eval()

In [28]:
def mutate(brain):
    """Perturb the weights of every `nn.Linear` layer of `brain` in place.

    Each weight is selected independently when a uniform draw falls below
    `mutation_rate`; selected weights get standard-normal noise scaled by
    `mutation_change` added. Biases and non-Linear layers are left alone.

    Args:
        brain: iterable module container (e.g. `nn.Sequential`).

    Returns:
        The same `brain` object, now holding the mutated weights.
    """
    linear_layers = [module for module in brain if isinstance(module, nn.Linear)]
    for linear in linear_layers:
        shape = linear.weight.shape
        # Draw the selection mask first, then the noise, one pair per layer.
        selected = (torch.rand(shape) < mutation_rate).long()
        noise = torch.empty(shape).normal_(mean=0, std=1)
        perturbed = linear.weight + (selected * noise * mutation_change)
        linear.weight = nn.Parameter(perturbed, requires_grad=False)
    return brain

In [29]:
def reproduce(brain_pair):
    """Create a child brain by uniform crossover of two parents' weights.

    The child starts as a deep copy of the first parent. For every
    `nn.Linear` layer, each weight is kept from the first parent or replaced
    by the second parent's weight at the same position, chosen by an
    independent 0/1 draw. Biases and other layers stay as in the first parent.

    Args:
        brain_pair: indexable holding the two parent brains at [0] and [1].

    Returns:
        A new brain; neither parent is modified.
    """
    first_parent = brain_pair[0]
    second_parent = brain_pair[1]
    child = copy.deepcopy(first_parent)
    for index, module in enumerate(child):
        if not isinstance(module, nn.Linear):
            continue
        # 1 -> keep the first parent's weight, 0 -> take the second parent's.
        keep_first = torch.randint_like(module.weight, 0, 2)
        from_first = module.weight * keep_first
        from_second = second_parent[index].weight * (1 - keep_first)
        module.weight = nn.Parameter(from_first + from_second, requires_grad=False)
    return child

In [30]:
def evolve(top_25, brains):
    """Build the next generation from the selected individuals.

    For each selected index the new population receives:
      * an unchanged deep copy of that brain,
      * one crossover child of two parents drawn (with replacement) from
        `top_25`,
      * one mutated deep copy of that brain.
    The population is then topped up to `pop_size` with freshly generated
    random brains.

    Args:
        top_25: indices into `brains` of the individuals to breed from.
        brains: the current population; it is not modified.

    Returns:
        A new list of brains.
    """
    new_brains = []
    for brain in top_25:
        new_brains.append(copy.deepcopy(brains[brain]))
        new_brains.append(reproduce([brains[x] for x in np.random.choice(top_25, 2)]))
    # Mutate a private copy of EACH selected brain. The earlier loop reused the
    # stale `brain` index from the loop above, so it kept mutating the last
    # selected brain in place and appended that one object repeatedly.
    for brain in top_25:
        new_brains.append(mutate(copy.deepcopy(brains[brain])))
    remaining = pop_size - len(new_brains)
    # `range` of a non-positive count is empty, so nothing is added when the
    # bred population already fills `pop_size`.
    new_brains += [
        generate_individual_brain(input_size, num_hidden_layers, hidden_layers_size, output_size)
        for _ in range(remaining)
    ]
    return new_brains

In [31]:
def train_for_n_gen(brains):
    """Run the evaluate -> select -> evolve loop for `num_gen` generations.

    Each generation is scored with `one_gen`; the best quarter (rounded up),
    ranked by per-individual maximum score, is printed and handed to `evolve`.
    The population is not evolved after the final generation, so the returned
    brains are the ones that were just evaluated.

    Args:
        brains: the starting population.

    Returns:
        The population after the last generation.
    """
    final_gen = num_gen - 1
    for gen in range(num_gen):
        _, max_scores, _, _ = one_gen(
            board_size, set_seed, 0, window_size=input_window_size, brains=brains
        )
        keep = math.ceil(len(max_scores) / 4)
        best_first = np.argsort(max_scores)[::-1]
        top_25 = best_first[:keep]
        print(f"**END OF GEN {gen}**")
        print(f'max scores at the end of gen {gen} - {max_scores[top_25]}')
        if gen == final_gen:
            continue
        brains = evolve(top_25, brains)
    return brains

In [32]:
# --- Evolution schedule ---
pop_size = 300    # number of brains per generation
num_trials = 50   # games played per brain per generation
move_limit = 100  # move cap checked by run_one_trial's loop
num_gen = 100     # generations to run

# --- Genetic operators ---
mutation_rate = 0.05    # per-weight threshold for the uniform draw in mutate
mutation_change = 0.1   # scale applied to the Gaussian noise in mutate

# --- Network shape ---
input_window_size = 3
# Window cells minus one (presumably the head cell itself — confirm against
# Game.get_all_eligible_neighbors) plus the 6 positional features that
# generate_input appends.
input_size = input_window_size ** 2 - 1 + 6
num_hidden_layers = 4
hidden_layers_size = [25 for _ in range(num_hidden_layers)]
output_size = 4

# --- Game ---
board_size = 10
set_seed = False

In [33]:
# Create `pop_size` random brains, then evolve them for `num_gen` generations.
# `brains` ends up holding the population returned by the final generation.
brains = [generate_individual_brain(input_size, num_hidden_layers, hidden_layers_size, output_size) for _ in range(pop_size)]
brains = train_for_n_gen(brains)

**END OF GEN 0**
max scores at the end of gen 0 - [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4]
**END OF GEN 1**
max scores at the end of gen 1 - [6 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4]
**END OF GEN 2**
max scores at the end of gen 2 - [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4]
**END OF GEN 3**
max scores at the end of gen 3 - [6 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4]
**END OF GEN 4**
max scores at the end of gen 4 - [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 

In [34]:
# Persist every brain of the final population as `<PATH>_brain_<i>.pt`.
save_brains(PATH, brains)

In [35]:
# Replay one game with brain 0, printing the board after every move.
# The cell's value is the tuple (moves played, final length of snake 0).
run_one_trial(board_size=board_size, set_seed=True, i=0, window_size=3, brains=brains, print=True)

|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|++|__|__|
|__|__|__|__|__|__|__|++|__|__|
|__|__|__|__|__|__|__|* |__|__|
|__|__|__|__|__|__|@ |__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|

|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|++|__|__|
|__|__|__|__|__|__|* |++|__|__|
|__|__|__|__|__|__|@ |__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|

|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|@ |__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|++|__|__|
|__|__|__|__|__|__|++|++|__|__|
|__|__|__|__|__|__|* |__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|
|__|__|__|__|__|__|__|__|__|__|

|__|__|__|__|__|__|__|__|__|__|
|__|_

(3, 4)

In [36]:
# Scratch experiment: a population stored as flat weight vectors.
sol_per_pop = 50  # number of individuals
# Presumably the layer widths of a 7 -> 9 -> 15 -> 3 network — TODO confirm.
n_x = 7
n_h = 9
n_h2 = 15
n_y = 3
# Sum of the three weight-matrix sizes; no bias terms are counted.
num_weights = n_x*n_h + n_h*n_h2 + n_h2*n_y

# Shape of the population matrix: (individuals, weights per individual).
# NOTE(review): this rebinds the global `pop_size` from an int (300) to a tuple.
# one_gen, evolve and load_brains read `pop_size` as an int, so re-running them
# after this cell will fail — consider a distinct name such as `population_shape`.
pop_size = (sol_per_pop,num_weights)
# Create the initial population: each weight is drawn (with replacement) from
# the grid -1.00, -0.99, ..., 0.99.
new_population = np.random.choice(np.arange(-1,1,step=0.01),size=pop_size,replace=True)
new_population

array([[-0.26, -0.13,  0.16, ...,  0.47,  0.86,  0.63],
       [ 0.46, -0.11,  0.94, ...,  0.25,  0.17, -0.53],
       [-0.12,  0.16,  0.28, ..., -0.54, -0.52, -0.87],
       ...,
       [ 0.13,  0.27,  0.74, ..., -0.37, -0.32, -0.33],
       [-0.73,  0.8 ,  0.15, ...,  0.48, -0.79, -0.92],
       [-0.92, -0.74, -0.84, ..., -0.06, -0.31, -0.11]])