<a href="https://colab.research.google.com/github/cacoleman16/CS660-Project/blob/main/neat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import numpy as np  # pip install numpy
import cv2          # pip install opencv-python
import neat         # pip install neat-python
import pickle       # standard library, no install needed
from envWrapper import make_env
import gym_super_mario_bros


class Worker(object):
    """Evaluate NEAT genomes by letting each one play Super Mario Bros.

    One episode is played per genome.  The shaped fitness is accumulated by
    ``update_fitness`` and the best value reached during the episode is
    stored on ``genome.fitness``.
    """

    def __init__(self):
        # Make sure the bookkeeping attributes exist even when
        # ``update_fitness`` is used before ``eval_genomes`` has run.
        self.env = None
        self.init_values()

    def init_values(self):
        """Reset the per-episode bookkeeping to its starting values."""
        self.counter = 0    # steps since the best fitness last improved
        self.score = 0      # last in-game score read from ``info``
        self.xpos = 0       # last x position read from ``info``
        self.xpos_max = 0   # furthest x position seen this episode
        self.stage = 0      # last stage read from ``info``
        self.level = 0      # stage seen on the previous step (stall check)
        self.lives = 3      # last life count read from ``info``

    def _get_actions(self, actions):
        """Return the index of the largest output, breaking ties at random.

        Args:
            actions: 1-D numpy array of network outputs.
        """
        best = np.flatnonzero(actions == actions.max())
        return np.random.choice(best)

    def update_fitness(self, info, reward, fitness_current):
        """Fold one environment step into the running fitness.

        Args:
            info: step info dict; the keys ``status``, ``stage``, ``x_pos``,
                ``life`` and ``score`` are read.
            reward: reward returned by the environment for this step.  It is
                replaced by -150 when a life was lost.
            fitness_current: fitness accumulated so far.

        Returns:
            The updated fitness.
        """
        # Flat bonus on every step where Mario is not in the 'small' state.
        bonus = 100 if info['status'] != 'small' else 0

        # NOTE(review): this deducts 5 whenever x has NOT decreased within
        # the same stage (so also while moving right).  The comparison may be
        # inverted; behaviour is kept as-is until the intent is confirmed.
        if self.stage == info['stage'] and self.xpos <= info['x_pos']:
            fitness_current -= 5

        # Losing a life overrides the environment reward with a penalty.
        # 255 is presumably what the emulator reports once no lives remain.
        # NOTE(review): ``lives`` starts at 3; if the first step reports
        # fewer lives it is scored as a death - confirm the start value.
        if self.lives > info['life'] or info['life'] == 255:
            reward = -150

        # Stage change bonus; the score guard skips the very first step,
        # where ``self.stage`` is still the initial 0.
        if self.stage != info['stage'] and self.score != 0:
            bonus += 300

        self.xpos = info['x_pos']
        self.stage = info['stage']
        self.lives = info['life']

        # Extra reward for reaching a new furthest-right position.
        right = 0
        if self.xpos > self.xpos_max:
            right = 10
            self.xpos_max = self.xpos

        fitness_current += (info['score'] - self.score) + reward + right + bonus
        self.score = info['score']

        return fitness_current

    def eval_genomes(self, genomes, config):
        """Fitness callback for ``neat.Population.run``.

        Args:
            genomes: iterable of ``(genome_id, genome)`` pairs.
            config: the ``neat.Config`` used to build each network.
        """
        for _genome_id, genome in genomes:
            genome.fitness = self._evaluate_genome(genome, config)

    def _evaluate_genome(self, genome, config):
        """Play one episode with ``genome`` and return its best fitness."""
        self.init_values()

        self.env = gym_super_mario_bros.make('SuperMarioBros-v0')
        try:
            self.env = make_env(self.env)
            state = self.env.reset()

            done = False
            fitness = 0
            old = 0
            current_max_fitness = 0
            net = neat.nn.RecurrentNetwork.create(genome, config)

            while not done:
                # Halve the observation before flattening it into the
                # network input.  Assumes the result matches the configured
                # number of inputs - TODO confirm against the config file.
                state = cv2.resize(state, None, fx=0.5, fy=0.5)
                actions = np.array(net.activate(state.flatten()))
                action = self._get_actions(actions)
                next_state, reward, done, info = self.env.step(action)
                fitness = self.update_fitness(info, reward, fitness)
                state = next_state

                # Stall detection: every 50 steps without a fitness record,
                # stop if x has not changed since the previous check and the
                # stage is the same as on the previous step.
                self.counter += 1
                if self.counter % 50 == 0:
                    if old == self.xpos and self.level == info['stage']:
                        done = True
                    else:
                        old = self.xpos

                self.level = info['stage']
                if fitness > current_max_fitness:
                    current_max_fitness = fitness
                    self.counter = 0

            return current_max_fitness
        finally:
            # Always release the emulator, even if the episode raised.
            self.env.close()
            # print(genome, fitness)
            
# Build the NEAT configuration from the file named 'config'.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config')

# Create the population described by that configuration.
p = neat.Population(config)


# Reporters: console output, statistics collection, and a checkpointer
# constructed with an interval of 10.
p.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
p.add_reporter(stats)
p.add_reporter(neat.Checkpointer(10))
worker = Worker()
# Run evolution with Worker.eval_genomes as the fitness function; the second
# argument (1000) is the generation limit passed to Population.run.
winner = p.run(worker.eval_genomes, 1000)

# Save the genome returned by the run, using pickle protocol 1.
with open('winnerNEW.pkl', 'wb') as output:
    pickle.dump(winner, output, 1)
    



In [None]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
import torch
import numpy as np
import cv2
import collections
import gym


class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action for ``skip`` frames and max-pool the last two."""

    def __init__(self, env=None, skip=4):
        """Wrap ``env`` so that only every ``skip``-th frame is returned."""
        super().__init__(env)
        self._skip = skip
        # Holds the two most recent raw frames for the element-wise maximum.
        self._obs_buffer = collections.deque(maxlen=2)

    def step(self, action):
        """Apply ``action`` up to ``skip`` times, summing the rewards.

        Stops early when the wrapped environment reports ``done``.  Returns
        the element-wise maximum of the buffered frames, the summed reward,
        and the ``done``/``info`` values of the last inner step.
        """
        done = None
        total_reward = 0.0
        for _ in range(self._skip):
            frame, step_reward, done, info = self.env.step(action)
            self._obs_buffer.append(frame)
            total_reward += step_reward
            if done:
                break
        stacked = np.stack(self._obs_buffer)
        pooled = stacked.max(axis=0)
        return pooled, total_reward, done, info

    def reset(self):
        """Empty the frame buffer, reset the env and buffer the first frame."""
        self._obs_buffer.clear()
        first_frame = self.env.reset()
        self._obs_buffer.append(first_frame)
        return first_frame


class ProcessFrame84(gym.ObservationWrapper):
    """
    Downsamples image to 84x84
    Greyscales image

    Returns numpy array
    """

    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

    def observation(self, obs):
        """Convert a raw frame with :meth:`process`."""
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Turn a 240x256x3 frame into an 84x84x1 uint8 greyscale image.

        Args:
            frame: array holding exactly 240 * 256 * 3 values.

        Returns:
            ``np.uint8`` array of shape ``(84, 84, 1)``.

        Raises:
            AssertionError: if ``frame`` has any other size.  Raised
                explicitly (not via ``assert``) so the check still runs
                under ``python -O``, where assert statements are removed.
        """
        if frame.size != 240 * 256 * 3:
            raise AssertionError("Unknown resolution.")
        img = np.reshape(frame, [240, 256, 3]).astype(np.float32)
        # Weighted sum of the three channels gives the greyscale image.
        img = img[:, :, 0] * 0.299 + img[:, :, 1] * \
            0.587 + img[:, :, 2] * 0.114
        # cv2 takes (width, height): the result is 110 rows by 84 columns.
        resized_screen = cv2.resize(
            img, (84, 110), interpolation=cv2.INTER_AREA)
        # Keep rows 18..101 to end up with a square 84x84 crop.
        x_t = resized_screen[18:102, :]
        x_t = np.reshape(x_t, [84, 84, 1])
        return x_t.astype(np.uint8)


class ImageToPyTorch(gym.ObservationWrapper):
    """Move the observation's axis 2 to the front (channel-first layout)."""

    def __init__(self, env):
        super().__init__(env)
        src_shape = self.observation_space.shape
        # Declared shape: last axis first, followed by the first two axes.
        channel_first = (src_shape[-1], src_shape[0], src_shape[1])
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=channel_first, dtype=np.float32)

    def observation(self, observation):
        """Return ``observation`` with axis 2 moved to position 0."""
        return np.moveaxis(observation, source=2, destination=0)


class ScaledFloatFrame(gym.ObservationWrapper):
    """Convert observations to float32 and divide them by 255."""

    def observation(self, obs):
        """Return ``obs`` as a float32 array scaled by 1/255."""
        as_float = np.array(obs).astype(np.float32)
        return as_float / 255.0


class BufferWrapper(gym.ObservationWrapper):
    """Keep a rolling buffer of recent observations along axis 0."""

    def __init__(self, env, n_steps, dtype=np.float32):
        """Widen the observation space by repeating it ``n_steps`` times."""
        super().__init__(env)
        self.dtype = dtype
        base_space = env.observation_space
        stacked_low = base_space.low.repeat(n_steps, axis=0)
        stacked_high = base_space.high.repeat(n_steps, axis=0)
        self.observation_space = gym.spaces.Box(
            stacked_low, stacked_high, dtype=dtype)

    def reset(self):
        """Zero the buffer, reset the env and push its first observation."""
        self.buffer = np.zeros_like(
            self.observation_space.low, dtype=self.dtype)
        first_obs = self.env.reset()
        return self.observation(first_obs)

    def observation(self, observation):
        """Shift the buffer by one slot and store ``observation`` last.

        The internal buffer itself is returned, not a copy.
        """
        frames = self.buffer
        frames[:-1] = frames[1:]
        frames[-1] = observation
        return frames


def make_env(env):
    """Wrap ``env`` in the preprocessing stack and return the result.

    Wrappers are applied innermost-first: MaxAndSkipEnv, ProcessFrame84,
    ImageToPyTorch, BufferWrapper (4 steps), ScaledFloatFrame, and finally
    JoypadSpace restricted to SIMPLE_MOVEMENT.
    """
    for wrapper in (MaxAndSkipEnv, ProcessFrame84, ImageToPyTorch):
        env = wrapper(env)
    stacked = BufferWrapper(env, 4)
    scaled = ScaledFloatFrame(stacked)
    return JoypadSpace(scaled, SIMPLE_MOVEMENT)


In [None]:
## The config file


[NEAT]
fitness_criterion     = max
fitness_threshold     = 1000000000000
pop_size              = 100
reset_on_extinction   = True

[DefaultGenome]
# node activation options
activation_default      = relu
activation_mutate_rate  = 0.04
activation_options      = sigmoid

# node aggregation options
aggregation_default     = sum
aggregation_mutate_rate = 0.3
aggregation_options     = sum

# node bias options
bias_init_mean          = 0.0
bias_init_stdev         = 1.0
bias_max_value          = 30.0
bias_min_value          = -30.0
bias_mutate_power       = 2.093
bias_mutate_rate        = 0.0509
bias_replace_rate       = 0.1

# genome compatibility options
compatibility_disjoint_coefficient = 2.0
compatibility_weight_coefficient   = 0.5

# connection add/remove rates
conn_add_prob           = 0.988
conn_delete_prob        = 0.146

# connection enable options
enabled_default         = True
enabled_mutate_rate     = 0.01

feed_forward            = True
initial_connection      = unconnected

# node add/remove rates
node_add_prob           = 0.25
node_delete_prob        = 0.1

# network parameters
num_hidden              = 4
num_inputs              =  7056
num_outputs             = 7

# node response options
response_init_mean      = 1.0
response_init_stdev     = 0.0
response_max_value      = 30.0
response_min_value      = -30.0
response_mutate_power   = 0.1
response_mutate_rate    = 0.1
response_replace_rate   = 0.0

# connection weight options
weight_init_mean        = 0.0
weight_init_stdev       = 1.0
weight_max_value        = 30
weight_min_value        = -30
weight_mutate_power     = 0.825
weight_mutate_rate      = 0.460
weight_replace_rate     = 0.0245

[DefaultSpeciesSet]
compatibility_threshold = 2.5

[DefaultStagnation]
species_fitness_func = mean
max_stagnation       = 10
species_elitism      = 2

[DefaultReproduction]
elitism            = 2
survival_threshold = 0.1