<a href="https://colab.research.google.com/github/akshatshah91/Game-AI/blob/master/GA_with_NN_for_GA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gym
!apt-get install python-opengl -y

In [47]:
import numpy as np
import matplotlib.pyplot as plt
import gym
import torch
import torch.nn as nn
import copy

In [97]:
# --- Network dimensions ---
input_size     = 4    # CartPole observation: cart position, cart velocity, pole angle, pole angular velocity
hidden_size    = 5    # Number of nodes in the single hidden layer
output_size    = 2    # One output per action: left and right

# --- Genetic-algorithm settings ---
generations    = 50                     # Maximum number of generations to run
startingSize   = 100                    # Number of agents in the population
bestPopulation = int(startingSize / 2)  # Agents kept (and mutated) each generation: half the population
averageRuns    = 3                      # Episodes averaged to score each agent
mutationRate   = 0.05                   # Scale of the Gaussian noise added to each weight on mutation
scoreCap       = 500                    # Evolution stops once the high score reaches this value

In [3]:
class CartPoleNN(nn.Module):
    """Small fully connected network that scores each action for an observation.

    Architecture: Linear(input_size -> hidden_size) -> ReLU -> Linear(hidden_size -> output_size).

    Args:
        input_size: Number of values in one observation.
        hidden_size: Number of nodes in the hidden layer.
        output_size: Number of output values (one per action).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # observation -> hidden layer
        self.relu = nn.ReLU()                           # element-wise max(0, x)
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden layer -> action scores

    def forward(self, x):
        """Run ``x`` through fc1, ReLU and fc2 and return the raw fc2 outputs."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [4]:
def init_weights(w):
  """Xavier-initialise the weights and zero the biases of every ``nn.Linear`` in ``w``.

  ``w`` may be a single ``nn.Linear`` layer or any ``nn.Module`` that contains
  linear layers (``w.modules()`` yields ``w`` itself plus all of its
  sub-modules), so the function works both when called directly on a whole
  network and when called on an individual layer.

  Args:
    w: Module to initialise. Anything that is not an ``nn.Module`` is ignored.

  Returns:
    None. The layers are modified in place.
  """
  if not isinstance(w, nn.Module):
    return
  for layer in w.modules():
    if isinstance(layer, nn.Linear):
      # The trailing-underscore initialisers operate in place and without
      # recording gradients, so they also work on frozen parameters.
      torch.nn.init.xavier_uniform_(layer.weight)
      if layer.bias is not None:  # nn.Linear(..., bias=False) has no bias tensor
        torch.nn.init.zeros_(layer.bias)

In [41]:
def generatePopulation(popSize):
  """Build a list of ``popSize`` newly constructed CartPoleNN agents.

  Each agent is created from the module-level ``input_size``, ``hidden_size``
  and ``output_size`` settings, has ``requires_grad`` switched off on all of
  its parameters, and is then passed to ``init_weights``.

  Args:
    popSize: Number of agents to create.

  Returns:
    A list containing ``popSize`` CartPoleNN instances.
  """
  def spawn():
    # Construct one agent, freeze its parameters, then hand it to init_weights.
    network = CartPoleNN(input_size, hidden_size, output_size)
    for weight in network.parameters():
      weight.requires_grad = False
    init_weights(network)
    return network

  return [spawn() for _ in range(popSize)]

In [50]:
def selectBest(env, agents, numBest, iterations):
  """Score every agent and return the ``numBest`` highest-scoring ones.

  Each agent is scored with ``runSimulation(env, agent, iterations)``. Agents
  are ranked by ``(score, index)`` in descending order, so when two agents
  have the same score the one with the larger index in ``agents`` ranks first.

  Args:
    env: Environment passed through to ``runSimulation``.
    agents: List of agents to evaluate.
    numBest: Number of top-ranked agents to return.
    iterations: Number of episodes passed through to ``runSimulation``.

  Returns:
    A tuple ``(selected, bestScore)``: the list of the ``numBest`` top-ranked
    agents (best first) and the score of the top-ranked agent.
  """
  scored = [(runSimulation(env, agent, iterations), index)
            for index, agent in enumerate(agents)]
  ranking = sorted(scored, reverse=True)
  survivors = [agents[ranking[place][1]] for place in range(numBest)]
  return survivors, ranking[0][0]

In [78]:
def runSimulation(env, agent, iterations):
  """Play ``iterations`` episodes with ``agent`` and return the mean episode reward.

  Both Gym calling conventions are supported:
    * legacy: ``reset() -> obs`` and ``step() -> (obs, reward, done, info)``
    * Gym >= 0.26 / Gymnasium: ``reset() -> (obs, info)`` and
      ``step() -> (obs, reward, terminated, truncated, info)``

  Args:
    env: Environment exposing ``reset()`` and ``step(action)``.
    agent: Callable that takes a float32 tensor built from the observation
      and returns a tensor whose first two entries score actions 0 and 1.
    iterations: Number of episodes to play.

  Returns:
    The sum of all rewards collected, divided by ``iterations``.

  Raises:
    ZeroDivisionError: If ``iterations`` is 0.
  """
  totalReward = 0
  for _ in range(iterations):
    resetResult = env.reset()
    # Newer APIs return (observation, info); older ones return the observation alone.
    s = resetResult[0] if isinstance(resetResult, tuple) else resetResult
    gameOver = False
    while not gameOver:
      output = agent(torch.as_tensor(s, dtype=torch.float32))
      # Action 0 only when its score is strictly larger; ties go to action 1.
      action = 0 if output[0] > output[1] else 1
      stepResult = env.step(action)
      if len(stepResult) == 5:
        # Newer API splits the end-of-episode flag into terminated / truncated.
        s, r, terminated, truncated, _ = stepResult
        gameOver = bool(terminated or truncated)
      else:
        s, r, gameOver, _ = stepResult
      totalReward += r
  return totalReward / iterations

In [52]:
def mutate(agent, mutationRate):
  """Return a mutated deep copy of ``agent``; ``agent`` itself is left unchanged.

  Every parameter of the copy, whatever its number of dimensions, receives
  additive Gaussian noise: ``param += mutationRate * N(0, 1)`` element-wise.
  The noise is drawn from NumPy's global random generator, one value per
  parameter element, in the order ``child.parameters()`` yields the tensors.

  Args:
    agent: Module to copy and perturb.
    mutationRate: Multiplier applied to the standard-normal noise.

  Returns:
    The mutated copy of ``agent``.
  """
  child = copy.deepcopy(agent)
  # no_grad lets the in-place update also work on parameters that track gradients.
  with torch.no_grad():
    for param in child.parameters():
      noise = np.random.randn(*param.shape)
      # Scale in double precision first, then cast to the parameter's dtype.
      delta = torch.as_tensor(mutationRate * noise, dtype=param.dtype, device=param.device)
      param.add_(delta)
  return child

In [99]:
# Evolve a population of agents on CartPole-v1.
# Each generation: keep the best `bestPopulation` agents, add one mutated copy
# of each survivor, and use survivors + copies as the next generation.
parents = generatePopulation(startingSize)
env = gym.make("CartPole-v1")
highScore = 0
for gen in range(generations):
  selected, score = selectBest(env, parents, bestPopulation, averageRuns)
  mutated = [mutate(s, mutationRate) for s in selected]
  parents = selected + mutated
  # Report on every new record and on every 10th generation.
  if score > highScore or (gen + 1) % 10 == 0:
    highScore = max(score, highScore)
    print("Generation ", gen+1, ":\nGeneration Score: ", score, "\nHigh Score: ", highScore, "\n", sep='')
  # `score` is a float average, so compare with >= rather than exact equality.
  if highScore >= scoreCap:
    break

Generation 1:
Generation Score: 96.66666666666667
High Score: 96.66666666666667

Generation 3:
Generation Score: 402.3333333333333
High Score: 402.3333333333333

Generation 5:
Generation Score: 493.3333333333333
High Score: 493.3333333333333

Generation 6:
Generation Score: 500.0
High Score: 500.0

