In [1]:
import gym, random, os, sys
import numpy as np
from statistics import mean, median
from collections import Counter

from keras import Input
from keras.models import Sequential
from keras.layers import Dropout, Dense

current_path = os.path.dirname(os.path.abspath(sys.argv[0]))

In [2]:
env = gym.make('CartPole-v0')
env.reset()

goal_steps = 500
# Number of frames considered to be a win
score_req = 50
# Score requirement for a random game to be used for training
initial_games = 10000
# These number of random games will be carried out

# Random games just for visualizing
def random_games():
	for episode in range(5):
		env.reset()
		for i in range(goal_steps):
			observation, reward, done, info = env.step(env.action_space.sample())
			env.render()
	env.close()

In [3]:
# Creating the initial population of training data for neural network
def initial_population():
	training_data = []
	scores = []
	accepted_scores = []

	for var1 in range(initial_games):
		score = 0
		game_memory = []
		previous_observation = []

		for var2 in range(goal_steps):
			action = random.randrange(0,2)
			observation, reward, done, info = env.step(action)

			if len(previous_observation) > 0 :
				game_memory.append([list(previous_observation), action])

			previous_observation = observation
			score += reward

			if done :
				break

		if score >= score_req :
			accepted_scores.append(score)
			
			for data in game_memory : 	
				if data[1] == 1 :
					output = [0,1]
				elif data[1] == 0 :
					output = [1,0]

				training_data.append([data[0], output])

		env.reset()
		scores.append(score)

	training_data_to_save = np.array(training_data)
	np.save(file = current_path + '/training_data.npy', arr = training_data_to_save)

	print('Average accepted_scores : ', mean(accepted_scores))
	print('Median accepted_scores : ', median(accepted_scores))
	print(Counter(accepted_scores))

	return training_data[0]

In [4]:
def neural_net(input_size):
	model = Sequential()

	model.add(Input(shape = (input_size, 1)))

	model.add(Dense(128, activation = 'relu'))
	model.add(Dropout(0.2))

	model.add(Dense(256, activation = 'relu'))
	model.add(Dropout(0.2))

	model.add(Dense(512, activation = 'relu'))
	model.add(Dropout(0.2))

	model.add(Dense(256, activation = 'relu'))
	model.add(Dropout(0.2))

	model.add(Dense(128, activation = 'relu'))
	model.add(Dropout(0.2))

	model.add(Dense(2, activation = 'softmax'))

	model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
	print(model.summary())

	return model

In [5]:
def train_model(training_data, model = False):
	x_vals = np.array(training_data[0])
	y_vals = np.array(training_data[1])

	if not model :
		model = neural_net(input_size = len(x_vals[0]))

	#model.fit(x_vals, y_vals, epochs = 5, verbose = 1)
	print(x_vals)
	print(y_vals)

	return model

In [7]:
initial_population().shape

Average accepted_scores :  60.29639889196676
Median accepted_scores :  57.0
Counter({50.0: 36, 51.0: 34, 53.0: 30, 54.0: 21, 52.0: 21, 57.0: 19, 55.0: 18, 58.0: 16, 63.0: 13, 56.0: 13, 59.0: 13, 60.0: 12, 64.0: 12, 65.0: 11, 62.0: 10, 74.0: 9, 67.0: 7, 69.0: 6, 61.0: 6, 66.0: 5, 70.0: 5, 73.0: 4, 82.0: 4, 75.0: 4, 71.0: 4, 85.0: 3, 83.0: 3, 79.0: 2, 78.0: 2, 72.0: 2, 86.0: 1, 77.0: 1, 97.0: 1, 99.0: 1, 84.0: 1, 103.0: 1, 102.0: 1, 89.0: 1, 116.0: 1, 128.0: 1, 106.0: 1, 81.0: 1, 90.0: 1, 80.0: 1, 68.0: 1, 112.0: 1})


AttributeError: 'list' object has no attribute 'shape'