In [1]:
# Mount Google Drive inside the Colab runtime. The later `%cd` cell switches
# into a folder under this mount point.
from google.colab import drive
# Disabled alternative that mounted at /gdrive instead:
#drive.mount('/gdrive')
#%cd /gdrive

# This will prompt for authorization.

drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Make the project folder on the mounted Drive the working directory.
# NOTE(review): the path is hardcoded to one user's Drive layout; presumably
# the `step*` modules imported later live in this folder -- confirm.
%cd /content/drive/MyDrive/MMAI_845

/content/drive/MyDrive/MMAI_845


In [3]:
# Display the current working directory to confirm the `%cd` above took effect.
%pwd

'/content/drive/MyDrive/MMAI_845'

In [5]:
# Training the AI
# Requires Keras (e.g. `conda install -c conda-forge keras`).

# Importing the libraries and the other python files
import os
import numpy as np
import random as rn

import step1_env as environment
# The brain / DQN modules are imported under `*_module` aliases so that the
# `brain` and `dqn` objects created below do not overwrite the module names.
import step2_buildingbrain as brain_module
import step3_dqn_rl_algo as dqn_module

# Setting seeds for reproducibility
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it
# here does not change hashing in the already-running kernel; it only affects
# processes launched afterwards.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)


# SETTING THE PARAMETERS
# Probability of picking a random action instead of the model's best action.
epsilon = .3
number_actions = 5
# Actions with an index below this boundary are played with direction -1, the
# others with direction +1; the distance to the boundary scales the energy.
direction_boundary = (number_actions - 1) / 2
number_epochs = 100
# Capacity handed to the DQN replay memory.
max_memory = 3000
# Batch size requested from the replay memory at every training step.
batch_size = 512
# Multiplier turning the action's distance to the boundary into `energy_ai`.
temperature_step = 1.5


# BUILDING THE ENVIRONMENT BY SIMPLY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
env = environment.Environment(optimal_temperature = (18.0, 24.0),
                              initial_month = 0,
                              initial_number_users = 20,
                              initial_rate_data = 30)

# BUILDING THE BRAIN BY SIMPLY CREATING AN OBJECT OF THE BRAIN CLASS
brain = brain_module.Brain(learning_rate = 0.00001, number_actions = number_actions)


# BUILDING THE DQN MODEL BY SIMPLY CREATING AN OBJECT OF THE DQN CLASS
dqn = dqn_module.DQN(max_memory = max_memory, discount = 0.9)

# CHOOSING THE MODE
train = True

# TRAINING THE AI
env.train = train
model = brain.model

# Early stopping: give up after `patience` consecutive epochs whose total
# reward does not beat the best total reward seen so far.
early_stopping = True
patience = 10
best_total_reward = -np.inf
patience_count = 0


if (env.train):
    # One epoch simulates at most 5 months of 30 days, one timestep per minute.
    minutes_per_month = 30 * 24 * 60
    max_timesteps = 5 * minutes_per_month

    # STARTING THE LOOP OVER ALL THE EPOCHS (1 Epoch = 5 Months)
    # range() excludes its stop value, so `number_epochs + 1` is required to
    # run epochs 1..number_epochs inclusive (the log below prints "/number_epochs").
    for epoch in range(1, number_epochs + 1):
        # INITIALIZING ALL THE VARIABLES OF BOTH THE ENVIRONMENT AND THE TRAINING LOOP
        total_reward = 0
        loss = 0.
        new_month = np.random.randint(0, 12)
        env.reset(new_month = new_month)
        game_over = False
        current_state, _, _ = env.observe()
        timestep = 0

        # STARTING THE LOOP OVER ALL THE TIMESTEPS (1 Timestep = 1 Minute) IN ONE EPOCH
        # Strict `<` keeps the episode to exactly `max_timesteps` steps; with
        # `<=` one extra step ran and passed month index 5 to `update_env`.
        while ((not game_over) and timestep < max_timesteps):

            # CHOOSING THE NEXT ACTION: EXPLORATION WITH PROBABILITY epsilon,
            # OTHERWISE INFERENCE (ACTION WITH THE HIGHEST PREDICTED Q-VALUE)
            if np.random.rand() <= epsilon:
                action = np.random.randint(0, number_actions)
            else:
                q_values = model.predict(current_state)
                action = np.argmax(q_values[0])

            # TRANSLATING THE ACTION INDEX INTO A DIRECTION AND AN ENERGY AMOUNT
            # (identical for both branches above, so it is computed once here)
            if (action - direction_boundary < 0):
                direction = -1
            else:
                direction = 1
            energy_ai = abs(action - direction_boundary) * temperature_step

            # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
            # The third argument is the number of whole months elapsed in this epoch.
            next_state, reward, game_over = env.update_env(direction, energy_ai, timestep // minutes_per_month)
            total_reward += reward

            # STORING THIS NEW TRANSITION INTO THE MEMORY
            dqn.remember([current_state, action, reward, next_state], game_over)

            # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
            inputs, targets = dqn.get_batch(model, batch_size = batch_size)

            # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
            loss += model.train_on_batch(inputs, targets)
            timestep += 1
            current_state = next_state

        # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
        print("\n")
        print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
        print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
        print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai))

        # EARLY STOPPING
        # Count consecutive epochs that fail to beat the best total reward.
        if (early_stopping):
            if (total_reward <= best_total_reward):
                patience_count += 1
            elif (total_reward > best_total_reward):
                best_total_reward = total_reward
                patience_count = 0

        if (patience_count >= patience):
            print("Early Stopping")
            break

        # SAVING THE MODEL
        # The file is overwritten after every epoch that does not trigger early
        # stopping, so it holds the most recent weights rather than the
        # best-reward weights.
        model.save("model.h5")


  "The `lr` argument is deprecated, use `learning_rate` instead.")




Epoch: 001/100
Total Energy spent with an AI: 32
Total Energy spent with no AI: 69


Epoch: 002/100
Total Energy spent with an AI: 42
Total Energy spent with no AI: 36


Epoch: 003/100
Total Energy spent with an AI: 9
Total Energy spent with no AI: 6


Epoch: 004/100
Total Energy spent with an AI: 14
Total Energy spent with no AI: 43


Epoch: 005/100
Total Energy spent with an AI: 9
Total Energy spent with no AI: 14


Epoch: 006/100
Total Energy spent with an AI: 33
Total Energy spent with no AI: 62


Epoch: 007/100
Total Energy spent with an AI: 38
Total Energy spent with no AI: 49


Epoch: 008/100
Total Energy spent with an AI: 15
Total Energy spent with no AI: 56


Epoch: 009/100
Total Energy spent with an AI: 3
Total Energy spent with no AI: 0


Epoch: 010/100
Total Energy spent with an AI: 56
Total Energy spent with no AI: 65


Epoch: 011/100
Total Energy spent with an AI: 21
Total Energy spent with no AI: 25


Epoch: 012/100
Total Energy spent with an AI: 20
Total Energy spent 