# 0. Install Dependencies

In [None]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2
!pip install pygame

# 1. Test the Custom Environment with Random Actions (OpenAI Gym)

In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import pygame
import os
import nltk
import string

pygame 2.0.1 (SDL 2.0.14, Python 3.7.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Size of the pygame window, in pixels
window_width = 1000
window_height = 500

In [3]:
class RealEstateEnv(Env):
    """Gym environment in which the agent moves a house price toward a customer's budget.

    The state is the current asking price. Each step the agent lowers the
    price by 10,000, keeps it, or raises it by 10,000, and receives +1 when
    the price lies inside the customer's [low_price, high_price] band and -1
    otherwise. An episode lasts 180 steps.

    ``step`` and ``render`` accept the plain Gym call forms (``step(action)``,
    ``render(mode='human')``) so the environment can be driven by keras-rl,
    as well as the richer forms used by the manual rollout loop
    (``step(action, customer)``, ``render(text_obj)``).
    """

    def __init__(self):
        # Actions we can take, lower price, same price, increase price
        self.action_space = Discrete(3)
        # House prices array
        self.observation_space = Box(low=np.array([100000]), high=np.array([1100000]))
        # Set start price
        self.state = 450000 + random.randint(-100000,100000)
        # Set finding length
        self.finding_length = 180
        # Customer used by step() when the caller does not pass one
        self.customer = None
        # Display handles, created by init_render()
        self.window = None
        self.clock = None

    def init_render(self):
        """Initialise pygame and open the display window and frame clock."""
        pygame.init()
        self.window = pygame.display.set_mode((window_width, window_height))
        pygame.display.set_caption("Real Estate Env")
        self.clock = pygame.time.Clock()

    def text(self, text):
        """Render each string in ``text`` to a pygame surface.

        Args:
            text: iterable of strings, one per line to display.

        Returns:
            list of rendered surfaces, in the same order as ``text``.
        """
        font_color = (0,150,250)
        font_obj = pygame.font.Font(None, 25)
        # Render the objects
        return [font_obj.render(t, True, font_color) for t in text]

    def step(self, action, customer=None):
        """Apply ``action`` and score the new price against a customer's budget.

        Args:
            action: 0 lowers the price by 10,000, 2 raises it by 10,000,
                any other value leaves it unchanged.
            customer: object exposing ``low_price`` and ``high_price``. When
                omitted (the standard Gym call form), ``self.customer`` is
                used; if that is unset too, a ``Customer(0)`` is created and
                stored on first use.

        Returns:
            tuple ``(state, reward, done, info)`` where ``state`` is the new
            price, ``reward`` is 1 or -1, ``done`` is True once the episode's
            180 steps are used up, and ``info`` is an empty dict.
        """
        if customer is None:
            if self.customer is None:
                # Customer is defined in a later notebook cell; the name is
                # only looked up when this branch actually runs.
                self.customer = Customer(0)
            customer = self.customer

        # Apply action
        if action == 0:
            self.state -= 10000
        elif action == 2:
            self.state += 10000

        # Reduce house finding length by 1 second
        self.finding_length -= 1

        # Calculate reward
        if customer.low_price <= self.state <= customer.high_price:
            reward = 1
        else:
            reward = -1

        # Check if finding time is over
        done = self.finding_length <= 0

        # Apply price fluctuations
        # self.state += random.randint(-10000,25000)
        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self, text_obj=None, mode='human'):
        """Draw the scene: optional text lines plus the two figures.

        Args:
            text_obj: optional sequence of pre-rendered text surfaces (see
                ``text``), drawn top to bottom at 50 px spacing.
            mode: accepted for Gym API compatibility; not otherwise used.
        """
        # Open the window on demand so render() also works when the caller
        # never called init_render() or pygame has been shut down since.
        if (self.window is None
                or not pygame.display.get_init()
                or pygame.display.get_surface() is None):
            self.init_render()

        # Implement viz
        self.window.fill((0,0,0))
        # draw orientation
        for i, surface in enumerate(text_obj or ()):
            self.window.blit(surface, (22, i*50))
        #left person
        #head
        pygame.draw.circle(self.window, (66, 84, 81), (150, 320), 40)
        #eyes
        pygame.draw.circle(self.window, (0, 200, 200), (130, 310), 8)
        pygame.draw.circle(self.window, (0, 200, 200), (170, 310), 8)
        #body
        pygame.draw.rect(self.window, (45,45,45), [120, 360, 60, 140])
        #right person
        #head
        pygame.draw.circle(self.window, (250,227,130), (window_width-150, 320), 40)
        #eyes
        pygame.draw.circle(self.window, (0, 20, 20), (window_width-130, 310), 8)
        pygame.draw.circle(self.window, (0, 20, 20), (window_width-170, 310), 8)
        #mouth
        pygame.draw.line(self.window, (255, 205, 205), [window_width-135, 338], [window_width-165, 338], 6)
        #body
        pygame.draw.rect(self.window, (220,197,100), [window_width-180, 360, 60, 140])
        pygame.display.update()

    def reset(self):
        """Start a new episode and return the initial price."""
        # Reset base price
        self.state = 450000 + random.randint(-100000,100000)
        # Reset finding time
        self.finding_length = 180
        return self.state

In [4]:
class Customer():
    """A buyer with a randomly drawn house size and a matching price band."""

    def __init__(self, num):
        """Draw a square footage in [500, 4500] and derive the other fields.

        Args:
            num: identifier stored as ``self.id``.
        """
        sqft = random.randint(500, 4500)
        self.square_footage = sqft
        self.num_bedrooms = round(sqft / 800)
        # Acceptable price band: 450 to 550 per square foot
        self.low_price = sqft * 450
        self.high_price = sqft * 550
        self.id = num

    @staticmethod
    def load_customers(x):
        """Return a list of ``x`` customers with ids 0 .. x-1."""
        return [Customer(idx) for idx in range(x)]

In [5]:
class Houses():
    """A house record built from a three-item sequence."""

    def __init__(self, properties):
        """Copy the fields out of ``properties``.

        Args:
            properties: indexable sequence ordered as
                (square footage, bedrooms, price).
        """
        footage, beds, cost = properties[0], properties[1], properties[2]
        self.square_footage = footage
        self.bedrooms = beds
        self.price = cost

In [7]:
# Path of the text file holding the house data
data_dir = "./data"
# Number of consecutive numeric tokens that make up one house
num_properties = 3
with open(data_dir) as f:
    s = f.read()

# NOTE: word_tokenize needs NLTK's tokenizer data to be available locally.
word_list = nltk.word_tokenize(s)

# Keep only the purely numeric tokens. Build a new list instead of calling
# word_list.remove() inside a loop over word_list: removing while iterating
# skips the element after each removal, so non-numeric tokens would survive.
word_list = [word for word in word_list if word.isdigit()]

# Group the tokens into houses of `num_properties` values each. Tokens are
# kept as strings; a trailing incomplete group is left in temp2 and ignored.
houses = []
temp2 = []
for word in word_list:
    temp2.append(word)
    if len(temp2) == num_properties:
        # Houses copies the values out by index, so temp2 can be reused.
        house = Houses(temp2)
        houses.append(house)
        temp2.clear()

12384


In [None]:
# Create the custom environment and open its pygame window
env = RealEstateEnv()
env.init_render()

In [None]:
# Draw one random observation from the environment's observation space
env.observation_space.sample()

In [None]:
# Drive the environment with random actions for every customer, drawing each
# frame, to check that stepping, scoring and rendering all work.
episodes = 10
num_customers = 5
customers = Customer.load_customers(num_customers)
# (Re)open the window so this cell can be re-run after its final pygame.quit().
env.init_render()
# Set when the user closes the window; stops all loops without touching
# pygame again (calling pygame functions after pygame.quit() raises).
window_closed = False
for customer in customers:
    for episode in range(1, episodes+1):
        state = env.reset()
        done = False
        score = 0

        while not done and not window_closed:
            textArray = ["Score: " + str(score), "State: " + str(env.state), "High Price: " + str(customer.high_price), "Low Price: " + str(customer.low_price)]
            text_obj = env.text(textArray)
            # Cap the loop at 30 frames per second
            env.clock.tick(30)
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action, customer)
            score+=reward
            env.render(text_obj)
            for eve in pygame.event.get():
                if eve.type==pygame.QUIT:
                    window_closed = True
        if window_closed:
            break
        print('Customer ID: {} Episode:{} Score:{}'.format(customer.id, episode, score))
    if window_closed:
        break

pygame.quit()

# 2. Create a Deep Learning Model with Keras

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
# Observation shape and number of discrete actions; passed to build_model below
states = env.observation_space.shape
actions = env.action_space.n

In [None]:
# Show the number of discrete actions
actions

In [None]:
def build_model(states, actions):
    """Build the Q-network: two 24-unit ReLU layers and a linear output.

    Args:
        states: input shape handed to the first Dense layer.
        actions: number of output units (one per action).

    Returns:
        The uncompiled Sequential model.
    """
    layers = [
        Dense(24, activation='relu', input_shape=states),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [None]:
# Drop any model left over from a previous run of the cells below. A bare
# `del model` raises NameError on a fresh kernel, where no model exists yet.
globals().pop("model", None)

In [None]:
# Build the network for this environment's observation shape and action count
model = build_model(states, actions)

In [None]:
# Print the layer-by-layer summary of the network
model.summary()

# 3. Build Agent with Keras-RL

In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl DQN agent.

    Args:
        model: the Q-network to train.
        actions: number of discrete actions.

    Returns:
        An uncompiled DQNAgent using a Boltzmann policy and a 50,000-entry
        sequential replay memory with window length 1.
    """
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=SequentialMemory(limit=50000, window_length=1),
        policy=BoltzmannQPolicy(),
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
# Build the agent, compile it with Adam (learning rate 1e-3) and an MAE metric,
# then train on the environment for 50,000 steps without rendering.
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

In [None]:
# Evaluate for 100 episodes without rendering and print the mean episode reward
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

In [None]:
# Run 15 evaluation episodes with rendering enabled; the result is discarded
_ = dqn.test(env, nb_episodes=15, visualize=True)

# 4. Reloading Agent Weights from Disk

In [None]:
# Save the trained agent's weights to 'dqn_weights.h5f', replacing any existing file
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [None]:
# Discard the model, agent and environment so the cells below rebuild them
del model
del dqn
del env

In [None]:
# Rebuild the environment, network and agent exactly as they were built for
# training, so the saved weights fit the new network. (The weights were
# trained on RealEstateEnv; an environment with different observation/action
# spaces would produce a network the weight file cannot be loaded into.)
env = RealEstateEnv()
actions = env.action_space.n
# build_model expects the full shape tuple, as in the training section
states = env.observation_space.shape
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [None]:
# Load the weights saved earlier in 'dqn_weights.h5f' into the rebuilt agent
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run 5 evaluation episodes with rendering enabled using the reloaded weights
_ = dqn.test(env, nb_episodes=5, visualize=True)