<a href="https://colab.research.google.com/github/epeay/random-number-guesser/blob/main/random_number_guesser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gym



In [106]:
import gym
from gym import spaces
import numpy as np
from collections import deque

# Module-level configuration shared by the environment, the agent setup and the evaluation code.
# NOTE(review): HIGHER_THAN_X is not referenced anywhere in the visible code —
# presumably the action id meaning "> CHOSEN_X"; confirm before removing.
HIGHER_THAN_X = 1
# Threshold the current number is compared against when a guess is scored.
CHOSEN_X = 70
# Number of most recent numbers kept in the rolling observation window.
history_size = 3

class NumberPredictionEnv(gym.Env):
    """Toy environment: guess whether the current number exceeds ``CHOSEN_X``.

    The observation is the ``history_size`` most recent random integers in
    [0, 99]; the last entry is the "current" number an action is scored
    against. Action 1 means "current number > CHOSEN_X", any other action is
    treated as "current number <= CHOSEN_X". ``step`` always reports
    ``done=False``, so episode length is decided by the caller.
    """

    def __init__(self):
        super(NumberPredictionEnv, self).__init__()
        # Observation space: `history_size` integers between 0 and 99
        self.observation_space = spaces.Box(low=0, high=99, shape=(history_size,), dtype=np.int32)
        # Action space: Two actions, 0 (<= CHOSEN_X) and 1 (> CHOSEN_X)
        self.action_space = spaces.Discrete(2)
        self.current_number = None
        self.history = deque(maxlen=history_size)

    def _observation(self):
        """Return the history window as an array matching ``observation_space``."""
        # Cast explicitly: np.array() on Python ints would otherwise produce the
        # platform default integer dtype, which disagrees with the declared int32 Box.
        return np.array(self.history, dtype=np.int32)

    def reset(self):
        """Refill the history with fresh random numbers and return the observation."""
        # Initialize the history with random values
        self.history = deque(
            [np.random.randint(0, 100) for _ in range(history_size)],
            maxlen=history_size,
        )
        self.current_number = self.history[-1]
        return self._observation()

    def step(self, action):
        """Score ``action`` against the current number, then draw the next number.

        Returns:
            A ``(observation, reward, done, info)`` tuple where ``reward`` is
            +1 for a correct guess and -1 otherwise, ``done`` is always
            ``False`` and ``info`` is an empty dict.
        """
        guess = ">" if action == 1 else "<="

        # Determine if the action was correct
        is_above = self.current_number > CHOSEN_X
        if (action == 1 and is_above) or (action == 0 and not is_above):
            print(f"CORRECT - Guess: {self.current_number}{guess}{CHOSEN_X}. History: {self.history}")
            reward = 1
        else:
            print(f"WRONG   - Guess: {self.current_number}{guess}{CHOSEN_X}. History: {self.history}")
            reward = -1

        # Generate a new number and update history (the deque drops the oldest entry)
        self.current_number = np.random.randint(0, 100)
        self.history.append(self.current_number)
        print(f"History: {self.history}")
        done = False

        return self._observation(), reward, done, {}

    def render(self, mode='human'):
        pass  # Rendering is not necessary for this simple task

# Create the custom environment.
# This module-level instance is the one the `train` function reads as a global.
env = NumberPredictionEnv()

################################################################

import tensorflow as tf
from collections import deque
import random
import os

# Define the model
def build_model(input_shape, action_size):
    """Build and compile the fully connected Q-network.

    Args:
        input_shape: Number of features in one state vector (an int, despite
            the name).
        action_size: Number of discrete actions; the network emits one linear
            Q-value per action.

    Returns:
        A compiled Keras ``Sequential`` model (Adam, learning rate 0.001, MSE loss).
    """
    # Resolve Keras symbols through `tf`, the only TensorFlow name this file imports;
    # bare `Sequential` / `Dense` were never imported and raised NameError.
    layers = tf.keras.layers
    model = tf.keras.Sequential([
        # Explicit Input replaces the deprecated `input_dim` kwarg on the first Dense layer
        tf.keras.Input(shape=(input_shape,)),
        layers.Dense(24, activation='relu'),  # First hidden layer with 24 neurons
        layers.Dense(24, activation='relu'),  # Second hidden layer with 24 neurons
        layers.Dense(action_size, activation='linear'),  # One output neuron per action
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    return model

class DQNAgent:
    """Epsilon-greedy DQN agent with a bounded replay memory.

    Attributes are plain values: ``memory`` holds ``(state, action, reward,
    next_state, done)`` tuples, ``epsilon`` is the current exploration
    probability and ``model`` is the Q-network returned by ``build_model``.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = build_model(state_size, action_size)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action: random with probability ``epsilon``, otherwise greedy.

        Args:
            state: Batch of one state, as accepted by ``self.model.predict``.

        Returns:
            The chosen action index.
        """
        # Strict `<`: np.random.rand() samples from [0, 1), so with `<=` an
        # epsilon of exactly 0.0 could still (rarely) trigger a random action.
        if np.random.rand() < self.epsilon:
            print("Random action")
            return random.randrange(self.action_size)

        print("Predicting action")
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Fit the model on ``batch_size`` sampled transitions, then decay epsilon.

        Does nothing (no training, no epsilon decay) when the memory holds
        fewer than ``batch_size`` transitions, instead of letting
        ``random.sample`` raise ``ValueError``.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bellman target: immediate reward plus discounted best next Q-value
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            # Only the taken action's Q-value is moved toward the target
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=2)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Initialize the agent
state_size = history_size  # One input feature per entry of the environment's history window
action_size = 2  # Two actions: 0 (<= CHOSEN_X) and 1 (> CHOSEN_X)
agent = DQNAgent(state_size, action_size)

################################################################

# Parameters
episodes = 10  # Number of reset-and-rollout cycles run by `train`
steps_per_episode = 50  # Environment steps taken in each episode
batch_size = 32  # Transitions sampled from the agent's memory per replay call


def train(episodes, steps_per_episode, batch_size, state_size):
    """Run the training loop against the module-level ``env`` and ``agent``.

    Each episode resets the environment, rolls out up to
    ``steps_per_episode`` steps while storing every transition in the agent's
    memory, and then runs one replay pass if the memory holds more than
    ``batch_size`` transitions.

    Args:
        episodes: Number of episodes to run.
        steps_per_episode: Maximum number of environment steps per episode.
        batch_size: Number of transitions sampled per replay pass.
        state_size: Length of one observation; states are reshaped to
            ``[1, state_size]`` before being handed to the agent.
    """
    for e in range(episodes):
        state = np.reshape(env.reset(), [1, state_size])

        for step in range(steps_per_episode):
            print(f"Episode {e}, Step {step}")
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            # Honour the terminal flag instead of stepping a finished episode
            if done:
                break

        # Run replay at the end of each episode
        if len(agent.memory) > batch_size:
            print(f"Agent memory: {len(agent.memory)}")
            agent.replay(batch_size)

# Start training with the parameters defined above.
train(episodes, steps_per_episode, batch_size, state_size)


  and should_run_async(code)


Episode 0, Step 0
Random action
WRONG   - Guess: 86<=70. History: deque([35, 63, 86], maxlen=3)
History: deque([63, 86, 7], maxlen=3)
Episode 0, Step 1
Random action
WRONG   - Guess: 7>70. History: deque([63, 86, 7], maxlen=3)
History: deque([86, 7, 36], maxlen=3)
Episode 0, Step 2
Random action
WRONG   - Guess: 36>70. History: deque([86, 7, 36], maxlen=3)
History: deque([7, 36, 27], maxlen=3)
Episode 0, Step 3
Random action
CORRECT - Guess: 27<=70. History: deque([7, 36, 27], maxlen=3)
History: deque([36, 27, 32], maxlen=3)
Episode 0, Step 4
Random action
WRONG   - Guess: 32>70. History: deque([36, 27, 32], maxlen=3)
History: deque([27, 32, 21], maxlen=3)
Episode 0, Step 5
Random action
CORRECT - Guess: 21<=70. History: deque([27, 32, 21], maxlen=3)
History: deque([32, 21, 23], maxlen=3)
Episode 0, Step 6
Random action
WRONG   - Guess: 23>70. History: deque([32, 21, 23], maxlen=3)
History: deque([21, 23, 18], maxlen=3)
Episode 0, Step 7
Random action
WRONG   - Guess: 18>70. History: d

In [112]:
# Keep a copy of the agent's current exploration rate, then set it to zero
# so the evaluation below relies on the model's predictions.
prev_epsilon = agent.epsilon
agent.epsilon = 0.0

def predict(agent, state):
    """Return the agent's action for ``state``.

    The state is first reshaped into a single-row batch of width
    ``history_size`` and then passed to ``agent.act``.
    """
    batched_state = np.reshape(state, [1, history_size])
    return agent.act(batched_state)

# Seed a rolling window of random numbers; the newest entry is the number being guessed.
history = deque([np.random.randint(0, 100) for _ in range(history_size)], maxlen=history_size)
current_number = history[-1]

correct = 0
incorrect = 0

try:
    # For Real Loop
    for e in range(100):
        prediction = predict(agent, np.array(history))
        guess = ">" if prediction == 1 else "<="

        # Determine if the prediction was correct
        is_above = current_number > CHOSEN_X
        if (prediction == 1 and is_above) or (prediction == 0 and not is_above):
            correct += 1
            print(f"CORRECT - Guess: {current_number}{guess}{CHOSEN_X}. History: {history}")
        else:
            incorrect += 1
            print(f"WRONG   - Guess: {current_number}{guess}{CHOSEN_X}. History: {history}")

        # Generate a new number and update history
        current_number = np.random.randint(0, 100)
        history.append(current_number)
finally:
    # Put back the exploration rate saved before evaluation, so later training
    # is not left running with exploration disabled.
    agent.epsilon = prev_epsilon

print(f"Correct: {correct}, Incorrect: {incorrect}")


print(f"Agent History: {len(agent.memory)}")

Predicting action
CORRECT - Guess: 99>70. History: deque([8, 76, 99], maxlen=3)
Predicting action
WRONG   - Guess: 90<=70. History: deque([76, 99, 90], maxlen=3)
Predicting action
WRONG   - Guess: 80<=70. History: deque([99, 90, 80], maxlen=3)
Predicting action
CORRECT - Guess: 54<=70. History: deque([90, 80, 54], maxlen=3)
Predicting action
CORRECT - Guess: 86>70. History: deque([80, 54, 86], maxlen=3)
Predicting action
CORRECT - Guess: 19<=70. History: deque([54, 86, 19], maxlen=3)
Predicting action
CORRECT - Guess: 67<=70. History: deque([86, 19, 67], maxlen=3)
Predicting action
WRONG   - Guess: 44>70. History: deque([19, 67, 44], maxlen=3)
Predicting action
WRONG   - Guess: 65>70. History: deque([67, 44, 65], maxlen=3)
Predicting action
CORRECT - Guess: 28<=70. History: deque([44, 65, 28], maxlen=3)
Predicting action
WRONG   - Guess: 66>70. History: deque([65, 28, 66], maxlen=3)
Predicting action
CORRECT - Guess: 41<=70. History: deque([28, 66, 41], maxlen=3)
Predicting action
WRON