In [1]:
!pip install tf-agents



In [2]:
import gym

# Create an environment
env = gym.make('CartPole-v1')

# Run a random policy
observation = env.reset()
for _ in range(1000):
    env.render()
    action = env.action_space.sample()  # Take a random action
    observation, reward, done, _ = env.step(action)
    if done:
        observation = env.reset()
env.close()


#Discrete Action Space:
If your problem has a discrete action space, meaning there are a finite number of distinct actions, you can represent it using integers. For example, in the classic CartPole environment, the agent can take two actions: push the cart to the left or push it to the right.



In [3]:
import gym

env = gym.make('CartPole-v1')

# Discrete action space
num_actions = env.action_space.n

# Sample a random action
random_action = env.action_space.sample()

# Print the action space and a random action
print(f"Number of actions: {num_actions}")
print(f"Random action: {random_action}")


Number of actions: 2
Random action: 0


#Continuous Action Space:
For problems with continuous action spaces, where actions are real-valued, you can use arrays or tuples to represent the action space. The Pendulum environment is an example of an environment with a continuous action space.

In [4]:
import gym

env = gym.make('Pendulum-v1')

# Continuous action space
action_low = env.action_space.low
action_high = env.action_space.high

# Sample a random action
random_action = env.action_space.sample()

# Print the action space range and a random action
print(f"Action space range: {action_low} to {action_high}")
print(f"Random action: {random_action}")


Action space range: [-2.] to [2.]
Random action: [-1.8391681]


#Custom Action Space:

In some cases, you might have a custom action space that doesn't fit the typical discrete or continuous structures. You can create custom action spaces using Gym's spaces module.



In [5]:
import gym
from gym import spaces

# Custom action space example
custom_action_space = spaces.Discrete(4)  # A discrete space with 4 actions

# Sample a random action
random_action = custom_action_space.sample()

# Print the custom action space and a random action
print(f"Custom action space: {custom_action_space}")
print(f"Random action: {random_action}")


Custom action space: Discrete(4)
Random action: 1


#Here's a general guide on how you might approach this:

Identify Categorical Variables:
Identify the categorical variables in your dataset that you want to use as components of the action space.

Define Possible Combinations:
Determine the possible combinations of the categorical variables that make sense for your RL problem. Each unique combination can be treated as a distinct action.

Create a Discrete Action Space:
Use a discrete action space to represent the possible combinations. In Gym, you can use spaces.Discrete or spaces.MultiDiscrete depending on the nature of your action space.

In [7]:
import pandas as pd

df = pd.read_csv(r'C:\Users\s976791601\RAW_DATA\M_all_information_DIFF.csv')

In [8]:
df.isna().sum()

row ID                          0
p_2n_2_repeticao_Bin_A_1_30     0
p_2n_2_repeticao_Bin_B_31_60    0
p_2n_3_repeticao_Bin_A_1_20     0
p_2n_3_repeticao_Bin_B_21_40    0
                               ..
qtde_variacao_soma_int          1
faixa_soma_3_layers_int         0
faixa_soma_3_layers_str         0
faixa_soma_5_layers_int         0
faixa_soma_5_layers_str         0
Length: 328, dtype: int64

In [9]:
#print(df.iloc[-1])
#df.drop(df.tail(1).index,inplace=True) # drop last n rows
print(df.iloc[-1])

row ID                          2024-01-06 [10 13 20 40 43 56]
p_2n_2_repeticao_Bin_A_1_30                                 75
p_2n_2_repeticao_Bin_B_31_60                                51
p_2n_3_repeticao_Bin_A_1_20                                  1
p_2n_3_repeticao_Bin_B_21_40                                 0
                                             ...              
qtde_variacao_soma_int                                     NaN
faixa_soma_3_layers_int                                      2
faixa_soma_3_layers_str                             soma_media
faixa_soma_5_layers_int                                      3
faixa_soma_5_layers_str                             soma_media
Name: 2669, Length: 328, dtype: object


In [10]:
df.shape

(2670, 328)

In [11]:
import gym
from gym import spaces
import itertools

# Example: Two categorical variables with two values each
var1_values = df['P_2n_7_str'].unique()
var2_values = df['balanceamento_p_2n_7_str'].unique()
var3_values = df['faixa_soma_5_layers_str'].unique()

right_output = df['target'].unique()

In [12]:
right_output

array(['[1 1 2 1 1 0]', '[3 0 1 0 1 1]', '[3 2 0 0 1 0]', '[1 2 1 1 1 0]',
       '[2 1 1 1 0 1]', '[1 1 0 2 1 1]', '[1 0 0 0 1 4]', '[1 1 2 1 0 1]',
       '[0 1 1 2 0 2]', '[1 3 1 0 1 0]', '[0 1 0 1 2 2]', '[1 1 2 0 1 1]',
       '[0 1 0 2 0 3]', '[0 1 0 2 1 2]', '[2 3 0 0 0 1]', '[0 0 2 2 1 1]',
       '[2 1 2 0 0 1]', '[0 1 3 1 0 1]', '[1 0 0 2 2 1]', '[2 0 0 2 1 1]',
       '[2 1 1 1 1 0]', '[0 0 1 0 2 3]', '[1 0 1 0 1 3]', '[0 3 0 1 1 1]',
       '[2 0 2 0 0 2]', '[2 1 0 0 1 2]', '[0 2 1 0 2 1]', '[1 1 0 2 2 0]',
       '[0 3 0 1 0 2]', '[1 1 2 0 0 2]', '[1 2 2 0 0 1]', '[2 0 1 1 0 2]',
       '[1 0 0 0 3 2]', '[0 2 0 1 1 2]', '[3 1 0 0 2 0]', '[0 1 1 3 1 0]',
       '[1 2 1 2 0 0]', '[0 2 1 1 2 0]', '[1 0 1 1 1 2]', '[1 0 2 1 1 1]',
       '[1 1 1 1 1 1]', '[2 0 1 1 2 0]', '[1 1 1 0 1 2]', '[0 1 3 0 2 0]',
       '[0 1 1 1 1 2]', '[1 2 0 1 2 0]', '[2 1 0 1 0 2]', '[1 2 1 1 0 1]',
       '[0 1 2 1 2 0]', '[1 3 1 1 0 0]', '[1 1 1 0 2 1]', '[1 0 2 1 2 0]',
       '[2 1 1 2 0 0]', '

In [13]:
var2_values

array(['desbalanceado_para_o_Inicio', 'desbalanceado_para_o_Final',
       'equilibrado_3x_3x'], dtype=object)

In [14]:
var3_values

array(['soma_baixa', 'soma_minima', 'soma_media', 'soma_maxima',
       'soma_alta'], dtype=object)

In [15]:
# Create a list of all possible combinations
all_combinations = list(itertools.product(var1_values, var2_values, var3_values))

# Define the action space
action_space = spaces.Discrete(len(all_combinations))
# action_space = spaces.Discrete(len(right_output))

# Sample a random action
random_action = action_space.sample()

# Print the action space and a random action
print(f"Action space: {action_space}")
print(f"Random action: {random_action}")


Action space: Discrete(105)
Random action: 58


In [19]:
import gym
from gym import spaces
import numpy as np

class LotteryEnv(gym.Env):
    def __init__(self, var1_values, var2_values, var3_values, right_output):
        super(LotteryEnv, self).__init__()

        all_combinations = list(itertools.product(var1_values, var2_values, var3_values));
        self.var1_values = var1_values
        self.var2_values = var2_values
        self.var3_values = var3_values
        self.right_output = right_output
        self.combinations = all_combinations


        # Action space: Discrete space representing all possible combinations
        self.action_space = spaces.Discrete(self._compute_total_combinations())

        # Observation space: Discrete space representing the drawn numbers
        self.observation_space = spaces.MultiDiscrete(len(all_combinations))

        # State initialization
        self.state = None

    def _compute_total_combinations(self):
        return len(all_combinations)
        #return len(right_output)

    def reset(self):
        # Reset the state for a new episode
        #self.state = np.random.choice(self.var1_values, self.var2_values, self.var3_values, replace=False)
        return self.state

    def step(self, action):
        # Validate the action
        if action < 0 or action >= self._compute_total_combinations():
            raise ValueError("Invalid action")

        # Compute the drawn numbers based on the action
        # drawn_numbers = np.random.choice(self.num_balls, size=self.num_to_draw, replace=False)
        drawn_numbers = np.array(self.right_output)

        # Compute the reward (e.g., based on the number of correct guesses)
        reward = self._compute_reward(drawn_numbers, action)

        # Update the state (optional, depending on your problem)
        self.state = drawn_numbers

        # Check if the episode is done (optional, depending on your problem)
        done = False

        return drawn_numbers, reward, done, {}

    def _compute_reward(self, drawn_numbers, action):
        # Example: Reward based on the number of correct guesses
        # TENHO QUE FAZER O TREE AQUI
        correct_guesses = np.sum(np.isin(drawn_numbers, np.array(action)))
        #reward = correct_guesses / self.num_to_draw  # Normalize to be between 0 and 1
        reward = right_output  # Normalize to be between 0 and 1
        return reward


In [17]:
# Example usage
env = LotteryEnv(var1_values, var2_values, var3_values, right_output)
state = env.reset()

In [18]:
for _ in range(10):
    action = env.action_space.sample()
    #print([action])
    next_state, reward, done, info = env.step(action)
    print('--------- ENV.COMBINATIONS -------------')
    print(env.combinations[action])
    print('--------- action -------------')
    print(f"Action: {action}")
    print('--------- next_state -------------')
    print(f"NextState: {next_state}")
    print('--------- reward -------------')
    print(f"Reward: {reward}")
    print('--------- done -------------')
    print(f"Reward: {done}")
    print('--------- info -------------')
    print(f"Info: {info}")

--------- ENV.COMBINATIONS -------------
('bin_B_31_a_60_6x', 'equilibrado_3x_3x', 'soma_alta')
--------- action -------------
Action: 89
--------- next_state -------------
NextState: ['[1 1 2 1 1 0]' '[3 0 1 0 1 1]' '[3 2 0 0 1 0]' '[1 2 1 1 1 0]'
 '[2 1 1 1 0 1]' '[1 1 0 2 1 1]' '[1 0 0 0 1 4]' '[1 1 2 1 0 1]'
 '[0 1 1 2 0 2]' '[1 3 1 0 1 0]' '[0 1 0 1 2 2]' '[1 1 2 0 1 1]'
 '[0 1 0 2 0 3]' '[0 1 0 2 1 2]' '[2 3 0 0 0 1]' '[0 0 2 2 1 1]'
 '[2 1 2 0 0 1]' '[0 1 3 1 0 1]' '[1 0 0 2 2 1]' '[2 0 0 2 1 1]'
 '[2 1 1 1 1 0]' '[0 0 1 0 2 3]' '[1 0 1 0 1 3]' '[0 3 0 1 1 1]'
 '[2 0 2 0 0 2]' '[2 1 0 0 1 2]' '[0 2 1 0 2 1]' '[1 1 0 2 2 0]'
 '[0 3 0 1 0 2]' '[1 1 2 0 0 2]' '[1 2 2 0 0 1]' '[2 0 1 1 0 2]'
 '[1 0 0 0 3 2]' '[0 2 0 1 1 2]' '[3 1 0 0 2 0]' '[0 1 1 3 1 0]'
 '[1 2 1 2 0 0]' '[0 2 1 1 2 0]' '[1 0 1 1 1 2]' '[1 0 2 1 1 1]'
 '[1 1 1 1 1 1]' '[2 0 1 1 2 0]' '[1 1 1 0 1 2]' '[0 1 3 0 2 0]'
 '[0 1 1 1 1 2]' '[1 2 0 1 2 0]' '[2 1 0 1 0 2]' '[1 2 1 1 0 1]'
 '[0 1 2 1 2 0]' '[1 3 1 1 0 0]' '[1

In [None]:
import numpy as np

class QLearningAgent:
    def __init__(self, action_space_size, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1):
        self.action_space_size = action_space_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob

        # Initialize Q-values for each action
        self.q_values = np.zeros(action_space_size)

    def select_action(self, state):
        # Exploration-exploitation trade-off
        if np.random.rand() < self.exploration_prob:
            return np.random.randint(self.action_space_size)  # Explore: select a random action
        else:
            return np.argmax(self.q_values)  # Exploit: select the action with the highest Q-value

    def update_q_values(self, state, action, reward, next_state):
        # Q-learning update rule
        best_next_action_value = np.max(self.q_values)
        self.q_values[action] += self.learning_rate * (reward + self.discount_factor * best_next_action_value - self.q_values[action])

# Example usage
env = gym.make('env')  # Replace 'YourLotteryEnv-v0' with the actual name of your environment
agent = QLearningAgent(action_space_size=env.action_space.n)

num_episodes = 1000

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)

        agent.update_q_values(state, action, reward, next_state)

        state = next_state
        total_reward += reward

        if done:
            break

    print(f"Episode {episode + 1}, Total Reward: {total_reward}")

# Evaluate the agent after training
# ...


NameNotFound: ignored