<a href="https://colab.research.google.com/github/jcmachicaocuf/codigos_CUF_LLM_NLP/blob/main/Reinforcement_Learning_Simple_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Temperature parameters: the simulated temperature is clipped to the closed
# interval [LOW_TEMP, HIGH_TEMP]; state indices are `temperature - LOW_TEMP`.
LOW_TEMP = 35
HIGH_TEMP = 40
# Width of the interval (5 with the values above). NOTE(review): the inclusive
# interval contains TEMP_RANGE + 1 distinct temperatures, so HIGH_TEMP maps to
# state index TEMP_RANGE.
TEMP_RANGE = HIGH_TEMP - LOW_TEMP
NUM_ACTIONS = 2 # Action 0 decreases the temperature, action 1 increases it

# Episode parameters
NUM_EPISODES = 1000
MAX_STEPS_PER_EPISODE = 30

# Q-learning parameters: blend weight of the new estimate and discount applied
# to the best next-state value
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.99

In [None]:
# Initialize the Q-table with zeros: one row per temperature state, one column
# per action. Temperatures LOW_TEMP..HIGH_TEMP inclusive produce state indices
# 0..TEMP_RANGE, i.e. TEMP_RANGE + 1 states, so the table needs TEMP_RANGE + 1
# rows. With only TEMP_RANGE rows the HIGH_TEMP state had no row and a greedy
# lookup for it raised IndexError.
q_table = np.zeros((TEMP_RANGE + 1, NUM_ACTIONS))

In [None]:
# Epsilon-greedy action selection
def select_action(state, epsilon):
    """Choose an action index for `state` with an epsilon-greedy policy.

    A uniform sample from [0, 1) is drawn first. If it is below `epsilon`,
    a random action index in [0, NUM_ACTIONS) is returned (exploration).
    Otherwise the index of the largest Q-value in the row of the global
    `q_table` selected by `state` is returned (exploitation).
    """
    exploit = not (np.random.uniform(0, 1) < epsilon)
    if exploit:
        return np.argmax(q_table[state, :])
    return np.random.choice(NUM_ACTIONS)

# Q-learning update of a single (state, action) entry of the global Q-table
def update_q_value(state, action, reward, next_state):
    """Apply one Q-learning update to `q_table[state, action]` in place.

    The entry becomes a blend of its previous value and the target
    `reward + DISCOUNT_FACTOR * best_next`, weighted by LEARNING_RATE, where
    `best_next` is the largest Q-value of `next_state`, or 0 when
    `next_state` is not below TEMP_RANGE.

    Nothing happens unless `state < TEMP_RANGE` and `action < NUM_ACTIONS`.
    """
    updatable = state < TEMP_RANGE and action < NUM_ACTIONS
    if not updatable:
        return

    previous = q_table[state, action]
    if next_state < TEMP_RANGE:
        best_next = np.max(q_table[next_state, :])
    else:
        best_next = 0

    q_table[state, action] = (1 - LEARNING_RATE) * previous + LEARNING_RATE * (reward + DISCOUNT_FACTOR * best_next)

In [None]:
# Simulate one training episode of shower temperature control
def simulate_shower():
    """Run one Q-learning episode and return its transition log.

    The episode starts at a random integer temperature in
    [LOW_TEMP, HIGH_TEMP] and lasts at most MAX_STEPS_PER_EPISODE steps.
    Each step selects an action epsilon-greedily, moves the temperature by
    one, clips it to the limits, assigns a reward, and updates the Q-table.
    The episode stops early once the temperature sits on either limit.

    Returns:
        pandas.DataFrame with one row per step; columns 0-3 hold
        state, action, reward and next_state.
    """
    transitions = []
    current_temp = np.random.randint(LOW_TEMP, HIGH_TEMP + 1)
    for step in range(MAX_STEPS_PER_EPISODE):
        state = int(current_temp - LOW_TEMP)
        # Exploration rate shrinks with the step index inside the episode
        # (1.0 at step 0).
        epsilon = 1.0 / ((step / 10) + 1)
        action = select_action(state, epsilon)

        # Action 1 raises the temperature by one, action 0 lowers it by one.
        if action == 1:
            current_temp += 1
        elif action == 0:
            current_temp -= 1

        # Keep the temperature inside the allowed limits.
        current_temp = np.clip(current_temp, LOW_TEMP, HIGH_TEMP)

        # +1 strictly between the limits, -1 when sitting on either limit.
        reward = 1 if LOW_TEMP < current_temp < HIGH_TEMP else -1

        next_state = int(current_temp - LOW_TEMP)
        update_q_value(state, action, reward, next_state)

        transitions.append([state, action, reward, next_state])

        # Reaching a limit ends the episode.
        if current_temp in (LOW_TEMP, HIGH_TEMP):
            break
    return pd.DataFrame(transitions)

In [None]:
# Train the Q-table: each call to simulate_shower() runs one episode and
# updates the global q_table in place.
for episode in range(NUM_EPISODES):
    # l_values is overwritten every iteration, so only the transitions of the
    # most recent episode survive the loop.
    l_values = simulate_shower()

# Report the learned Q-values and the transition log of the final episode
# (columns 0-3 are state, action, reward, next_state).
print("Q-table entrenada:")
print(q_table)
print(l_values)

Q-table entrenada:
[[73.84292362 82.97882534]
 [79.69804754 86.27881792]
 [86.2312142  86.36296272]
 [86.35645204 85.97036247]
 [86.11303945 -1.        ]]
    0  1  2  3
0   3  1  1  4
1   4  0  1  3
2   3  1  1  4
3   4  0  1  3
4   3  0  1  2
5   2  1  1  3
6   3  0  1  2
7   2  1  1  3
8   3  1  1  4
9   4  0  1  3
10  3  1  1  4
11  4  0  1  3
12  3  0  1  2
13  2  1  1  3
14  3  0  1  2
15  2  1  1  3
16  3  0  1  2
17  2  1  1  3
18  3  1  1  4
19  4  1 -1  5


In [None]:
# Define a function to control the shower temperature using the trained Q-table
def control_shower_with_q_table():
    """Greedily control the shower temperature with the trained Q-table.

    Starts from a random integer temperature between LOW_TEMP - 5 and
    HIGH_TEMP + 5 (inclusive). For up to MAX_STEPS_PER_EPISODE steps it picks
    an action, moves the temperature by one, clips it to
    [LOW_TEMP, HIGH_TEMP] and prints the step. The run stops as soon as the
    temperature sits on either limit.

    The start temperature can map to a state index the Q-table has no row
    for. Indexing the table with such a state is wrong: an index past the
    last row raises IndexError, and a negative index silently reads the row
    of a different temperature. Those states are therefore not looked up;
    the controller steers back towards the covered range instead.
    """
    num_states = q_table.shape[0]
    current_temp = np.random.randint(LOW_TEMP-5, HIGH_TEMP+5 + 1)
    print("Initial temperature:", current_temp)
    for step in range(MAX_STEPS_PER_EPISODE):
        state = int(current_temp - LOW_TEMP)
        if state < 0:
            # Below the coldest known state: increase the temperature.
            action = 1
        elif state >= num_states:
            # Above the hottest state with a table row: decrease it.
            action = 0
        else:
            # Known state: follow the greedy policy of the Q-table.
            action = np.argmax(q_table[state, :])

        # Apply the action
        if action == 0:  # Decrease temperature
            current_temp -= 1
        elif action == 1:  # Increase temperature
            current_temp += 1

        # Clip temperature to stay within limits
        current_temp = np.clip(current_temp, LOW_TEMP, HIGH_TEMP)

        print("Step:", step + 1, "Temperature:", current_temp, "Acción:", action)

        # Reaching either limit ends the run
        if current_temp == LOW_TEMP or current_temp == HIGH_TEMP:
            break

# Demonstrate the greedy policy read from the trained Q-table on a single run
# that starts from a randomly drawn temperature
control_shower_with_q_table()


Initial temperature: 38
Step: 1 Temperature: 37 Acción: 0
Step: 2 Temperature: 38 Acción: 1
Step: 3 Temperature: 37 Acción: 0
Step: 4 Temperature: 38 Acción: 1
Step: 5 Temperature: 37 Acción: 0
Step: 6 Temperature: 38 Acción: 1
Step: 7 Temperature: 37 Acción: 0
Step: 8 Temperature: 38 Acción: 1
Step: 9 Temperature: 37 Acción: 0
Step: 10 Temperature: 38 Acción: 1
Step: 11 Temperature: 37 Acción: 0
Step: 12 Temperature: 38 Acción: 1
Step: 13 Temperature: 37 Acción: 0
Step: 14 Temperature: 38 Acción: 1
Step: 15 Temperature: 37 Acción: 0
Step: 16 Temperature: 38 Acción: 1
Step: 17 Temperature: 37 Acción: 0
Step: 18 Temperature: 38 Acción: 1
Step: 19 Temperature: 37 Acción: 0
Step: 20 Temperature: 38 Acción: 1
Step: 21 Temperature: 37 Acción: 0
Step: 22 Temperature: 38 Acción: 1
Step: 23 Temperature: 37 Acción: 0
Step: 24 Temperature: 38 Acción: 1
Step: 25 Temperature: 37 Acción: 0
Step: 26 Temperature: 38 Acción: 1
Step: 27 Temperature: 37 Acción: 0
Step: 28 Temperature: 38 Acción: 1
Step: