In [1]:
import numpy as np
from tqdm import tqdm

In [56]:
class Q_DICT:
    """Q-table stored as a plain dict keyed by a state-action string.

    Every key has the textual form
    ``'[cue_x, cue_y], [target_x, target_y], angle, power'`` with all numbers
    rendered as Python floats, e.g. ``'[1.0, 0.0], [0.0, 1.0], 0.0, 0.0'``.
    The value stored under a key is the reward accumulated for that pair.
    """

    def __init__(self):
        """Create the table seeded with a single all-zero state-action entry."""
        # Q_dict: maps the state-action key string to its accumulated reward.
        self.Q_dict = {self._make_key([0.00, 0.00], [0.00, 0.00], 0.00, 0.00): 0.00}

    @staticmethod
    def _make_key(cue_state, target_state, angle, power):
        """Build the canonical dict key for one state-action pair.

        Every component is coerced to a built-in ``float`` and both states are
        rendered as lists, so ints, tuples, NumPy scalars and NumPy arrays all
        produce the same key as the equivalent list of Python floats. Without
        this, ``[1, 0]`` and ``[1.0, 0.0]`` would be different entries, and
        NumPy >= 2 scalars would render as ``np.float64(1.0)`` inside a list.
        """
        cue = [float(coord) for coord in cue_state]
        target = [float(coord) for coord in target_state]
        return f'{cue}, {target}, {float(angle)}, {float(power)}'

    def init_q_dict(self, x_disc, y_disc, angles_disc, powers_disc,
                    x_max=10, y_max=10, max_power=3600):
        """Fill the table with a 0.00 entry for every discretised state-action pair.

        Existing entries whose key falls on the grid are reset to 0.00.

        Args:
            x_disc: number of evenly spaced x positions in ``[0, x_max]``.
            y_disc: number of evenly spaced y positions in ``[0, y_max]``.
            angles_disc: number of evenly spaced angles in ``[0, 360]``.
            powers_disc: number of evenly spaced powers in ``[0, max_power]``.
            x_max: upper bound of the x axis (default 10).
            y_max: upper bound of the y axis (default 10).
            max_power: upper bound of the power range (default 3600).

        Returns:
            None. The table is modified in place.
        """
        # .tolist() converts the NumPy scalars to built-in floats so the keys
        # generated here are identical to the ones _make_key produces.
        states_x = np.linspace(0, x_max, num = x_disc).tolist() # x_disc values from 0 to x_max
        states_y = np.linspace(0, y_max, num = y_disc).tolist() # y_disc values from 0 to y_max

        # NOTE(review): both endpoints are included, so 0 and 360 are separate
        # entries in the table -- confirm that this is intended.
        angles = np.linspace(0, 360, num = angles_disc).tolist() # angles_disc values from 0 to 360

        powers = np.linspace(0, max_power, num = powers_disc).tolist() # powers_disc values from 0 to max_power
        print(powers)

        # Cue and target each range over the full x/y grid, and every such
        # state is paired with every (angle, power) action.
        n_positions = len(states_x) * len(states_y)
        n_entries = n_positions * n_positions * len(angles) * len(powers)
        print(f"size dict list = {n_entries}\n")

        for s_cue_x in tqdm(states_x):
            for s_cue_y in states_y:
                # The key prefix only changes with the loop that owns it, so
                # build each part once instead of once per innermost iteration.
                cue_part = f'{[s_cue_x, s_cue_y]}'
                for s_target_x in states_x:
                    for s_target_y in states_y:
                        state_part = f'{cue_part}, {[s_target_x, s_target_y]}'
                        for angle in angles:
                            for power in powers:
                                self.Q_dict[f'{state_part}, {angle}, {power}'] = 0.00

        print(len(self.Q_dict))

    def update_q_dict(self, cue_state, target_state, angle, power, reward):
        """Add ``reward`` to the stored value of a state-action pair.

        If the pair has no entry yet, one is created holding ``reward`` itself.

        Args:
            cue_state: ``[x, y]`` of the cue.
            target_state: ``[x, y]`` of the target.
            angle: angle component of the action.
            power: power component of the action.
            reward: amount to add to the stored value.
        """
        key = self._make_key(cue_state, target_state, angle, power)
        current_reward = self.Q_dict.get(key)
        self.Q_dict[key] = reward if current_reward is None else current_reward + reward

    def get_q_val(self, cue_state, target_state, angle, power):
        """Return the stored value for a state-action pair, or None if unseen."""
        return self.Q_dict.get(self._make_key(cue_state, target_state, angle, power))

# Initialize the Q-table as a Python dict

## a) Initialize the full state and action space (extremely slow at this resolution)

In [58]:
# Deliberately disabled by wrapping the code in a string literal, so running
# this cell only echoes the string instead of building the full table.
# NOTE(review): init_q_dict is defined as a method of Q_DICT, so re-enabling
# this would need Q.init_q_dict(...) rather than a bare init_q_dict(...) call
# -- confirm before running.
'''
Q = Q_DICT()

Q.Q_dict = init_q_dict(x_disc=501, y_disc=501, angles_disc=361, powers_disc=5)
'''

'\nQ = Q_DICT()\n\nQ.Q_dict = init_q_dict(x_disc=501, y_disc=501, angles_disc=361, powers_disc=5)\n'

## b) Start with a minimal Q-table and add entries as state-action pairs are encountered (faster, but an unseen state-action pair has no stored value)

In [66]:
# Create a fresh table; the constructor seeds it with one all-zero entry.
Q = Q_DICT()

# Show the initial contents (a single key at this point).
print(Q.Q_dict)

{'[0.0, 0.0], [0.0, 0.0], 0.0, 0.0': 0.0}


In [67]:
# One example state-action pair and the reward observed for it.
cue_state = [1.0, 0.0]
target_state = [0.0, 1.0]
angle = 0.00
power = 0.00
reward = 10.00

# The pair is not in the table yet, so this creates the entry with the reward as its value.
Q.update_q_dict(cue_state, target_state, angle, power, reward)

print(Q.Q_dict)

{'[0.0, 0.0], [0.0, 0.0], 0.0, 0.0': 0.0, '[1.0, 0.0], [0.0, 1.0], 0.0, 0.0': 10.0}


# Check Q-table (Q-dict) to see what the latest key is

In [68]:
# Dicts preserve insertion order, so the last key is the most recently
# inserted entry (updating an existing key does not move it).
print(list(Q.Q_dict)[-1])

[1.0, 0.0], [0.0, 1.0], 0.0, 0.0


# Check to see what the Q-value is for a given key

In [69]:
# Look up the stored value for one specific state-action pair.
cue_state = [1.00, 0.00]
target_state = [0.00, 1.00]
angle = 0.00
power = 0.00

# get_q_val returns None when the pair has never been stored.
q_val = Q.get_q_val(cue_state, target_state, angle, power)
print(q_val)

10.0
