In [1]:
import numpy as np 
import random 

In [3]:
# Discretised tank levels: 0, 10, 20, ..., 100 (eleven values, step of 10).
states = np.arange(11) * 10
# The two control actions; an action's list position is its Q-table column.
actions = ['FILL', 'STOP']

print("States:", states)
print("Actions:", actions)

States: [  0  10  20  30  40  50  60  70  80  90 100]
Actions: ['FILL', 'STOP']


In [4]:
# Q-table: one row per entry in `states`, one column per entry in `actions`,
# every value initialised to zero.
Q = np.zeros(shape=(len(states), len(actions)))

# Hyperparameters read by the training loop.
alpha, gamma, epsilon = 0.1, 0.9, 0.2  # update step size, weight on next-state value, random-action probability
episodes = 300  # number of training episodes

print('Q-table shape: ', Q.shape)

Q-table shape:  (11, 2)


In [6]:
def get_reward(level, action):
    """Score a tank level together with the action associated with it.

    The base score is +10 when ``level`` lies in 40..70 (inclusive) and -10
    otherwise. A further 10 is subtracted when the action is 'FILL' at a
    level of 90 or more, or 'STOP' at a level of 10 or less.

    Returns:
        The integer reward: 10, -10 or -20.
    """
    in_ideal_band = 40 <= level <= 70
    overfill_risk = action == 'FILL' and level >= 90
    empty_risk = action == 'STOP' and level <= 10

    base = 10 if in_ideal_band else -10
    # The two risk conditions require different actions, so at most one applies.
    penalty = 10 if (overfill_risk or empty_risk) else 0
    return base - penalty

# Sanity check: level 80 is outside the 40-70 band and below the FILL penalty
# threshold of 90, so this prints -10. The label names the level actually passed.
print('Reward example (level=80, FILL):', get_reward(80, 'FILL'))

Reward example (level=60m FILL): -10


In [20]:
def next_level(level, action):
    """Sample the tank level that follows ``level`` after taking ``action``.

    A step size is drawn at random from 5, 10 or 15. 'FILL' adds it to the
    level; any other action subtracts it. The result is clipped to 0..100.

    Returns:
        The new level as a Python ``int``.
    """
    step = random.choice([5, 10, 15])
    signed_step = step if action == 'FILL' else -step
    return int(np.clip(level + signed_step, 0, 100))

# Demo call: from level 50 a FILL yields 55, 60 or 65 (50 plus 5, 10 or 15).
print('Next Level Example:', next_level(level=50, action='FILL'))

Next Level Example: 65


In [22]:
# Tabular Q-learning with epsilon-greedy exploration.
# A level is mapped to its Q-table row with `level // 10`.
for ep in range(episodes):
    # Every episode starts from a randomly chosen entry of `states`.
    level = random.choice(states)
    for _ in range(15):  # fixed number of steps per episode
        # Explore with probability epsilon, otherwise take the current best action.
        if random.uniform(0,1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[level // 10])]

        next_state = next_level(level,action)
        # The reward is evaluated on the level reached, paired with the action taken.
        reward = get_reward(next_state,action)

        # Q-learning update: move Q(s, a) towards reward + gamma * max Q(s', .).
        a = actions.index(action)
        best_next = np.max(Q[next_state // 10])
        Q[level // 10, a] += alpha * (reward + gamma * best_next - Q[level // 10,a])

        # Advance to the sampled level; without this every step of the episode
        # would act from, and update, the starting state only.
        level = next_state

print ("Training completed")

Training completed


In [24]:
# Read the starting level from the user; fall back to 50 on invalid input.
try:
    level = int(input('Enter starting water level (0-100): '))
    if level < 0 or level > 100:
        raise ValueError('Water level out of range!')
except ValueError as e:
    # Reached for non-integer input (raised by int()) and for out-of-range values.
    print(e)
    level = 50
    print('Default level set to 50%')

print(f'\nStarting with level: {level}%')
print('Simulating for 10 steps:\n')

# Greedy rollout: always take the highest-valued action for the current level's
# Q-table row, then sample the level that follows.
for step in range(10):
    action = actions[np.argmax(Q[level // 10])]
    print(f'Step {step+1}: Level={level}% + Action={action}')
    level = next_level(level, action)

print('Simulation complete. Water tank control finished.')

Enter starting water level (0-100):  100



Starting with level: 100%
Simulating for 10 steps:

Step 1: Level=100% + Action=STOP
Step 2: Level=95% + Action=STOP
Step 3: Level=90% + Action=STOP
Step 4: Level=75% + Action=STOP
Step 5: Level=65% + Action=STOP
Step 6: Level=50% + Action=FILL
Step 7: Level=55% + Action=FILL
Step 8: Level=65% + Action=STOP
Step 9: Level=50% + Action=FILL
Step 10: Level=55% + Action=FILL
Simulation complete. Water tank control finished.
