In [1]:
!pip install gymnasium renderlab



In [2]:
!pip install opencv-python



In [3]:
!pip install pygame



In [4]:
import gymnasium as gym
import random
from IPython.display import clear_output
%config NotebookApp.iopub_msg_rate_limit=10000
import time

In [80]:
#visualise maze:
rfpMaze = ["SFFF", "FHHH", "FFFF", "HFHF", "FFGF"]
maze1 = ["SFFF", "FHHF", "FHFG", "FFFH", "HGHH"]

desc = rfpMaze
mazeSize = [len(desc),len(desc[0])]

env = gym.make('FrozenLake-v1', desc=desc, map_name="5x5", is_slippery=False, render_mode="human") 

In [81]:
# Q-LEARNING FUNCTIONS

epsilonValue = 0.2
alpha = 0.5
qTable_1 = {}
currentState = 0

def resetTable():
    global qTable_1
    qTable_1 = {}
    for i in range(mazeSize[0]*mazeSize[1]):
        qTable_1[i] = [0,0,0,0] 
    global currentState
    currentState = 0

def calcPossibleMoves(state):
    global qTable_1
    possibleMoves = []
    
    if state == 0:
        return [1,2]
    
    if (state+1) % mazeSize[1] != 0:
        possibleMoves.append(2)
        
    if (state+1) % mazeSize[1] != 1:
        possibleMoves.append(0)
        
    if state > (mazeSize[1]-1):
        possibleMoves.append(3)
    
    if state < ((mazeSize[0] * mazeSize[1]) - mazeSize[1]):
        possibleMoves.append(1)
        
    return possibleMoves

def nextStep(state):
    global qTable_1
    possMoves = calcPossibleMoves(state)
    
    if random.random() < epsilonValue:
        nextMove = random.choice(possMoves)
    else:
        qValues = {}
        for move in possMoves:
            qValues[move] = qTable_1[state][move]
        maxValue = max(qValues.values())
        count = sum(1 for value in qValues.values() if value == maxValue)
        if count > 1:
            nextMove = random.choice(possMoves)
        else:
            nextMove = max(qValues, key=qValues.get)
    return nextMove

convergenceThresh = 0.015
def updateTable(direction, nextState, reward):
    global qTable_1
    didConverge = False
    global currentState
    updated = qTable_1[currentState][direction] + alpha*(reward + max(qTable_1[nextState]) - qTable_1[currentState][direction])
    changeInQ = abs(qTable_1[currentState][direction] - updated)
    if changeInQ < convergenceThresh:
        if changeInQ > 0 and max(qTable_1[0]):
            didConverge = True
    qTable_1[currentState][direction] = updated
    currentState = nextState
    return didConverge, changeInQ

In [88]:
maxEpisodes = 1000
currentEpisode = 1
converged = False

resetTable()
env.reset()
start_time = time.time()
while currentEpisode <= maxEpisodes:
    if converged:
        break
        
    direction = nextStep(currentState)
    observation, reward, terminated, truncated, info = env.step(direction)
    
    if terminated:
        if reward < 1:
            reward = -1
    
    converged, changeInQ = updateTable(direction, observation, reward)

    if terminated or truncated or converged:
        observation, info = env.reset()
        if not converged:
            currentEpisode += 1

        
    if converged:
        end_time = time.time()

    clear_output(wait=True)
    print("Episode: " + str(currentEpisode) + "/" + str(maxEpisodes))
    print("Time: " + str(round(time.time()-start_time, 3)) + " sec")
    print("Q-Table:")
    for i in range(len(qTable_1)):
        print(str(i) + ": " + str(qTable_1[i]))
    print("change in Q: " + str(changeInQ))
            
if converged:
    duration = end_time - start_time
    print(str(round(duration, 3)) + " seconds to converge")
else:
    print("No convergence")

Episode: 10/1000
Time: 13.279 sec
Q-Table:
0: [0, 0.0, 0.0, 0]
1: [0.0, -0.75, 0.0, 0]
2: [0.0, 0, 0, 0]
3: [0, 0, 0, 0]
4: [0, 0.0, -0.9375, 0.0]
5: [0.0, 0.0, 0.0, 0.0]
6: [0, 0, 0.0, 0]
7: [0, 0, 0, 0]
8: [0, -0.5, 0.0, 0]
9: [0, 0, 0.0, -0.5]
10: [0, 0, 0, -0.5]
11: [0, 0, 0, 0]
12: [0, 0, 0, 0.0]
13: [0, 0, 0, 0]
14: [0, 0, 0, 0]
15: [0, 0, 0, 0]
16: [0, 0, 0, 0]
17: [0, 0, 0, 0]
18: [0, 0, 0, 0]
19: [0, 0, 0, 0]
change in Q: 0.0


KeyboardInterrupt: 

In [None]:
env.reset()

In [89]:
env.close()