#### Results Frozen Lake Random variable siz Map

Link: https://www.gymlibrary.dev/environments/toy_text/frozen_lake/

In [1]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
import requests
import time
import pygame
import re
import json
import matplotlib.pyplot as plt
import pandas as pd

from gymnasium.envs.toy_text.frozen_lake import generate_random_map

##### Selected models

In [2]:
# LLM API details (Modify if needed)
LLM_API_URL = "http://localhost:1234/v1/chat/completions"  # Change to your LM Studio API URL
#MODEL_NAME ="Mistral-Nemo-Instruct-2407-GGUF"
MODEL_NAME = "Phi-4-mini-instruct-GGUF"


##### Creating the instruction

In [3]:
# === Instruction prompt ===
instruction = """
You are a reinforcement learning agent playing the FrozenLake-v1 environment (6x6 grid) from OpenAI Gym.

Each tile in the environment can be:
- 'S': Start tile (only at beginning)
- 'F': Frozen tile — safe to step on
- 'H': Hole — dangerous, stepping into it ends the episode
- 'G': Goal — reach this tile to win

Your goal is to reach the goal tile ('G') while avoiding holes.

You are told what the tiles around you are:
- Left (0)
- Down (1)
- Right (2)
- Up (3)

Your job is to pick the safest direction that helps you reach the goal.

Only respond with one digit: 0, 1, 2, or 3. No explanation, no extra text.
"""

##### Creating the prompt

Problem agent is stuck on a 5x5 grid. Possible reasons:  
1. In a 4x4 grid, the goal is only a few moves away.   
2. Mistral can "guess-and-check" its way there.  
3. In a 5x5 grid it looks like the LLM doesn’t know how far away the goal is. 
 
So for that reason a new function is created to give the agent a clear directional goal (e.g., "the goal is southeast").  

In [4]:
# Top-level helper to get a tile from the grid
def get_tile(desc, r, c):
    if 0 <= r < len(desc) and 0 <= c < len(desc[0]):
        return desc[r][c]
    return "Wall"

# Top-level helper to locate the goal in the grid
def find_goal_position(desc):
    for r, row_vals in enumerate(desc):
        for c, val in enumerate(row_vals):
            if val == 'G':
                return (r, c)
    return None  # Should never happen if map is valid

def build_prompt(observation, desc):
    size = len(desc)
    row, col = divmod(observation, size)
    current_tile = desc[row][col]

    # Get neighboring tiles using the top-level get_tile()
    neighbors = {
        0: get_tile(desc, row, col - 1),  # Left
        1: get_tile(desc, row + 1, col),  # Down
        2: get_tile(desc, row, col + 1),  # Right
        3: get_tile(desc, row - 1, col),  # Up
    }

    directions = ["Left", "Down", "Right", "Up"]
    neighbor_descriptions = "\n".join(
        [f"{directions[a]} ({a}): {tile}" for a, tile in neighbors.items()]
    )

    # Get goal position and direction delta
    goal_row, goal_col = find_goal_position(desc)
    delta_row = goal_row - row
    delta_col = goal_col - col

    vertical = "down" if delta_row > 0 else "up" if delta_row < 0 else "same row"
    horizontal = "right" if delta_col > 0 else "left" if delta_col < 0 else "same column"

    prompt = f"""
    You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position ({row}, {col}) on a tile of type '{current_tile}'.
    The goal is at position ({goal_row}, {goal_col}).
    To move toward the goal, go {abs(delta_row)} step(s) {vertical} and {abs(delta_col)} step(s) {horizontal}.

    Here are the surrounding tiles:
    {neighbor_descriptions}

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
    """
    return prompt.strip()


##### Calling the LLM with the prompt and instruction

In [5]:
def query_llm(prompt):
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": instruction},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.2,
        "max_tokens": 5,
        "stop": ["\n", ".", " "],
    }

    try:
        # Debudging: print the prompt sent to the LLM
        #print("\n=== Prompt to LLM ===") 
        #print(prompt)
        #print("=====================")

        response = requests.post(
            LLM_API_URL,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload)
        )
        response_json = response.json()
        raw_result = response_json["choices"][0]["message"]["content"].strip()

        #print("LLM Raw Response:", raw_result)

        match = re.search(r"\b[0-3]\b", raw_result)
        if match:
            action = int(match.group())
            print(f"Chosen Action: {action}")
            return action
        else:
            raise ValueError("No valid action returned.")

    except Exception as e:
        print("LLM ERROR:", e)
        fallback = random.randint(0, 3)
        print(f"Using fallback action: {fallback}")
        return fallback


##### Running the agent

In [6]:
def run_frozenlake():
    #EPISODES = 1  # For testing
    EPISODES = 50  # Total number of episodes to run
    steps_history = []
    reward_history = []
    time_history = []

    for episode in range(EPISODES):
        # Generate a new map for this episode
        random_map = generate_random_map(size=6, p=0.9) # 6x6 grid, 90% frozen tiles
        #random_map = generate_random_map(size=6, p=0.1) # 6x6 grid, 10% holes
        # Create a new environment using that map
        env = gym.make("FrozenLake-v1", desc=random_map, render_mode="human", is_slippery=False)

        # Extract tile layout from the environment
        desc = [[cell.decode('utf-8') for cell in row] for row in env.unwrapped.desc]

        print(f"\n========== STARTING EPISODE {episode + 1} ==========\n")
        observation, info = env.reset()
        total_reward = 0
        start_time = time.time()

        for step in range(100):
            print(f"\n--- Step {step + 1} ---")
            print(f"Observation (state index): {observation}")
            prompt = build_prompt(observation, desc)
            print("Prompt:", prompt)    
            action = query_llm(prompt)

            observation, reward, terminated, truncated, info = env.step(action)
            total_reward += reward

            if terminated or truncated:
                env.close()
                break

        end_time = time.time()
        elapsed = end_time - start_time

        steps_history.append(step + 1)
        reward_history.append(total_reward)
        time_history.append(elapsed)

        print(f"\n Episode finished in {step + 1} steps")
        print(f" Total Reward: {total_reward}")
        print(f" Elapsed Time: {elapsed:.2f} seconds")

    env.close()

    print("\n=== Summary ===")
    print(f"Steps: {steps_history}")
    print(f"Rewards: {reward_history}")
    print(f"Times: {time_history}")

##### Result Mistral-Nemo-Instruct-2407-GGUF(6x6 - 1 episode - max 100 steps)

In [10]:
# === Run the program ===
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Phi-4-mini-instruct-GGUF (6x6 - 1 epsiode - 100 steps)

In [13]:
# === Run the program ===
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Phi-4-mini-instruct-GGUF (6x6 - 10 epsiodes - 100 steps)

In [15]:
# === Run the program ===
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Result Mistral-Nemo-Instruct-2407-GGUF(6x6 - 10 episodes - max 100 steps)

In [17]:
# === Run the program ===
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Result Mistral-Nemo-Instruct-2407-GGUF(6x6 - 10 episodes - max 100 steps - complexity = 0.1)

In [26]:
# === Run the program ===
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Results Phi-4-mini-instruct-GGUF (6x6 - 10 episodes - max 100 steps - complexity = 0.1)

In [28]:
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Result Mistral-Nemo (6x6 - 50 episodes - max 100 steps - complexity = 0.9)

In [7]:
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 

##### Result Phi-4-Mini (6x6 - 50 episodes - max 100 steps - complexity = 0.9)

In [7]:
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G = Goal (you win by reaching this)

    You are currently at position (0, 0) on a tile of type 'S'.
    The goal is at position (5, 5).
    To move toward the goal, go 5 step(s) down and 5 step(s) right.

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Your task: Choose the safest direction that brings you closer to the goal while avoiding holes.

    Respond with a single digit (no explanation):
    0 = Left, 1 = Down, 2 = Right, 3 = Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 6
Prompt: You are navigating a 5x5 FrozenLake grid.

    Tile meanings:
    - S = Start (your starting tile)
    - F = Frozen tile (safe)
    - H = Hole (dangerous, episode ends if you fall in)
    - G 