#### Results Frozen Lake Default Map

##### Import libraries

In [29]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
import requests
import time
import pygame
import re
import json
import matplotlib.pyplot as plt
import pandas as pd

##### Selected models

In [None]:
# LLM API details (Modify if needed)
# LLM_API_URL is the chat-completions endpoint that query_llm() POSTs to.
LLM_API_URL = "http://localhost:1234/v1/chat/completions"  # Change to your LM Studio API URL
# MODEL_NAME is sent as the "model" field of every request. Keep exactly one
# assignment active; to evaluate another model, switch the active line and
# re-run this cell before running the agent.
#MODEL_NAME ="Mistral-Nemo-Instruct-2407-GGUF"
MODEL_NAME = "Phi-4-mini-instruct-GGUF"
#MODEL_NAME ="Qwen2-0.5B-Instruct-GGUF"

##### Default map 

In [31]:
# === Environment Config ===
# Grid layout used to describe the agent's surroundings, stored as one list of
# single-character tiles per row (S = start, F = frozen, H = hole, G = goal).
DEFAULT_MAP = [list(row_tiles) for row_tiles in ("SFFF", "FHFH", "FFFH", "HFFG")]

##### Creating the instruction

In [None]:
# === Instruction prompt ===
# System message sent with every request by query_llm(). It explains the tile
# symbols, the action numbering (0=Left, 1=Down, 2=Right, 3=Up) and asks for a
# single-digit reply, which is what query_llm() parses out of the response.
# NOTE(review): the text mentions "OpenAI Gym" while the notebook imports
# gymnasium; the wording is left untouched because it is part of the prompt
# under evaluation.
instruction = """
You are a reinforcement learning agent playing the FrozenLake-v1 environment (4x4 grid) from OpenAI Gym.

Each tile in the environment can be:
- 'S': Start tile (only at beginning)
- 'F': Frozen tile — safe to step on
- 'H': Hole — dangerous, stepping into it ends the episode
- 'G': Goal — reach this tile to win

Your goal is to reach the goal tile ('G') while avoiding holes.

You are told what the tiles around you are:
- Left (0)
- Down (1)
- Right (2)
- Up (3)

Your job is to pick the safest direction that helps you reach the goal.

Only respond with one digit: 0, 1, 2, or 3. No explanation, no extra text.
"""

##### Creating the prompt

In [None]:
# === Utility: get tile safely ===
def get_tile(desc, r, c):
    """Return the tile at row ``r``, column ``c`` of ``desc``, or ``"Wall"``.

    Args:
        desc: Grid as a sequence of rows, each row a sequence of tile symbols.
        r: Row index; may lie outside the grid.
        c: Column index; may lie outside the grid.

    Returns:
        The symbol stored at ``desc[r][c]``, or the string ``"Wall"`` when the
        coordinates fall outside the grid.
    """
    # Bounds come from the map itself instead of a hard-coded 4, so maps of
    # any size are handled. Negative indices are rejected explicitly because
    # Python would otherwise wrap them around to the opposite edge.
    if 0 <= r < len(desc) and 0 <= c < len(desc[r]):
        return desc[r][c]
    return "Wall"


# === Prompt builder ===
def build_prompt(observation, desc):
    """Build the user prompt describing the agent's position and neighbours.

    Args:
        observation: Flat state index, decoded row-major as
            ``row * n_cols + col``.
        desc: Grid as a non-empty sequence of equally long rows of tile
            symbols.

    Returns:
        The prompt text. Every line is left-aligned; the prompt is assembled
        line by line because interpolating the multi-line neighbour list into
        an indented triple-quoted template left only its first line indented.
    """
    n_rows, n_cols = len(desc), len(desc[0])
    row, col = divmod(observation, n_cols)

    # Listed in action order, so the list index equals the action id the LLM
    # must answer with (0=Left, 1=Down, 2=Right, 3=Up).
    neighbors = [
        ("Left", get_tile(desc, row, col - 1)),
        ("Down", get_tile(desc, row + 1, col)),
        ("Right", get_tile(desc, row, col + 1)),
        ("Up", get_tile(desc, row - 1, col)),
    ]
    neighbor_lines = [
        f"{name} ({action}): {tile}"
        for action, (name, tile) in enumerate(neighbors)
    ]

    lines = [
        f"You are at position ({row}, {col}) on a {n_rows}x{n_cols} FrozenLake grid.",
        "",
        "Tile meanings:",
        "- S: Start",
        "- F: Frozen (safe)",
        "- H: Hole (dangerous, ends episode)",
        "- G: Goal (reach this)",
        "",
        "Here are the surrounding tiles:",
        *neighbor_lines,
        "",
        "Which direction should the agent move?",
        "",
        "Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up",
    ]
    return "\n".join(lines)


##### Calling the LLM with the prompt and instruction

In [None]:

def query_llm(prompt, timeout=120):
    """Ask the LLM for the next action and return it as an int in 0..3.

    The request is POSTed to ``LLM_API_URL`` for model ``MODEL_NAME`` with
    ``instruction`` as the system message and ``prompt`` as the user message.
    The first standalone digit 0-3 in the reply is taken as the action.

    Args:
        prompt: User message describing the current state.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        The chosen action. If the request fails, the server answers with an
        HTTP error, or the reply contains no valid digit, the error is printed
        and a uniformly random action is returned instead so the episode can
        continue. Such fallback steps are therefore not LLM decisions.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": instruction},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.2,
        "max_tokens": 5,
        "stop": ["\n", ".", " "],
    }

    try:
        # `json=` serialises the payload and sets the JSON Content-Type header.
        response = requests.post(LLM_API_URL, json=payload, timeout=timeout)
        # Surface HTTP errors directly instead of as a KeyError on "choices".
        response.raise_for_status()
        raw_result = response.json()["choices"][0]["message"]["content"].strip()

        match = re.search(r"\b[0-3]\b", raw_result)
        if match is None:
            raise ValueError(
                f"No valid action returned (raw response: {raw_result!r})."
            )

        action = int(match.group())
        print(f"Chosen Action: {action}")
        return action

    except Exception as e:
        # Deliberate best-effort: any failure degrades to a random move rather
        # than aborting the episode.
        print("LLM ERROR:", e)
        fallback = random.randint(0, 3)
        print(f"Using fallback action: {fallback}")
        return fallback


##### Running the agent for 1 episode

In [27]:
# === Main episode runner ===
def run_frozenlake(episodes=1, max_steps=50):
    """Run the LLM-driven agent on FrozenLake and print per-episode statistics.

    For every step the current observation is turned into a prompt with
    build_prompt(), the action is obtained from query_llm(), and the action is
    applied to the environment.

    Args:
        episodes: Number of episodes to play.
        max_steps: Maximum number of actions taken per episode.

    Returns:
        dict with the per-episode lists ``"steps"``, ``"rewards"`` and
        ``"times"`` (elapsed seconds), so results can be analysed instead of
        only being printed.
    """
    desc = DEFAULT_MAP
    # Build the environment from the same layout the prompts are generated
    # from, so the two cannot silently diverge.
    env = gym.make(
        "FrozenLake-v1",
        desc=["".join(row) for row in desc],
        render_mode="human",
        is_slippery=False,
    )
    steps_history = []
    reward_history = []
    time_history = []

    try:
        for episode in range(episodes):
            print(f"\n========== STARTING EPISODE {episode + 1} ==========\n")
            observation, info = env.reset()
            total_reward = 0
            steps_taken = 0
            start_time = time.time()

            while steps_taken < max_steps:
                print(f"\n--- Step {steps_taken + 1} ---")
                print(f"Observation (state index): {observation}")
                prompt = build_prompt(observation, desc)
                print("Prompt:", prompt)
                action = query_llm(prompt)

                observation, reward, terminated, truncated, info = env.step(action)
                total_reward += reward
                steps_taken += 1

                if terminated or truncated:
                    break

            elapsed = time.time() - start_time

            steps_history.append(steps_taken)
            reward_history.append(total_reward)
            time_history.append(elapsed)

            print(f"\n✅ Episode finished in {steps_taken} steps")
            print(f"🏆 Total Reward: {total_reward}")
            print(f"⏱️ Elapsed Time: {elapsed:.2f} seconds")
    finally:
        # Always release the environment, even if a step raised.
        env.close()

    print("\n=== Summary ===")
    print(f"Steps: {steps_history}")
    print(f"Rewards: {reward_history}")
    print(f"Times: {time_history}")

    return {
        "steps": steps_history,
        "rewards": reward_history,
        "times": time_history,
    }

##### Result Qwen2-0.5B-Instruct-GGUF for 1 episode

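Note: The Qwen agent does not make progress. As in the log below, it keeps choosing an action that leads into a wall (e.g. Up from the start tile), so it never leaves its tile and the episode only ends when the maximum number of steps is reached.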

In [20]:
# === Run the program ===
# NOTE: this calls whichever run_frozenlake definition was executed most
# recently and queries whichever MODEL_NAME is currently assigned, so the
# recorded output depends on the kernel state at the time the cell ran.
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 3

--- Step 2 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 3

--- Step 3 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

 

##### Result Mistral-Nemo-Instruct-2407-GGUF for 1 episode

In [28]:
# === Run the program ===
# NOTE: the model queried is whichever MODEL_NAME is assigned when this cell
# runs; re-run the model selection cell first when switching models.
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 4
Prompt: You are at position (1, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): S

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 3 ---
Observation (state index): 8
Prompt: You are at position (2, 0) on a 4x4 FrozenLake grid.

    

##### Running the agent for 10 episodes

In [None]:
# === Main episode runner ===
def run_frozenlake(episodes=10, max_steps=50):
    """Run the LLM-driven agent on FrozenLake and print per-episode statistics.

    For every step the current observation is turned into a prompt with
    build_prompt(), the action is obtained from query_llm(), and the action is
    applied to the environment.

    Args:
        episodes: Number of episodes to play.
        max_steps: Maximum number of actions taken per episode.

    Returns:
        dict with the per-episode lists ``"steps"``, ``"rewards"`` and
        ``"times"`` (elapsed seconds), so results can be analysed instead of
        only being printed.
    """
    desc = DEFAULT_MAP
    # Build the environment from the same layout the prompts are generated
    # from, so the two cannot silently diverge.
    env = gym.make(
        "FrozenLake-v1",
        desc=["".join(row) for row in desc],
        render_mode="human",
        is_slippery=False,
    )
    steps_history = []
    reward_history = []
    time_history = []

    try:
        for episode in range(episodes):
            print(f"\n========== STARTING EPISODE {episode + 1} ==========\n")
            observation, info = env.reset()
            total_reward = 0
            steps_taken = 0
            start_time = time.time()

            while steps_taken < max_steps:
                print(f"\n--- Step {steps_taken + 1} ---")
                print(f"Observation (state index): {observation}")
                prompt = build_prompt(observation, desc)
                print("Prompt:", prompt)
                action = query_llm(prompt)

                observation, reward, terminated, truncated, info = env.step(action)
                total_reward += reward
                steps_taken += 1

                if terminated or truncated:
                    break

            elapsed = time.time() - start_time

            steps_history.append(steps_taken)
            reward_history.append(total_reward)
            time_history.append(elapsed)

            print(f"\n✅ Episode finished in {steps_taken} steps")
            print(f"🏆 Total Reward: {total_reward}")
            print(f"⏱️ Elapsed Time: {elapsed:.2f} seconds")
    finally:
        # Always release the environment, even if a step raised.
        env.close()

    print("\n=== Summary ===")
    print(f"Steps: {steps_history}")
    print(f"Rewards: {reward_history}")
    print(f"Times: {time_history}")

    return {
        "steps": steps_history,
        "rewards": reward_history,
        "times": time_history,
    }

##### Result Phi-4-mini-instruct-GGUF (10 episodes)

Note: The Phi-4 agent does move. It sometimes loses and sometimes wins, but most of the time it reaches the maximum number of steps.

In [37]:
# === Run the program ===
# NOTE: run_frozenlake is defined more than once in this notebook; this call
# uses the definition executed most recently, together with whichever
# MODEL_NAME is currently assigned.
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 4
Prompt: You are at position (1, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): S

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 3 ---
Observation (state index): 8
Prompt: You are at position (2, 0) on a 4x4 FrozenLake grid.

    

##### Result Mistral-Nemo-Instruct-2407-GGUF for 10 episodes

In [36]:
# === Run the program ===
# NOTE: the result depends on kernel state: the most recently executed
# run_frozenlake definition is called and the currently assigned MODEL_NAME is
# queried. Re-run the model selection cell before switching models.
if __name__ == "__main__":
    run_frozenlake()




--- Step 1 ---
Observation (state index): 0
Prompt: You are at position (0, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): F
Up (3): Wall

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 2 ---
Observation (state index): 4
Prompt: You are at position (1, 0) on a 4x4 FrozenLake grid.

    Tile meanings:
    - S: Start
    - F: Frozen (safe)
    - H: Hole (dangerous, ends episode)
    - G: Goal (reach this)

    Here are the surrounding tiles:
    Left (0): Wall
Down (1): F
Right (2): H
Up (3): S

    Which direction should the agent move?

    Respond with one digit: 0=Left, 1=Down, 2=Right, 3=Up
Chosen Action: 1

--- Step 3 ---
Observation (state index): 8
Prompt: You are at position (2, 0) on a 4x4 FrozenLake grid.

    