<a href="https://colab.research.google.com/github/faizankshaikh/ForaGym/blob/main/notebooks/ExampleForagym.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Installation

In [8]:
#@title install required libraries
# NOTE(review): the setup commands below are commented out. Presumably they
# must be uncommented when running in a fresh environment (e.g. Colab) where
# the ForaGym repository and gymnasium are not available yet -- confirm.
# !git clone --quiet https://github.com/faizankshaikh/ForaGym.git
# %cd ForaGym/
# !pip install -q gymnasium

/content/ForaGym
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m925.5/925.5 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25h

# 2. Create Gym environment

In [9]:
#@title import required libs and modules
import numpy as np
import pandas as pd
import gymnasium as gym

In [10]:
#@title initialize gym envs
# Both environments are built from the same registered id and differ only in
# their render mode.
ENV_ID = "foragym:foragym/ForaGym-v1"

# "human" render mode: used by the cells that display episodes step by step.
env_verbose = gym.make(ENV_ID, render_mode="human")

# No rendering: used by the bulk evaluation loops.
env = gym.make(ENV_ID, render_mode=None)

In [11]:
#@title visualize transition matrix
# nS, nA, P, decode and action_dict are custom attributes of the ForaGym
# environment. gym.make() returns the environment wrapped in helper wrappers,
# and recent gymnasium releases no longer forward unknown attributes through
# wrappers, so read them from the unwrapped base environment.
base_env = env_verbose.unwrapped
num_states = base_env.nS
num_actions = base_env.nA

# NOTE(review): "life_points_left_left" looks like a typo for
# "life_points_left"; kept unchanged in case later cells rely on the name.
TRANSITION_COLUMNS = [
    "id_state",
    "days_left",
    "life_points_left_left",
    "id_forest",
    "action",
    "days_left_next",
    "life_points_left_next",
    "id_forest_next",
    "prob_transition",
    "reward",
    "terminated"
]

# One row per (state, action, possible outcome) entry of the transition table.
transition_rows = []
num_skipped = 0
for state in range(num_states):
    for action in range(num_actions):
        for items in base_env.P[state][action]:
            try:
                p, new_state, reward, done = items
                days_left, life_point, forest_type = base_env.decode(state)
                days_left_next, life_point_next, forest_type_next = base_env.decode(new_state)
                transition_rows.append([
                    state,
                    days_left,
                    life_point,
                    forest_type,
                    base_env.action_dict[action],
                    days_left_next,
                    life_point_next,
                    forest_type_next,
                    round(p, 3),
                    reward,
                    done
                ])
            except Exception:
                # Best effort: entries that cannot be unpacked or decoded are
                # skipped, but counted so the omission is visible. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                num_skipped += 1

if num_skipped:
    print(f"Skipped {num_skipped} transition entries that could not be processed.")

df = pd.DataFrame(transition_rows, columns=TRANSITION_COLUMNS)
df.head()

Unnamed: 0,id_state,days_left,life_points_left_left,id_forest,action,days_left_next,life_points_left_next,id_forest_next,prob_transition,reward,terminated
0,576,1,1,0,wait,0.0,0.0,0.0,1.0,-1,True
1,576,1,1,0,forage,0.0,0.0,0.0,0.333,-1,True
2,576,1,1,0,forage,0.0,0.0,0.0,0.125,-1,True
3,576,1,1,0,forage,0.0,2.0,0.0,0.167,0,True
4,576,1,1,0,forage,0.0,2.0,0.0,0.125,0,True


# 3. Solve the foraging task

## 3.1 Heuristic methods

In [12]:
#@title display sample episodes
num_episodes = 2

# action_dict is a custom attribute of the ForaGym environment; read it from
# the unwrapped env because recent gymnasium wrappers do not forward it.
action_names = env_verbose.unwrapped.action_dict

for episode in range(num_episodes):
    print(f"Episode #{episode+1}")
    print("=" * 10)

    print("Initial state:-")
    # NOTE(review): the same seed is passed on every reset, which presumably
    # makes every episode start from the same initial state -- confirm this
    # is intended.
    obs, info = env_verbose.reset(seed=42)
    print()

    done = False
    while not done:
        # Random policy: sample any valid action.
        action = env_verbose.action_space.sample()
        print(f"Action to take: {action_names[action]}")
        print()
        # step() reports two independent end-of-episode flags; the episode is
        # over when either one is set.
        obs, reward, terminated, truncated, info = env_verbose.step(action)
        done = terminated or truncated
        print()

    print("-" * 10)

Episode #1
Initial state:-
--Forest Quality for the left environment: 0.25
--Threat Encounter probability for the left environment: 0.00
--Nutritional Quality for the left environment: 1.00
--Forest Quality for the right environment: 0.67
--Threat Encounter probability for the right environment: 0.67
--Nutritional Quality for the right environment: 1.00
----------

--Days left: 8
--Current life: 5
--Current Forest Quality: 0.67
--Current Threat Encounter probability: 0.67
--Current Nutritional Quality: 1.00

Action to take: wait

--Consequence: Waited
--Reward?: 0
--Episode done?: False
--Days left: 7
--Current life: 4
--Current Forest Quality: 0.25
--Current Threat Encounter probability: 0.00
--Current Nutritional Quality: 1.00

Action to take: forage

--Consequence: Left environment / Forage failed / No threat encountered
--Reward?: 0
--Episode done?: False
--Days left: 6
--Current life: 2
--Current Forest Quality: 0.25
--Current Threat Encounter probability: 0.00
--Current Nutrition

  and should_run_async(code)


In [13]:
#@title evaluate algorithm (always wait)
num_episodes = 10000

# Action id used by this policy. Per the cell title this is the "wait"
# action -- presumably action_dict[0] == "wait"; confirm against the env.
ACTION_WAIT = 0

# Total reward collected in each episode.
episode_rewards = []

for episode in range(num_episodes):
    obs, info = env.reset()

    total_reward = 0
    done = False

    while not done:
        action = ACTION_WAIT
        # An episode ends when the env reports either termination or
        # truncation; ignoring `truncated` could step a finished episode.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        total_reward += reward

    episode_rewards.append(total_reward)

episode_rewards = np.array(episode_rewards)

def print_metrics(episode_rewards, interval_size=100):
    """Summarise per-episode rewards as a percentage plus a spread estimate.

    Despite its name, this function prints nothing; it returns the numbers
    and leaves formatting to the caller.

    Args:
        episode_rewards: Sequence or array holding the total reward of each
            episode. Its length determines the number of episodes; no
            notebook-level variable is consulted.
        interval_size: Number of consecutive episodes grouped into one
            interval for the spread estimate. Must be at least 1.

    Returns:
        A tuple ``(avg, std_dev)``:

        * ``avg`` is ``(1 + mean(episode_rewards)) * 100``. The callers report
          it as a survival rate, which presumably assumes each episode's
          total reward is -1 (died) or 0 (survived).
        * ``std_dev`` is the standard deviation of the differences between
          the means of consecutive full intervals, times 100. It is NaN when
          fewer than two full intervals are available.

    Raises:
        ValueError: If ``interval_size`` is smaller than 1.
    """
    if interval_size < 1:
        raise ValueError("interval_size must be at least 1")

    episode_rewards = np.asarray(episode_rewards)
    # Derive the episode count from the data rather than from a global.
    num_episodes = len(episode_rewards)
    num_intervals = num_episodes // interval_size

    # Mean reward of every interval (the last one may be partial).
    means = [
        np.mean(episode_rewards[start:start + interval_size], axis=0)
        for start in range(0, num_episodes, interval_size)
    ]

    # Change of the mean between neighbouring full intervals; a trailing
    # partial interval is never included because only the first
    # `num_intervals` means are visited.
    mean_diffs = [means[i + 1] - means[i] for i in range(num_intervals - 1)]

    avg = (1 + np.mean(episode_rewards, axis=0)) * 100
    if mean_diffs:
        std_dev = np.std(mean_diffs, axis=0) * 100
    else:
        # np.std([]) would also yield NaN, but with a RuntimeWarning.
        std_dev = float("nan")

    return avg, std_dev

# Summarise the rewards collected by the loop above; print_metrics returns
# the values and the formatting happens here.
avg, std_dev = print_metrics(episode_rewards)

print(f"survival rate (in %): {avg:.1f} +- {std_dev:.3f}")

survival rate (in %): 0.0 +- 0.000


In [14]:
#@title evaluate algorithm (always forage)
num_episodes = 10000

# Action id used by this policy. Per the cell title this is the "forage"
# action -- presumably action_dict[1] == "forage"; confirm against the env.
ACTION_FORAGE = 1

# Total reward collected in each episode.
episode_rewards = []

for episode in range(num_episodes):
    obs, info = env.reset()

    total_reward = 0
    done = False

    while not done:
        action = ACTION_FORAGE
        # An episode ends when the env reports either termination or
        # truncation; ignoring `truncated` could step a finished episode.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        total_reward += reward

    episode_rewards.append(total_reward)

avg, std_dev = print_metrics(np.array(episode_rewards))

print(f"survival rate (in %): {avg:.1f} +- {std_dev:.3f}")

survival rate (in %): 13.7 +- 5.084


In [15]:
#@title evaluate algorithm (hail mary)
num_episodes = 10000

# Action ids used by this policy -- presumably 0 is "wait" and 1 is "forage"
# as in the env's action_dict; confirm against the env.
ACTION_WAIT = 0
ACTION_FORAGE = 1

# Total reward collected in each episode.
episode_rewards = []

for episode in range(num_episodes):
    obs, info = env.reset()

    total_reward = 0
    done = False

    while not done:
        # Only act when exactly one life point is left; otherwise keep to
        # the other action.
        action = ACTION_FORAGE if obs["life_points_left"] == 1 else ACTION_WAIT
        # An episode ends when the env reports either termination or
        # truncation; ignoring `truncated` could step a finished episode.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        total_reward += reward

    episode_rewards.append(total_reward)

avg, std_dev = print_metrics(np.array(episode_rewards))

print(f"survival rate (in %): {avg:.1f} +- {std_dev:.3f}")

survival rate (in %): 10.8 +- 3.763


In [16]:
#@title evaluate algorithm (random actions)
num_episodes = 10000

# Total reward collected in each episode.
episode_rewards = []

for episode in range(num_episodes):
    obs, info = env.reset()

    total_reward = 0
    done = False

    while not done:
        # Random baseline: sample any valid action.
        action = env.action_space.sample()
        # An episode ends when the env reports either termination or
        # truncation; ignoring `truncated` could step a finished episode.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        total_reward += reward

    episode_rewards.append(total_reward)

avg, std_dev = print_metrics(np.array(episode_rewards))

print(f"survival rate (in %): {avg:.1f} +- {std_dev:.3f}")

survival rate (in %): 9.8 +- 3.529
