# Libraries

In [1]:
import gym
import time
import matplotlib.pyplot as plt
from pyswip import Prolog
from utils import create_level, define_reward, process_state, perform_action, show_match,display_inventory,parse_predicate,extract_monsters
from nle import nethack
#from shop_generator import creashop
from run import completemapdes,run
import random

# Simulation parameters

In [2]:
# --- Episode loop settings ---
NUM_EPISODES = 5   # number of episodes played by the main loop
MAX_STEPS = 50     # upper bound on the steps taken within a single episode

# --- Knowledge base ---
PATH = 'kb.pl'     # Prolog file consulted when the knowledge base is created

# --- Initial agent parameters (each is asserted into the KB as a unary fact) ---
RISK_THRESHOLD = 200    # health threshold: when the agent feels it's not healthy anymore
MONEY_THRESHOLD = 1     # asserted as money_threshold/1 -- exact use is defined in kb.pl
MONEY = 40              # asserted as money/1 -- presumably the starting gold; TODO confirm
ARROWS = 2              # asserted as arrows/1 -- presumably the starting arrows; TODO confirm
ARROWS_THRESHOLD = 4    # asserted as arrows_threshold/1 -- exact use is defined in kb.pl

Initialize the knowledge base.

In [3]:
# Create the Prolog engine and load the rule base from PATH.
KB = Prolog()
KB.consult(PATH)

# Load the initial agent parameters into the knowledge base.
# Each (functor, value) pair becomes one unary fact, e.g. money(40).
initial_facts = (
    ("health_threshold", RISK_THRESHOLD),
    ("money_threshold", MONEY_THRESHOLD),
    ("money", MONEY),
    ("arrows", ARROWS),
    ("arrows_threshold", ARROWS_THRESHOLD),
)
for functor, value in initial_facts:
    KB.asserta(f"{functor}({value})")





#### Main code
- Run `NUM_EPISODES` episodes in the environment.
- Use `Prolog` to define the axioms and choose the action to perform.
- The main goal is to _reach and eat_ the `apple`.

In [4]:
# Run NUM_EPISODES episodes. For each one: build the level, let the Prolog
# knowledge base pick an action at every step, and record the episode return.
rewards = []
action = None

# Keep the level generator under its own name. Rebinding `run` to the instance
# (`run = run()`) shadows the imported class, so executing this cell a second
# time on the same kernel would fail.
level_runner = run()

# The action set does not depend on the episode, so it is built once.
actions = tuple(nethack.CompassDirection) + (
    nethack.Command.PICKUP,
    nethack.Command.PAY,
    nethack.Command.WIELD,
    nethack.Command.SIT,
    nethack.Command.QUAFF,
)

for episode in range(NUM_EPISODES):
    des_file = level_runner.getdes()
    room_type = level_runner.gettype()
    env = gym.make(
        "MiniHack-Skill-Custom-v0",
        character="sam-hum-neu-mal",
        observation_keys=('screen_descriptions', 'inv_strs', 'blstats', 'message',
                          'pixel', 'chars', 'inv_oclasses', 'glyphs'),
        des_file=des_file,
        autopickup=False,
        actions=actions,
    )
    try:
        # count the number of steps of the current episode
        steps = 0
        # cumulative reward (return) of the current episode
        reward = 0.0
        # collect obs['pixel'] so the episode can be replayed with show_match(ep_states)
        ep_states = []

        # extract knowledge of the battlefield from the des_file
        if room_type in ('mazeroom', 'minibossroom'):
            KB.asserta("corridors")
        monsters = extract_monsters(des_file)

        obs = env.reset()

        # The DOOR with the largest x coordinate is asserted as battlefield_start.
        # (-1, -1) is asserted when the des_file has no DOOR with explicit coordinates.
        goal_x, goal_y = -1, -1
        for row in des_file.split('\n'):
            if 'DOOR' not in row:
                continue
            # Locate the "(x,y)" pair by its parentheses instead of assuming that
            # ')' is the last character of the line (trailing spaces or '\r').
            start = row.find('(')
            end = row.find(')', start + 1)
            if start == -1 or end == -1:
                continue
            pos_x, pos_y = map(int, row[start + 1:end].split(','))
            if goal_x < pos_x:
                goal_x, goal_y = pos_x, pos_y
        KB.asserta(f'battlefield_start({goal_x},{goal_y})')

        ep_states.append(obs['pixel'])
        done = False
        # Main loop
        while not done and steps < MAX_STEPS:
            # assert the facts derived from the current observation in the kb
            process_state(obs, KB)
            # tell the agent the present enemies
            for monster_name, pos_x, pos_y in monsters:
                KB.asserta(f"position(enemy,'{monster_name}',{pos_x},{pos_y})")

            # Ask the KB for the next action; the first solution wins.
            try:
                solutions = list(KB.query('action(X)'))
            except Exception as err:
                # Best effort: a failing query ends the episode, but is reported.
                print(f"Prolog query failed at step {steps}: {err}")
                solutions = []
            action = solutions[0]['X'] if solutions else None
            if not action:
                # the KB has nothing to propose: stop this episode
                break

            # Perform the action in the environment
            obs, step_reward, done, info = perform_action(action, env, KB)
            # accumulate, so that `rewards` really holds per-episode returns
            reward += step_reward
            ep_states.append(obs['pixel'])
            steps += 1

        # Display the final state of the episode
        # (use show_match(ep_states) to replay the whole episode instead)
        env.render()
        print(list(KB.query("battlefield_start(X,Y)")))
        rewards.append(reward)
    finally:
        # Release the environment and retract the axioms that may cause errors
        # in the next episode, even when the episode was interrupted by an exception.
        env.close()
        KB.retractall('stepping_on(agent,_,_)')
        KB.retractall('position(_,_,_,_)')
        KB.retractall('shopping_done')
        KB.retractall('corridors')
        KB.retractall('battlefield_start(_,_)')
    level_runner.nextlevel()


mean_return = sum(rewards) / len(rewards) if rewards else 0.0
print(f'After {NUM_EPISODES} episodes, mean return is {mean_return}')
print("The rewards of the episodes are:", rewards)


[0;37mI[0;37mt[0;37m'[0;37ms[0;30m [0;37ms[0;37mo[0;37ml[0;37mi[0;37md[0;30m [0;37ms[0;37mt[0;37mo[0;37mn[0;37me[0;37m.[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30

: 