In [2]:
import os
import sys

# JVM/Pyserini bootstrap：使用完整 JDK21，解决 jdk.incubator.vector 与类版本问题
_JDK_HOME = "/usr/lib/jvm/java-21-openjdk-amd64"
_JVM_PATH = os.path.join(_JDK_HOME, "lib", "server", "libjvm.so")
if os.path.exists(_JVM_PATH):
    os.environ["JAVA_HOME"] = _JDK_HOME
    os.environ["JDK_HOME"] = _JDK_HOME
    os.environ["PATH"] = f"{_JDK_HOME}/bin:" + os.environ.get("PATH", "")
    os.environ["LD_LIBRARY_PATH"] = f"{_JDK_HOME}/lib/server"
    os.environ["JVM_PATH"] = _JVM_PATH
    try:
        import jnius_config

        jnius_config.set_options(
            "--add-modules=jdk.incubator.vector",
            f"-Djava.home={_JDK_HOME}",
            f"-Djava.library.path={_JDK_HOME}/lib/server",
        )
        print(
            f"[pyserini jvm setup] JAVA_HOME={_JDK_HOME}, "
            f"JVM_PATH={_JVM_PATH}, python_prefix={sys.prefix}"
        )
    except Exception as e:
        print(f"[pyserini jvm setup] failed to set jnius_config: {e}")
else:
    print(f"[pyserini jvm setup] expected JVM at {_JVM_PATH} not found")

os.environ["CUDA_VISIBLE_DEVICES"] = "7"
from explorer import Explorer

[pyserini jvm setup] JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64, JVM_PATH=/usr/lib/jvm/java-21-openjdk-amd64/lib/server/libjvm.so, python_prefix=/home/xingkun/miniconda3/envs/webshop_update_py310


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# start_timestep = 0
model_name = "mistral3.2-24b"
# model_name = "qwen3-30b"
env_name = "webshop"
backend_env = "webshop-vanilla"

max_steps = 20
threshold =  0.25
decay_rate =  300
start_timestep = 0

use_global_verifier = True
use_experience = True
save_experience = True

cur_name =f"log_{use_global_verifier}_{model_name}_{env_name}_{backend_env}"
log_dir=f"./log/"
backend_log_dir=log_dir
storage_path=f"./storage/exp_store.json"
depreiciate_exp_store_path=f"./storage/depreiciate_exp_store.json"

In [4]:
enable_confirm_purchase= True
session = 8

e = Explorer(
    model_name = model_name,
    env_name = env_name,
    backend_env = backend_env,
    max_steps = max_steps,
    use_global_verifier = use_global_verifier,
    use_experience = use_experience,
    save_experience = save_experience,
    start_timestep = start_timestep,
    threshold = threshold,
    decay_rate = decay_rate,
    log_dir=log_dir,
    backend_log_dir=backend_log_dir,
    storage_path=storage_path,
    depreiciate_exp_store_path=depreiciate_exp_store_path,
    enable_confirm_purchase=enable_confirm_purchase,
    session=session,
)

Loading weights: 100%|█| 585/585 [00:12<00:00, 46.69it/s, Materializing param=model.vision_tower.transfor
Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


100%|████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 118741.44it/s]
Dec 31, 2025 1:37:56 AM org.apache.lucene.store.MemorySegmentIndexInputProvider <init>
INFO: Using MemorySegmentIndexInput with Java 21; to disable start with -Dorg.apache.lucene.store.MMapDirectory.enableMemorySegments=false


Loaded 14 goals.


In [5]:
e.adaptor.initialize_env()

  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")


In [6]:
cur_state = e.adaptor.get_state()
print(cur_state)

{'url': 'http://127.0.0.1:3000/<session_id>', 'html': 'WebShop Search'}


In [7]:
retrieved_experiences = e.exp_backend.retrieve_experience(cur_state)
print(retrieved_experiences)

[]


In [8]:
p = e.adaptor.get_action_prompt(retrieved_experiences)
print(p)

<s>[INST] You are an intelligent exploration agent navigating a web shop. Your goal is to understand the task instruction and buy the correct product with the highest possible score of (1.0). Respond with only the action you want to execute, without any additional explanation or formatting.

Task Instruction: i am looking for x-large, red color women faux fur lined winter warm jacket coat, and price lower than 90.00 dollars

Current URL: http://127.0.0.1:3000/<session_id>

Current Webpage Display Text: WebShop Search

Available Actions: [search]

IMPORTANT STRATEGY:
1. SCORE 1.0 IS THE ONLY GOAL.
2. If a previous action got 1.0 -> REPEAT IT.
3. If a previous action got anything less than 1.0 (e.g. 0.5, 0.75) -> IT IS WRONG. DO NOT REPEAT IT. CHOOSE A DIFFERENT OPTION.
---

You goal is to buy the most suitable product that satisfies the task instruction and get the maximum score (1.0).
Please consider selecitng required options before buying.
Based on the current state and task instruct

In [9]:
model_inputs = e.explorer_model.tokenizer(
            [p],
            return_tensors="pt",
        ).to(e.explorer_model.model.device)

In [10]:
import torch
with torch.no_grad():
    generated_ids = e.explorer_model.model.generate(
        **model_inputs,
        max_new_tokens=e.explorer_model.max_new_tokens,
        pad_token_id=e.explorer_model.tokenizer.pad_token_id,
        do_sample=True,
        temperature=0.7, # Mistral models often prefer slightly lower temperature
        top_p=0.95,
    )

# Strip the prompt tokens from the output
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [11]:
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

In [13]:
response = e.explorer_model.tokenizer.batch_decode(generated_ids, skip_special_tokens=False)[0]

In [14]:
response

''

In [None]:
enable_confirm_purchase= False
session = 9

e.init_after_model(
    model_name = model_name,
    env_name = env_name,
    backend_env = backend_env,
    max_steps = max_steps,
    use_global_verifier = use_global_verifier,
    use_experience = use_experience,
    save_experience = save_experience,
    start_timestep = start_timestep,
    threshold = threshold,
    decay_rate = decay_rate,
    log_dir=log_dir,
    backend_log_dir=backend_log_dir,
    storage_path=storage_path,
    depreiciate_exp_store_path=depreiciate_exp_store_path,
    enable_confirm_purchase=enable_confirm_purchase,
    session=session,
)

In [None]:
# for i in range(20):
#     print(f"--- {i}/20 ---")
#     e.explore()

In [None]:
e.adaptor.initialize_env()
print("-----------------------------")
print(e.adaptor.get_env_description())
e.adaptor.step("search[coat]")
e.adaptor.step("click[b09kp78g37]")
e.adaptor.step("click[x-large]")
# e.adaptor.step("click[yellow]")
e.adaptor.step("click[red]")
e.adaptor.step("click[buy now]")
print("-----------------------------")
print(e.adaptor.get_state())
print("-----------------------------")
print(e.adaptor.get_available_actions())
print("-----------------------------")

In [None]:
e.adaptor.step("click[a]")
e.adaptor.extract_reward_score()

In [None]:
e.adaptor.extract_reward_score()


In [None]:
e.adaptor.initialize_env()

In [None]:
print(e.adaptor.get_env_description())

In [None]:
e.adaptor.step("search[coat]")
e.adaptor.step("click[b09kp78g37]")


In [None]:
cur_state = e.adaptor.get_state()
e.exp_backend.retrieve_experience(cur_state)