# TP-MCTS on Colab

This notebook clones the repo and runs the same experiment under two reward modes (`terminal` and `deadline`) so that their results can be compared.

In [8]:
# Clone repo
%cd /content
!rm -rf tp_mcts
!git clone https://github.com/eliezerRevach/tp_mcts.git
%cd /content/tp_mcts

/content
Cloning into 'tp_mcts'...
remote: Enumerating objects: 2151, done.
remote: Counting objects: 100% (296/296), done.
remote: Compressing objects: 100% (146/146), done.
remote: Total 2151 (delta 173), reused 258 (delta 148), pack-reused 1855 (from 1)
Receiving objects: 100% (2151/2151), 15.03 MiB | 8.19 MiB/s, done.
Resolving deltas: 100% (1335/1335), done.
Updating files: 100% (127/127), done.
/content/tp_mcts


In [9]:
%%bash
# Sanity-check the checkout: print the cloned commit, then show which files
# Python resolves `unified_planning` and its parser to, and whether the
# parser source mentions `reward_mode`.
set -e
cd /content/tp_mcts

printf '%s\n' "Repo commit:"
git rev-parse --short HEAD
git log -1 --oneline

printf '\n'
python - <<'PY'
import unified_planning
import unified_planning.parser as parser_module

parser_path = parser_module.__file__
print("unified_planning package:", unified_planning.__file__)
print("parser file:", parser_path)

# Plain substring search over the parser source text.
with open(parser_path, "r") as handle:
    has_flag = "reward_mode" in handle.read()
print("--reward_mode present:", has_flag)
PY

Repo commit:
ea61dfe
ea61dfe Adds thesis context and reward mode comparison

unified_planning package: /content/tp_mcts/unified_planning/__init__.py
parser file: /content/tp_mcts/unified_planning/parser.py
--reward_mode present: True


In [10]:
# Install dependencies
!pip -q install dill numpy

In [11]:
# Global run config (edit here to affect all runs)
# The settings are written as KEY=VALUE lines to run_config.env, which the
# %%bash run cells load with `source`.
import shlex

CONFIG = {
    "DOMAIN": "simple",
    "GARBAGE_AMOUNT": 3,
    "DEADLINE": 20,
    "RUNS": 40,
    "SEARCH_TIME": 1,
    "SELECTION_TYPE": "rootInterval",
    "SEED": 123,
}

config_path = "/content/tp_mcts/run_config.env"
with open(config_path, "w", encoding="utf-8") as f:
    for key, value in CONFIG.items():
        # The file is executed by bash via `source`, so every value is
        # shell-quoted: values containing spaces or metacharacters stay a
        # single literal word instead of being split or interpreted.
        # Simple values such as `simple` or `3` are written unchanged.
        f.write(f"{key}={shlex.quote(str(value))}\n")

print("Wrote", config_path)
print(CONFIG)

Wrote /content/tp_mcts/run_config.env
{'DOMAIN': 'simple', 'GARBAGE_AMOUNT': 3, 'DEADLINE': 20, 'RUNS': 40, 'SEARCH_TIME': 1, 'SELECTION_TYPE': 'rootInterval'}


In [12]:
%%bash
# Small/easier domain for quick runs with higher success.
# Old reward mode (terminal only).
#
# -e           abort on any failing command
# -u           abort if a config variable is missing from run_config.env
# -o pipefail  make the python | tee | awk pipeline report python's failure
#              (without it only awk's exit status would be seen)
set -euo pipefail
# The script path and the log files are relative to the repo root, so do not
# rely on the kernel's current directory.
cd /content/tp_mcts
source /content/tp_mcts/run_config.env
MODE=terminal

echo "=== reward_mode=$MODE ==="
# Capture the pipeline status instead of exiting immediately so that the
# summary and the log tail below are still shown when the run fails.
status=0
python -u unified_planning/run_domain.py --domain "$DOMAIN" --garbage_amount "$GARBAGE_AMOUNT" --deadline "$DEADLINE" --runs "$RUNS" --search_time "$SEARCH_TIME" --selection_type "$SELECTION_TYPE" --reward_mode "$MODE" --seed "$SEED" 2>&1 | \
  tee "run_${MODE}.log" | \
  awk -v total="$RUNS" '/^Started round/ {count++; print "Progress:", count "/" total}' || status=$?

echo "--- summary ($MODE) ---"
grep -E "Completed|Amount of success|Average success time|STD success time" "run_${MODE}.log" || true
echo "--- last 50 lines ($MODE) ---"
tail -n 50 "run_${MODE}.log" || true

# Surface a failed run as a failed cell rather than a silently short log.
if [ "$status" -ne 0 ]; then
  echo "run_domain.py pipeline failed with exit status $status" >&2
  exit "$status"
fi

=== reward_mode=terminal ===
Progress: 1/40
Progress: 2/40
Progress: 3/40
Progress: 4/40
Progress: 5/40
Progress: 6/40
Progress: 7/40
Progress: 8/40
Progress: 9/40
Progress: 10/40
Progress: 11/40
Progress: 12/40
Progress: 13/40
Progress: 14/40
Progress: 15/40
Progress: 16/40
Progress: 17/40
Progress: 18/40
Progress: 19/40
Progress: 20/40
Progress: 21/40
Progress: 22/40
Progress: 23/40
Progress: 24/40
Progress: 25/40
Progress: 26/40
Progress: 27/40
Progress: 28/40
Progress: 29/40
Progress: 30/40
Progress: 31/40
Progress: 32/40
Progress: 33/40
Progress: 34/40
Progress: 35/40
Progress: 36/40
Progress: 37/40
Progress: 38/40
Progress: 39/40
Progress: 40/40
--- summary (terminal) ---
Completed = 40
Amount of success = 40
Average success time = 12
STD success time = 0.0
--- last 50 lines (terminal) ---
The time of the plan so far: 4
started step 2
Current state is state: got(0) ; 
The chosen action is start_action_1
The time of the plan so far: 4
started step 3
Current state is state: got(0) 

In [13]:
%%bash
# New reward mode (deadline-aware).
#
# -e           abort on any failing command
# -u           abort if a config variable is missing from run_config.env
# -o pipefail  make the python | tee | awk pipeline report python's failure
#              (without it only awk's exit status would be seen)
set -euo pipefail
# The script path and the log files are relative to the repo root, so do not
# rely on the kernel's current directory.
cd /content/tp_mcts
source /content/tp_mcts/run_config.env
MODE=deadline

echo "=== reward_mode=$MODE ==="
# Capture the pipeline status instead of exiting immediately so that the
# summary and the log tail below are still shown when the run fails.
status=0
python -u unified_planning/run_domain.py --domain "$DOMAIN" --garbage_amount "$GARBAGE_AMOUNT" --deadline "$DEADLINE" --runs "$RUNS" --search_time "$SEARCH_TIME" --selection_type "$SELECTION_TYPE" --reward_mode "$MODE" --seed "$SEED" 2>&1 | \
  tee "run_${MODE}.log" | \
  awk -v total="$RUNS" '/^Started round/ {count++; print "Progress:", count "/" total}' || status=$?

echo "--- summary ($MODE) ---"
grep -E "Completed|Amount of success|Average success time|STD success time" "run_${MODE}.log" || true
echo "--- last 50 lines ($MODE) ---"
tail -n 50 "run_${MODE}.log" || true

# Surface a failed run as a failed cell rather than a silently short log.
if [ "$status" -ne 0 ]; then
  echo "run_domain.py pipeline failed with exit status $status" >&2
  exit "$status"
fi

=== reward_mode=deadline ===
Progress: 1/40
Progress: 2/40
Progress: 3/40
Progress: 4/40
Progress: 5/40
Progress: 6/40
Progress: 7/40
Progress: 8/40
Progress: 9/40
Progress: 10/40
Progress: 11/40
Progress: 12/40
Progress: 13/40
Progress: 14/40
Progress: 15/40
Progress: 16/40
Progress: 17/40
Progress: 18/40
Progress: 19/40
Progress: 20/40
Progress: 21/40
Progress: 22/40
Progress: 23/40
Progress: 24/40
Progress: 25/40
Progress: 26/40
Progress: 27/40
Progress: 28/40
Progress: 29/40
Progress: 30/40
Progress: 31/40
Progress: 32/40
Progress: 33/40
Progress: 34/40
Progress: 35/40
Progress: 36/40
Progress: 37/40
Progress: 38/40
Progress: 39/40
Progress: 40/40
--- summary (deadline) ---
Completed = 40
Amount of success = 40
Average success time = 12
STD success time = 0.0
--- last 50 lines (deadline) ---
The time of the plan so far: 4
started step 2
Current state is state: got(0) ; 
The chosen action is start_action_1
The time of the plan so far: 4
started step 3
Current state is state: got(0) 

In [14]:
%%bash
# Print the headline statistics from each reward mode's log, one mode after
# the other. A log without matching lines (or a missing log) is tolerated.
summary_pattern="Completed|Amount of success|Average success time|STD success time"
for MODE in terminal deadline; do
  printf '=== reward_mode=%s ===\n' "$MODE"
  grep -E "$summary_pattern" "run_${MODE}.log" || true
done

=== reward_mode=terminal ===
Completed = 40
Amount of success = 40
Average success time = 12
STD success time = 0.0
=== reward_mode=deadline ===
Completed = 40
Amount of success = 40
Average success time = 12
STD success time = 0.0


In [15]:
import os
import platform
import sys

# Report the interpreter, OS platform string, working directory, and whether
# the /content directory exists, one item per line.
print(
    f"executable: {sys.executable}",
    f"platform: {platform.platform()}",
    f"cwd: {os.getcwd()}",
    f"has /content: {os.path.exists('/content')}",
    sep="\n",
)

executable: /usr/bin/python3
platform: Linux-6.6.105+-x86_64-with-glibc2.35
cwd: /content/tp_mcts
has /content: True


In [16]:
# Record the CPU details of the current runtime host (shell escape to lscpu).
!lscpu


Architecture:                x86_64
  CPU op-mode(s):            32-bit, 64-bit
  Address sizes:             46 bits physical, 48 bits virtual
  Byte Order:                Little Endian
CPU(s):                      2
  On-line CPU(s) list:       0,1
Vendor ID:                   GenuineIntel
  Model name:                Intel(R) Xeon(R) CPU @ 2.20GHz
    CPU family:              6
    Model:                   79
    Thread(s) per core:      2
    Core(s) per socket:      1
    Socket(s):               1
    Stepping:                0
    BogoMIPS:                4399.99
    Flags:                   fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pg
                             e mca cmov pat pse36 clflush mmx fxsr sse sse2 ss h
                             t syscall nx pdpe1gb rdtscp lm constant_tsc rep_goo
                             d nopl xtopology nonstop_tsc cpuid tsc_known_freq p
                             ni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2ap
                   