In [None]:
import pandas as pd

# Notebook-wide pandas display settings: show up to 100 columns, at least
# 10 rows when a frame is truncated, and floats with three decimals.
for _option, _value in (
    ("display.max_columns", 100),
    ("display.min_rows", 10),
    ("display.precision", 3),
    ("display.float_format", "{:.3f}".format),
):
    pd.set_option(_option, _value)

import numpy as np

# NumPy array printing: three decimals, no scientific notation, and an
# explicit fixed-point formatter for floats. Everything is set in a single
# call because `set_printoptions` resets `formatter` on every call, so the
# formatter must be supplied together with (or after) the other options.
np.set_printoptions(
    precision=3,
    suppress=True,
    formatter={"float_kind": "{:.3f}".format},
)

import sys
from pathlib import Path

# from psych import _fast
# import psych.estimation as est

# Project root. Note that "__file__" is a *string literal* here, not the
# module attribute, so it is resolved against the current working directory:
# parents[0] is the working directory itself and parents[1] is its parent.
ROOT_PATH = (
    Path("__file__").resolve().parents[1]
)  # 0 for .py or unsaved notebooks and 1 for .ipynb

# Make project-local packages importable. Guarded so that re-running this
# cell does not keep appending the same entry to sys.path.
_root_posix = ROOT_PATH.as_posix()
if _root_posix not in sys.path:
    sys.path.append(_root_posix)

# Standard project sub-directories, all derived from ROOT_PATH.
DATA_PATH = ROOT_PATH / "data"
RESULTS_PATH = ROOT_PATH / "results"
MODEL_PATH = ROOT_PATH / "analysis" / "models"

In [None]:
# Read the response data from the project's data directory.
_resp_file = DATA_PATH / "COTS_2025_data.parquet"
df_resp = pd.read_parquet(_resp_file)

In [None]:
# Attach to every row the highest score observed for that row's item.
df_resp["max_score"] = (
    df_resp["score"].groupby(df_resp["item_id"]).transform("max")
)

In [None]:
# NOTE(review): in the visible part of this notebook `est` is bound only by
# the commented-out `import psych.estimation as est` in the setup cell, so on
# a fresh kernel this call raises NameError until that import is restored.

# Column-label mapping handed to `est.item_calibrations`.
# TODO confirm which side is the frame's column name and which is the
# name the estimation routine expects.
calibration_labels = dict(
    itemset_id="setID",
    person_id="regID",
    score="score",
    item_id="itemID",
    op_theta="theta",
)

df_nr = est.item_calibrations(df_resp, label_mapper=calibration_labels)

In [None]:
import cmdstanpy
import pandas as pd
import numpy as np


# --- Sampler settings ---
N_CHAINS = 4
# NOTE(review): 10 warm-up iterations is far below CmdStan's default of 1000
# and leaves almost no room for step-size adaptation. It looks like a
# smoke-test setting; raise it before trusting the estimates.
ITER_WARMUP = 10
ITER_SAMPLING = 1000
STAN_SEED = 12345  # fixed so repeated runs give the same draws

# 1. Ensure IDs are dense integers starting from 1 for Stan.
#    `pd.factorize` numbers the levels 0..n-1 in order of first appearance,
#    so adding 1 yields gap-free 1..n codes whatever the raw IDs look like.
df_resp["stan_pid"] = pd.factorize(df_resp["person_id"])[0] + 1
df_resp["stan_iid"] = pd.factorize(df_resp["item_id"])[0] + 1

# 2. Observation-level vectors, built from the dense IDs created above
#    (not from the raw IDs) so they are valid 1-based Stan indices.
person_id = df_resp["stan_pid"].to_numpy()
item_id = df_resp["stan_iid"].to_numpy()
score = (df_resp["score"] + 1).to_numpy()  # shift scores up by one

# `factorize` codes missing values as -1, which becomes 0 after the shift.
if (person_id < 1).any() or (item_id < 1).any():
    raise ValueError("person_id / item_id contain missing values")

n_items = df_resp["stan_iid"].nunique()
n_persons = df_resp["stan_pid"].nunique()
n_obs = len(df_resp)

# 3. Item- and person-level vectors. Grouping by the dense ID sorts the
#    result by that ID, so position k-1 holds the value for ID k and the
#    vectors line up with the `ii` / `pp` indices sent to Stan.
max_scores = (df_resp.groupby("stan_iid")["score"].max() + 1).to_numpy()
overall_max_score = max_scores.max()

theta = df_resp.groupby("stan_pid")["op_theta"].first()

# --- Create Stan Data Dictionary ---
# NOTE(review): the key names must match the data block of
# pcm_fixed_theta.stan, which is not visible from this notebook.
stan_data = {
    "I": n_items,
    "P": n_persons,
    "N": n_obs,
    "K_max": overall_max_score,
    "pp": person_id,
    "ii": item_id,
    "resp": score,
    "K": max_scores,
    "theta": theta.to_numpy(),
}

# --- Compile and Run Model ---
# Use the project-relative MODEL_PATH instead of a user-specific absolute
# path. NOTE(review): assumes MODEL_PATH resolves to the folder that holds
# pcm_fixed_theta.stan; confirm for this notebook's location.
model = cmdstanpy.CmdStanModel(
    stan_file=str(MODEL_PATH / "pcm_fixed_theta.stan")
)

fit = model.sample(
    data=stan_data,
    chains=N_CHAINS,
    parallel_chains=N_CHAINS,
    iter_warmup=ITER_WARMUP,
    iter_sampling=ITER_SAMPLING,
    seed=STAN_SEED,
    show_progress=True,
)


# Inspect the estimated delta parameters. `summary()` has no variable filter,
# so take the full table and keep the rows named `delta` or `delta[...]`.
fit_summary = fit.summary()
delta_summary = fit_summary[
    fit_summary.index.astype(str).str.match(r"delta(\[|$)")
]
print(delta_summary)

In [None]:
# `os` is not imported by any earlier cell (its first import is in a later
# one), so import it here to keep this cell runnable on a fresh kernel.
import os

# Show the individual PATH entries, split on the platform's separator.
os.environ["PATH"].split(os.pathsep)

In [None]:
import os
import cmdstanpy

# Put the RTools directories at the very front of PATH so their tools are
# found before anything else.
_rtools_prefix = r"C:\rtools44\usr\bin;C:\rtools44\x86_64-w64-mingw32.static.posix\bin;"
os.environ["PATH"] = _rtools_prefix + os.environ["PATH"]

# Use RTools make, not mingw32-make.
# NOTE(review): cmdstanpy is imported just above this assignment; if it
# reads MAKE at import time this has no effect in the current kernel - confirm.
os.environ["MAKE"] = "make"

# Drop PATH entries that might interfere: anything under "miniforge3" unless
# it is a "Scripts" directory, and anything mentioning "Strawberry".
path_parts = os.environ["PATH"].split(os.pathsep)
filtered_paths = []
for _entry in path_parts:
    _conda_non_scripts = "miniforge3" in _entry and "Scripts" not in _entry
    if _conda_non_scripts or "Strawberry" in _entry:
        continue
    filtered_paths.append(_entry)
os.environ["PATH"] = os.pathsep.join(filtered_paths)

# Try to compile
# model = cmdstanpy.CmdStanModel(stan_file=str(MODEL_PATH / "pcm_fixed_theta.stan"))

In [None]:
# --------------------------------------------------------------
# 0️⃣  *** RUN THIS IN A FRESH PYTHON PROCESS ***
# --------------------------------------------------------------

import os
import sys
import shutil
from pathlib import Path

# ------------------------------------------------------------------
# 1️⃣  Tell the script where your R‑Tools installation lives
# ------------------------------------------------------------------
# Root of the R-Tools installation; change if you use rtools42/43.
RTOOLS_ROOT = Path(r"C:\rtools44")
# Sub-directories that are put on PATH further down.
# NOTE(review): an earlier cell points at
# "x86_64-w64-mingw32.static.posix\bin" for the toolchain rather than
# "mingw64\bin"; confirm which one holds the compilers on this machine.
RTOOLS_USR = RTOOLS_ROOT.joinpath("usr", "bin")
RTOOLS_MINGW = RTOOLS_ROOT.joinpath("mingw64", "bin")
RTOOLS_SH = RTOOLS_ROOT.joinpath("bin")  # expected to hold sh.exe, bash.exe, etc. - TODO confirm

# ------------------------------------------------------------------
# 2️⃣  Build a **clean** PATH
# ------------------------------------------------------------------
# Windows system folders that are kept on the rebuilt PATH. The Windows
# directory comes from %SystemRoot%, falling back to C:\Windows.
_system_root = Path(os.getenv("SystemRoot", r"C:\Windows"))
SYSTEM_PATHS = [
    str(_system_root / "system32"),
    str(_system_root.joinpath("System32", "Wbem")),
    str(_system_root.joinpath("System32", "WindowsPowerShell", "v1.0")),
]

# Order matters: the three R-Tools directories come first, followed by the
# essential system directories. Nothing else survives on PATH.
clean_path_parts = [str(_dir) for _dir in (RTOOLS_USR, RTOOLS_MINGW, RTOOLS_SH)]
clean_path_parts += SYSTEM_PATHS
os.environ["PATH"] = os.pathsep.join(clean_path_parts)

# ------------------------------------------------------------------
# 3️⃣  Wipe any Conda / Strawberry / MinGW environment variables
# ------------------------------------------------------------------
# Anything that points at another compiler, linker, or Unix tool.
# An entry ending in ".*" is a prefix pattern: it matches every variable
# whose name starts with the text before the ".*".
contaminants = [
    "CC", "CXX", "CPP", "LD", "AR", "RANLIB",
    "CFLAGS", "CXXFLAGS", "LDFLAGS", "CPPFLAGS",
    "MAKEFLAGS", "MAKELEVEL", "MFLAGS",
    "CONDA_PREFIX", "CONDA_DEFAULT_ENV", "CONDA_EXE",
    "CONDA_SHLVL", "CONDA_PROMPT_MODIFIER",
    "CONDA_BACKUP_.*",        # any backup vars Conda may have created
    "PKG_CONFIG_PATH", "PKG_CONFIG_LIBDIR",
    "INCLUDE", "LIB", "LIBRARY_PATH", "CPATH",
    "STRABERRY", "STRAWBERRY",  # just in case the word appears
]


def purge_env_vars(patterns, env=None):
    """Delete every variable in ``env`` whose name matches ``patterns``.

    Parameters
    ----------
    patterns : iterable of str
        Exact variable names, or prefix patterns written as ``"PREFIX.*"``.
    env : MutableMapping, optional
        Mapping to clean in place. Defaults to ``os.environ``.

    Returns
    -------
    list of str
        Names of the variables that were removed.

    Each variable is deleted at most once, so overlapping patterns (e.g.
    ``"CONDA_.*"`` together with ``"CONDA_BACKUP_.*"``) cannot trigger a
    ``KeyError`` from deleting an already-removed name.
    """
    if env is None:
        env = os.environ
    patterns = list(patterns)
    exact_names = {p for p in patterns if not p.endswith(".*")}
    prefixes = tuple(p[:-2] for p in patterns if p.endswith(".*"))

    removed = []
    # Iterate over a snapshot of the keys because the mapping is mutated.
    for name in list(env):
        if name in exact_names or name.startswith(prefixes):
            del env[name]
            removed.append(name)
    return removed


_removed_env_vars = purge_env_vars(contaminants)

# ------------------------------------------------------------------
# 4️⃣  Explicitly tell CmdStan which make and shell to use
# ------------------------------------------------------------------
os.environ.update(
    {
        # Name of the make program to use.
        "MAKE": "make",
        # Shell executable taken from the R-Tools `bin` directory.
        # NOTE(review): confirm sh.exe actually lives in RTOOLS_SH.
        "SHELL": str(RTOOLS_SH / "sh.exe"),
        # Optional but nice to have: both point at the R-Tools root.
        "RTOOLS_ROOT": str(RTOOLS_ROOT),
        "RTOOLS_HOME": str(RTOOLS_ROOT),
    }
)

# ------------------------------------------------------------------
# 5️⃣  Quick sanity‑check – see what the current process can find
# ------------------------------------------------------------------
def which(cmd):
    """Look up `cmd` on the current PATH.

    Thin wrapper around `shutil.which`: returns whatever it returns, i.e.
    the path of the executable that was found, or None if there is none.
    """
    located = shutil.which(cmd)
    return located

# Report where each required build tool resolves to in this process.
print("\n=== SANITY CHECK ===")
for _label, _executable in (
    ("make   :", "make"),
    ("g++    :", "x86_64-w64-mingw32-g++.exe"),
    ("gcc    :", "x86_64-w64-mingw32-gcc.exe"),
    ("ld     :", "ld.exe"),
    ("sh     :", "sh"),
    ("cut    :", "cut"),
    ("expr   :", "expr"),
):
    print(_label, which(_executable))
print("PATH   :", os.environ["PATH"][:200], "...")   # first part only
print("-------------------\n")

In [None]:
# Install CmdStan through cmdstanpy's installer.
# NOTE(review): this is the last cell, after cells that already compile and
# sample a model; consider moving installation into the setup section so a
# top-to-bottom run has CmdStan available before it is needed.
cmdstanpy.install_cmdstan()