In [35]:
import pandas as pd
import numpy as np
from datetime import datetime
import pandas as pd
import numpy as np
import arviz as az
import pickle
from pathlib import Path
import cmdstanpy
# Point CmdStanPy at the CmdStan installation.
# A non-empty CMDSTAN environment variable takes precedence, so the notebook
# also runs on machines where CmdStan is installed somewhere else; the
# original developer path is kept as the fallback to stay backward-compatible.
import os
cmdstanpy.set_cmdstan_path(os.environ.get("CMDSTAN") or "/home/enzou/cmdstan")

In [36]:
# Load the training CSV; `data_partida` is parsed into datetimes while reading.
train_df = pd.read_csv(
    filepath_or_buffer="data/quarters_df/quarters_train_df.csv",
    parse_dates=["data_partida"],
)

In [38]:
# ==========================================================
# Convert the DataFrame from 'wide' to 'long'
# ==========================================================
# Each input row carries the statistics of both teams in `*_a` / `*_b`
# columns. The long frame stacks two relabelled copies of every row: one seen
# from team A's side and one seen from team B's side.

# Wide-column stems (without the `_a` / `_b` suffix) -> long-format names.
_STAT_COLUMNS = {
    "fg2_att": "fga2", "fg2_made": "fgm2",
    "fg3_att": "fga3", "fg3_made": "fgm3",
    "ft_att": "fta", "ft_made": "ftm",
    "pace": "poss",
    "pts": "pts",
}


def _team_view(wide_df, side, other):
    """Return `wide_df` relabelled from the point of view of team `side`.

    Parameters
    ----------
    wide_df : pandas.DataFrame
        Frame holding `team_hash_<x>` and `<stat>_<x>` columns for both sides.
    side : str
        Suffix of the team whose columns become `team`, `fga2`, ... ('a' or 'b').
    other : str
        Suffix of the opposing team, whose hash becomes `opp`.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the renamed columns and
        a `side` column holding the upper-cased suffix.
    """
    renames = {f"team_hash_{side}": "team", f"team_hash_{other}": "opp"}
    renames.update(
        {f"{stem}_{side}": target for stem, target in _STAT_COLUMNS.items()}
    )
    return wide_df.rename(columns=renames).assign(side=side.upper())


def _period_sort_key(value):
    """Sort key that orders regulation periods first, then overtimes, numerically.

    Labels are compared through their string form, so integer and string
    labels can be mixed. The numeric part is compared as a number, which keeps
    'OT2' before 'OT10'. Labels without a parsable number sort last within
    their group; the label itself is the final tie-breaker.
    """
    label = str(value)
    is_overtime = label.startswith("OT")
    numeric_part = label[2:] if is_overtime else label
    try:
        order = float(numeric_part)
    except ValueError:
        order = float("inf")
    if order != order:  # NaN does not order consistently; push it to the end
        order = float("inf")
    return (is_overtime, order, label)


# Team A's view and team B's view of every row.
df_a = _team_view(train_df, "a", "b")
df_b = _team_view(train_df, "b", "a")

# Stack both views.
df_long = pd.concat([df_a, df_b], ignore_index=True)

# Keep only the useful columns (`side` is not listed, so it is dropped here).
# `.copy()` makes df_long an independent frame, so the column assignment below
# does not write into a selection of another frame (SettingWithCopyWarning).
df_long = df_long[[
    "hash_partida", "periodo", "temporada", "team", "opp",
    "fga2", "fgm2", "fga3", "fgm3", "fta", "ftm", "poss", "pts"
]].copy()

# ==========================================================
# Handle overtimes
# ==========================================================
# Map period labels (1, 2, 3, 4, OT1, OT2, ...) to consecutive 1-based integers.
periodos = sorted(df_long["periodo"].unique(), key=_period_sort_key)
period_index = {p: i for i, p in enumerate(periodos, start=1)}

df_long["period"] = df_long["periodo"].map(period_index).astype(int)

In [41]:

# Small constant handed to the Stan program as `eps`.
EPS = 1e-6

# Team indices: 1-based ids assigned in sorted order of the team hashes.
teams = sorted(df_long["team"].unique())
team_index = {t: i + 1 for i, t in enumerate(teams)}

df_long["team_id"] = df_long["team"].map(team_index)
df_long["opp_id"] = df_long["opp"].map(team_index)


def _int_list(column):
    """Return `df_long[column]` cast to int as a plain Python list."""
    return df_long[column].astype(int).to_list()


# Data dictionary passed to the Stan program.
stan_data = {
    "N": len(df_long),
    "T": len(teams),
    "Q": df_long["period"].nunique(),
    "G": df_long["hash_partida"].nunique(),
    "team": _int_list("team_id"),
    "opp": _int_list("opp_id"),
    "period": _int_list("period"),
    # Category codes are 0-based; add 1 to get 1-based game ids.
    "game_id": df_long["hash_partida"].astype("category").cat.codes.add(1).to_list(),
    "poss": df_long["poss"].astype(float).to_list(),
    "eps": EPS,
    "y2a": _int_list("fga2"),
    "y3a": _int_list("fga3"),
    "yfta": _int_list("fta"),
    "y2m": _int_list("fgm2"),
    "y3m": _int_list("fgm3"),
    "yftm": _int_list("ftm"),
}

# Compile and run Stan: 4 parallel chains, 2000 warmup + 2000 sampling
# iterations each, with a fixed seed.
model = cmdstanpy.CmdStanModel(stan_file="models/v1/mack_model_ldb.stan")
fit = model.sample(
    data=stan_data,
    iter_warmup=2000,
    iter_sampling=2000,
    chains=4,
    parallel_chains=4,
    seed=42,
    max_treedepth=12,
    adapt_delta=0.9,
    show_progress=True
)

# Save the results.
# The draws, the input data and the summary are persisted right after
# sampling, before any post-processing, so that a failure in the ArviZ
# conversion or in LOO cannot leave a long sampling run unsaved.
out_dir = Path("models/v1")
out_dir.mkdir(parents=True, exist_ok=True)

fit.save_csvfiles(str(out_dir / "draws"))

with open(out_dir / "stan_data.pkl", "wb") as f:
    pickle.dump(stan_data, f)

summary_df = fit.summary()
summary_df.to_parquet(out_dir / "summary.parquet")

# Convert to InferenceData and persist it before running LOO.
idata = az.from_cmdstanpy(posterior=fit)

with open(out_dir / "idata.pkl", "wb") as f:
    pickle.dump(idata, f)

# LOO-PSIS
# NOTE(review): az.loo needs pointwise log-likelihood values in `idata`;
# presumably the Stan program exposes them under a name ArviZ picks up by
# default — confirm against the model file.
loo = az.loo(idata, pointwise=True)
print(loo)

with open(out_dir / "loo.txt", "w") as f:
    f.write(str(loo))

with open(out_dir / "loo.pkl", "wb") as f:
    pickle.dump(loo, f)

print("✅ Modelo salvo em models/v1 com draws, summary, stan_data e loo-psis.")

17:17:30 - cmdstanpy - INFO - compiling stan file /home/enzou/ldb/models/v1/mack_model_ldb.stan to exe file /home/enzou/ldb/models/v1/mack_model_ldb
17:18:50 - cmdstanpy - INFO - compiled model executable: /home/enzou/ldb/models/v1/mack_model_ldb
17:18:50 - cmdstanpy - INFO - CmdStan start processing
chain 1 |[33m          [0m| 00:00 Status
[A

chain 1 |[33m▏         [0m| 00:00 Status
[A

[A[A

[A[A
chain 1 |[33m▍         [0m| 06:58 Iteration:    1 / 4000 [  0%]  (Warmup)17:25:52 - cmdstanpy - ERROR - Chain [1] error: terminated by signal 2 Unknown error -2
17:25:52 - cmdstanpy - ERROR - Chain [3] error: terminated by signal 2 Unknown error -2
17:25:52 - cmdstanpy - ERROR - Chain [2] error: terminated by signal 2 Unknown error -2
17:25:52 - cmdstanpy - ERROR - Chain [4] error: terminated by signal 2 Unknown error -2


KeyboardInterrupt: 