# Debug de Inferência (step-by-step)
Notebook para validar visualmente a classe de inferência usando `data/inference.parquet` com 3 usuários.

## 1) Carregar dependências e configurar ambiente
Importe bibliotecas e ajuste o caminho do projeto.

In [1]:
import os
import sys
from pathlib import Path

import pandas as pd
import numpy as np
import torch

# The project root is taken as the directory one level above the current
# working directory (the notebook is expected to run from a subfolder).
project_root = Path.cwd().parents[0]

# Register the root on sys.path only once, so the project packages imported
# right below can be resolved without duplicating the entry on re-runs.
if all(entry != str(project_root) for entry in sys.path):
    sys.path.append(str(project_root))

from main.config import config
from core.model import Model
from core.inference import InferenceEngine


## 2) Ler `inference.parquet` em um DataFrame
Carregue o arquivo e inspecione o schema.

In [2]:
import pyarrow.parquet as pq

# Location of the inference sample inside the project tree.
data_path = project_root / "data" / "inference.parquet"
assert data_path.exists(), f"Arquivo não encontrado: {data_path}"

# Read through pyarrow and convert to pandas while ignoring the pandas
# metadata stored in the file.
table = pq.read_table(data_path)
df_raw = table.to_pandas(ignore_metadata=True, strings_to_categorical=False)

# Object columns that actually hold numbers (e.g. Decimal values) are converted
# to a numeric dtype. `errors='ignore'` is deprecated in pandas, so the default
# raising mode is used and only the conversion errors are handled explicitly.
for col in df_raw.select_dtypes(include=["object"]).columns:
    try:
        df_raw[col] = pd.to_numeric(df_raw[col])
    except (ValueError, TypeError):
        # Not a numeric column (text, mixed content): keep it unchanged.
        continue

print(df_raw.shape)
df_raw.head(5)


(345, 99)


  df_raw[col] = pd.to_numeric(df_raw[col], errors='ignore')


Unnamed: 0,usuarioId,locacaoId,locacaoCicloId,situacao_locacao,parcelaId,parcelaTipo,parcelaTipoId,usuarioMovimentoCaucaoId,veiculoEventoMultaId,codigoMulta,...,regiaoId,paisId,pacoteId,pacoteNome,pacoteTipoId,recorrencia_pagamento,pacoteDuracaoTipoId,pacoteDuracao,moeda,atualizacao_dt
0,4976086,5353690,496288,60,89452538,Aluguel Minha Sport,7,0,0.0,-,...,2,1,4004,Conquiste Sport ESD 0km,6,mensal,2,36,Real - Brasil,2026-01-28 13:47:58.931300+00:00
1,2013977,2177516,220167,60,69483755,Aluguel,3,0,0.0,-,...,2,1,2026,Sport - Minha Mottu 0Km,2,semanal,1,728,Real - Brasil,2026-01-28 13:47:58.931300+00:00
2,2013977,2177516,220167,60,69483751,Aluguel,3,0,0.0,-,...,2,1,2026,Sport - Minha Mottu 0Km,2,semanal,1,728,Real - Brasil,2026-01-28 13:47:58.931300+00:00
3,4976086,5353690,496288,60,89452531,Aluguel Minha Sport,7,0,0.0,-,...,2,1,4004,Conquiste Sport ESD 0km,6,mensal,2,36,Real - Brasil,2026-01-28 13:47:58.931300+00:00
4,4976086,5353690,496288,60,89452518,Aluguel Minha Sport,7,0,0.0,-,...,2,1,4004,Conquiste Sport ESD 0km,6,mensal,2,36,Real - Brasil,2026-01-28 13:47:58.931300+00:00


In [3]:
# Show the dtype pandas assigned to each column of df_raw.
df_raw.dtypes

usuarioId                              int64
locacaoId                              int64
locacaoCicloId                         int64
situacao_locacao                       int64
parcelaId                              int64
                                ...         
recorrencia_pagamento                 object
pacoteDuracaoTipoId                    int64
pacoteDuracao                          int64
moeda                                 object
atualizacao_dt           datetime64[us, UTC]
Length: 99, dtype: object

## 3) Exibir amostra dos 3 usuários para validação visual
Mostre alguns campos-chave por usuário.

In [4]:
# Column names are read from the project configuration rather than hard-coded.
columns_cfg = config.columns
user_col = columns_cfg.user_id_col
creation_col = columns_cfg.creation_date_col
payment_col = columns_cfg.payment_date_col
delay_col = columns_cfg.delay_col

# Distinct, non-null user ids present in the loaded frame.
users = df_raw[user_col].dropna().unique().tolist()
print("Usuarios:", users)

# Key fields to preview, restricted to the ones that exist in df_raw.
cols_preview = [
    name
    for name in (user_col, creation_col, columns_cfg.due_date_col, payment_col, delay_col)
    if name in df_raw.columns
]

# First 10 rows of each user, with rows ordered by user and creation date.
rows_sorted = df_raw.sort_values([user_col, creation_col])
rows_sorted[cols_preview].groupby(user_col).head(10)

Usuarios: [4976086, 2013977, 1000667]


Unnamed: 0,usuarioId,criacaoData,vencimentoData,pagamentoData,Dias_atraso
29,1000667,2023-06-26 18:38:05.637000+00:00,2023-06-27,2023-06-26 18:43:50+00:00,-1.0
30,1000667,2023-06-26 18:38:05.637000+00:00,2023-06-27,2023-06-26 18:43:50+00:00,-1.0
31,1000667,2023-06-26 18:38:05.637000+00:00,2023-06-27,2023-06-26 18:43:50+00:00,-1.0
5,1000667,2023-06-27 16:53:51.277000+00:00,2023-09-07,2023-09-07 22:42:03+00:00,0.0
21,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-31,2023-08-31 20:22:53+00:00,0.0
23,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-17,2023-08-18 17:34:07+00:00,1.0
24,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-17,2023-08-18 17:34:07+00:00,1.0
49,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-24,2023-08-24 22:49:01+00:00,0.0
58,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-10,2023-08-11 00:12:03+00:00,1.0
59,1000667,2023-06-27 16:53:51.277000+00:00,2023-08-10,2023-08-11 00:12:03+00:00,1.0


## 4) Instanciar a classe e executar passo a passo
Carregue os metadados e o estado do modelo, prepare os dados e monte as sequências.

In [5]:
# Pick the most recently modified run that contains saved model metadata.
runs_dir = project_root / config.paths.runs_dir
meta_files = sorted(runs_dir.glob("**/model_metadata.pt"), key=lambda p: p.stat().st_mtime, reverse=True)
assert meta_files, "Nenhum model_metadata.pt encontrado em runs/"

metadata_path = meta_files[0]
run_dir = metadata_path.parent
state_dict_path = run_dir / "model_state_dict.pt"

# SECURITY: weights_only=False uses the full pickle machinery, which can execute
# arbitrary code on load. It is kept here presumably because the metadata holds
# non-tensor objects (scalers, config) - only load files from trusted runs.
metadata = torch.load(metadata_path, map_location="cpu", weights_only=False)

# Rebuild the model with the hyper-parameters stored alongside the run.
model = Model(
    embedding_dimensions=metadata["embedding_dimensions"],
    num_continuous=metadata["num_continuous"],
    target_scaler=metadata.get("target_scaler"),
    feature_scaler=metadata.get("feature_scaler"),
    config=metadata["config"],
)

if state_dict_path.exists():
    # The state dict is expected to hold only tensors, so the restricted
    # (weights_only) loader is sufficient and avoids arbitrary unpickling.
    model.load_state_dict(torch.load(state_dict_path, map_location="cpu", weights_only=True))
else:
    print("Aviso: model_state_dict.pt não encontrado. Usando pesos atuais do modelo recém-criado.")

# Switch to evaluation mode before inference so that training-only layers
# (dropout, batch-norm statistics updates) do not affect the predictions.
# NOTE(review): assumes Model is a torch.nn.Module (it exposes load_state_dict).
model.eval()

engine = InferenceEngine(
    model=model,
    target_scaler=metadata.get("target_scaler"),
    feature_scalers=metadata.get("feature_scaler"),
    config=metadata["config"],
    categorical_maps=metadata.get("categorical_maps", {}),
)

# Step through the engine's internal stages to inspect the intermediate results.
prepared = engine._prepare_dataframe(df_raw)
sequences, meta_rows = engine._build_sequences(prepared)

# Number of sequences built and the length of each one.
len(sequences), [len(s) for s in sequences]


[22:44:48] [Temporal Features] Extracted 13 features 

[22:44:48] [History Features] Extracted 5 features 

[22:44:48] [Sequence Features] Extracted 12 features 

[22:44:48] [Value Features] Extracted 5 features 



  .apply(process_user_history)


(2, [50, 3])

## 5) Visualizar saídas intermediárias no DataFrame
Inspecione as sequências montadas e as colunas geradas; em seguida, execute `engine.predict` para obter as predições finais.

In [9]:
# Columns displayed for every assembled sequence (built once, outside the loop).
seq_cols = [user_col, creation_col, config.columns.due_date_col, payment_col, delay_col]

if sequences:
    # One rich table per sequence; the loop index was unused and is dropped.
    for seq in sequences:
        display(seq[seq_cols])
else:
    print("Nenhuma sequência encontrada.")


Unnamed: 0,usuarioId,criacaoData,vencimentoData,pagamentoData,Dias_atraso
93,2013977,2024-08-06 12:33:36.533000+00:00,2025-11-26 00:00:00+00:00,2025-11-26 12:48:56+00:00,0.0
83,2013977,2024-08-06 12:33:36.533000+00:00,2025-09-17 00:00:00+00:00,2025-09-17 22:46:49+00:00,0.0
82,2013977,2024-08-06 12:33:36.533000+00:00,2025-09-10 00:00:00+00:00,2025-09-10 19:25:21+00:00,0.0
76,2013977,2024-08-06 12:33:36.533000+00:00,2025-07-30 00:00:00+00:00,2025-07-30 23:46:23+00:00,0.0
80,2013977,2024-08-06 12:33:36.533000+00:00,2025-08-27 00:00:00+00:00,2025-08-27 19:04:49+00:00,0.0
55,2013977,2024-08-06 12:33:36.533000+00:00,2025-03-05 00:00:00+00:00,2025-03-05 21:38:30+00:00,0.0
54,2013977,2024-08-06 12:33:36.533000+00:00,2025-02-26 00:00:00+00:00,2025-02-27 00:28:49+00:00,1.0
53,2013977,2024-08-06 12:33:36.533000+00:00,2025-02-19 00:00:00+00:00,2025-02-18 15:43:48+00:00,-1.0
52,2013977,2024-08-06 12:33:36.533000+00:00,2025-02-12 00:00:00+00:00,2025-02-13 22:27:54+00:00,1.0
51,2013977,2024-08-06 12:33:36.533000+00:00,2025-02-05 00:00:00+00:00,2025-02-03 12:50:20+00:00,-1.0


Unnamed: 0,usuarioId,criacaoData,vencimentoData,pagamentoData,Dias_atraso
129,4976086,2025-12-09 16:21:20.700000+00:00,2025-12-09 00:00:00+00:00,2025-12-09 16:21:56+00:00,0.0
130,4976086,2025-12-09 16:21:20.700000+00:00,2026-01-05 00:00:00+00:00,2025-12-20 20:10:09+00:00,-1.0
131,4976086,2025-12-09 16:21:20.700000+00:00,2026-02-05 00:00:00+00:00,NaT,


In [7]:
# Call engine.predict on the raw frame and display the returned predictions.
preds_df = engine.predict(df_raw)
preds_df

[22:44:49] [Temporal Features] Extracted 13 features 

[22:44:49] [History Features] Extracted 5 features 

[22:44:49] [Sequence Features] Extracted 12 features 

[22:44:49] [Value Features] Extracted 5 features 



  .apply(process_user_history)


Unnamed: 0,usuarioId,locacaoId,ordem_parcela,criacaoData,vencimentoData,pagamentoData,predicted_days_to_payment,sequence_length
0,2013977,2177516,26,2024-08-06 12:33:36.533000+00:00,2026-02-04 00:00:00+00:00,NaT,6.751598,50
1,4976086,5353690,36,2025-12-09 16:21:20.700000+00:00,2026-02-05 00:00:00+00:00,NaT,0.0,3


## 6) Salvar/Exportar resultados para conferência
Salve os resultados finais para inspeção externa.

In [8]:
# Write the predictions to the project's data folder (without the index column)
# so they can be inspected outside the notebook.
output_path = project_root.joinpath("data", "inference_results.csv")
preds_df.to_csv(output_path, index=False)
print("Salvo em:", output_path)


Salvo em: c:\Users\Victo\OneDrive\Área de Trabalho\rental-churn - 2\rental-churn\data\inference_results.csv
