In [1]:
from pathlib import Path
import duckdb
import pandas as pd
from datetime import datetime

In [2]:
# 1) Ruta del proyecto y DB
repo_root = Path(r"C:\Users\serrios\01_prueba_analitico_4")
db_path = repo_root / "op_cobro" / "database" / "analytics.duckdb"

# 2) Carpeta de salida para CSV (la que pediste)
out_dir = repo_root / "op_cobro" / "notebooks" / "outputs"
out_dir.mkdir(parents=True, exist_ok=True)  # crea si no existe

print("DB:", db_path)
print("Outputs:", out_dir)
print("DB exists:", db_path.exists())

DB: C:\Users\serrios\01_prueba_analitico_4\op_cobro\database\analytics.duckdb
Outputs: C:\Users\serrios\01_prueba_analitico_4\op_cobro\notebooks\outputs
DB exists: True


In [3]:
con = duckdb.connect(str(db_path), read_only=True)

# Verifica que la tabla existe
tables = con.execute("SHOW TABLES").df()
tables.head(20)

Unnamed: 0,name


In [5]:
con.execute("""
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_schema = 'model_results'
""").df()

Unnamed: 0,table_schema,table_name
0,model_results,scores_best
1,model_results,top_scores


In [6]:
df_scores = con.execute("SELECT * FROM model_results.scores_best").df()
df_scores.shape, df_scores.head()

((63257, 6),
            num_doc              obl17 split_group     score  \
 0  100028088711497  17806128775334800        TEST  0.504483   
 1  100028088711497  19052883624618300        TEST  0.504483   
 2  100028088711497  26826204206697800        TEST  0.504483   
 3  100028088711497  31616528257602900        TEST  0.504483   
 4  100028088711497  38822113357418200        TEST  0.504483   
 
                              run_id   auc_oot  
 0  2302c0aeafb94666854593add7e69ae7  0.772079  
 1  2302c0aeafb94666854593add7e69ae7  0.772079  
 2  2302c0aeafb94666854593add7e69ae7  0.772079  
 3  2302c0aeafb94666854593add7e69ae7  0.772079  
 4  2302c0aeafb94666854593add7e69ae7  0.772079  )

In [7]:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_path = out_dir / f"scores_best_{timestamp}.csv"

df_scores.to_csv(csv_path, index=False, encoding="utf-8")
print("✅ CSV generado en:", csv_path)
print("Filas:", len(df_scores))

✅ CSV generado en: C:\Users\serrios\01_prueba_analitico_4\op_cobro\notebooks\outputs\scores_best_20260223_095649.csv
Filas: 63257


In [10]:
con.close()

<h3> Creación tabla final</h3>

In [15]:
from pathlib import Path
import duckdb

repo_root = Path(r"C:\Users\serrios\01_prueba_analitico_4")
db_path = repo_root / "op_cobro" / "database" / "analytics.duckdb"

con = duckdb.connect(str(db_path))

sql = """
CREATE SCHEMA IF NOT EXISTS model_results;

CREATE OR REPLACE TABLE model_results.decision_final_v1 AS
SELECT
  num_doc,
  obl17,
  split_group,
  score,
  run_id,
  auc_oot,
  CASE WHEN score = 1.0 THEN 0 ELSE 1 END AS gestion_cobro,
  CASE WHEN score = 1.0 THEN 'NO_ASIGNAR_GESTION' ELSE 'ASIGNAR_GESTION' END AS decision_label,
  'v1_score_eq_1' AS policy_version,
  1.0 AS threshold_used,
  CURRENT_TIMESTAMP AS created_at
FROM model_results.scores_best;
"""
con.execute(sql)

# Validación rápida
res = con.execute("""
SELECT decision_label, COUNT(*) AS n
FROM model_results.decision_final_v1
GROUP BY decision_label
""").df()

con.close()
res

Unnamed: 0,decision_label,n
0,ASIGNAR_GESTION,62434
1,NO_ASIGNAR_GESTION,823


In [16]:
from pathlib import Path
import duckdb
import pandas as pd
from datetime import datetime

# Rutas
repo_root = Path(r"C:\Users\serrios\01_prueba_analitico_4")
db_path = repo_root / "op_cobro" / "database" / "analytics.duckdb"

# Carpeta de salida (la creamos si no existe)
out_dir = repo_root / "op_cobro" / "notebooks" / "outputs"
out_dir.mkdir(parents=True, exist_ok=True)

# Conexión
con = duckdb.connect(str(db_path), read_only=True)

# Leer tabla final
df_final = con.execute("SELECT * FROM model_results.decision_final_v1").df()

print("✅ Tabla cargada:", df_final.shape)
df_final.head()

✅ Tabla cargada: (63257, 11)


Unnamed: 0,num_doc,obl17,split_group,score,run_id,auc_oot,gestion_cobro,decision_label,policy_version,threshold_used,created_at
0,100028088711497,17806128775334800,TEST,0.504483,2302c0aeafb94666854593add7e69ae7,0.772079,1,ASIGNAR_GESTION,v1_score_eq_1,1.0,2026-02-23 10:09:39.406000-05:00
1,100028088711497,19052883624618300,TEST,0.504483,2302c0aeafb94666854593add7e69ae7,0.772079,1,ASIGNAR_GESTION,v1_score_eq_1,1.0,2026-02-23 10:09:39.406000-05:00
2,100028088711497,26826204206697800,TEST,0.504483,2302c0aeafb94666854593add7e69ae7,0.772079,1,ASIGNAR_GESTION,v1_score_eq_1,1.0,2026-02-23 10:09:39.406000-05:00
3,100028088711497,31616528257602900,TEST,0.504483,2302c0aeafb94666854593add7e69ae7,0.772079,1,ASIGNAR_GESTION,v1_score_eq_1,1.0,2026-02-23 10:09:39.406000-05:00
4,100028088711497,38822113357418200,TEST,0.504483,2302c0aeafb94666854593add7e69ae7,0.772079,1,ASIGNAR_GESTION,v1_score_eq_1,1.0,2026-02-23 10:09:39.406000-05:00


In [17]:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_path = out_dir / f"decision_final_v1_{timestamp}.csv"

df_final.to_csv(csv_path, index=False, encoding="utf-8")
print("✅ CSV generado en:", csv_path)
print("Filas exportadas:", len(df_final))

✅ CSV generado en: C:\Users\serrios\01_prueba_analitico_4\op_cobro\notebooks\outputs\decision_final_v1_20260223_101244.csv
Filas exportadas: 63257


In [None]:
con.close()
print("Conexión cerrada ✅")

Conexión cerrada ✅
