In [1]:
from core.libs import pd, tqdm
from core.paths import resolve_inventory_paths
from CruisesProcessor.utils.metadata_extractor import extract_metadata_from_excel
from core.db import get_engine

# Database engine obtained from core.db.get_engine(); the cells below use it
# both to read the pending rows and to run the UPDATE statements.
engine = get_engine()

📄 Extrayendo metadatos...
💻 Conectado a la base de datos helloworldtree


In [3]:
# 1. Fetch every record whose inventory_date is still 'pending'.
query = """
    SELECT contract_code, rel_path
    FROM masterdatabase.inventory_metrics
    WHERE inventory_date = 'pending'
"""
df_pending = pd.read_sql(query, engine)

if df_pending.empty:
    print("No hay registros pendientes.")
    # Stop the run with status 0. Inside a Jupyter kernel the injected `exit`
    # callable treats its first argument as `keep_kernel`, so `exit(0)` shuts
    # the kernel down; raising SystemExit only ends the execution and keeps
    # the kernel (and its state) alive, while a plain script still exits 0.
    raise SystemExit(0)

# Quick preview of the rows that are going to be processed.
print(df_pending.head(5))

  contract_code                                           rel_path
0        US0057  USA/2023_ForestInventory/4-Contractor Cruises/...
1        US0040  USA/2023_ForestInventory/4-Contractor Cruises/...
2        US0047  USA/2023_ForestInventory/4-Contractor Cruises/...
3        US0056  USA/2023_ForestInventory/4-Contractor Cruises/...
4        US0067  USA/2023_ForestInventory/4-Contractor Cruises/...


In [9]:
from sqlalchemy import text

# 2. Keep only usable rel_paths (not null, not empty) and resolve them.
df_valid = df_pending[df_pending['rel_path'].notna() & (df_pending['rel_path'] != '')]
paths_absolutas = resolve_inventory_paths(df_valid['rel_path'].tolist())

# Contract codes and resolved paths are paired purely by position in the loop
# below. If the resolver ever returned a different number of paths, zip() would
# silently truncate/mispair them and dates would be written to the wrong
# contracts, so fail loudly before touching the database.
if len(paths_absolutas) != len(df_valid):
    raise ValueError(
        f"resolve_inventory_paths returned {len(paths_absolutas)} paths for "
        f"{len(df_valid)} rel_paths; cannot pair them with contract codes."
    )

# Placeholder stored when the workbook yields no usable cruise date.
SIN_FECHA = "Excel without cruise date"

# Both outcomes run the same statement; only the bound value differs, so the
# statement is built once instead of on every iteration.
update_query = text("""
    UPDATE masterdatabase.inventory_metrics
    SET inventory_date = :cruise_date
    WHERE contract_code = :contract_code
""")

errores = []       # (contract_code, error message) for every failed row
actualizados = 0   # rows updated with a real cruise date
sin_fecha = 0      # rows marked with the SIN_FECHA placeholder

for contract_code, rel_path_abs in tqdm(
        zip(df_valid['contract_code'], paths_absolutas), total=len(paths_absolutas)):
    try:
        meta = extract_metadata_from_excel(rel_path_abs)
        cruise_date = meta.get("cruise_date")

        # A date is usable when it is truthy (covers None and '') and is not
        # the literal "pending" marker.
        tiene_fecha = bool(cruise_date and cruise_date != "pending")
        nuevo_valor = cruise_date if tiene_fecha else SIN_FECHA

        # NOTE(review): the value is stored exactly as the extractor returned
        # it; the run log shows mixed formats (e.g. '2024-04-11 00:00:00' and
        # '01/16/2025'). Consider normalizing once the target format is agreed.
        # One transaction per row so a failure does not roll back earlier rows.
        with engine.begin() as conn:
            conn.execute(update_query, {"cruise_date": nuevo_valor, "contract_code": contract_code})

        if tiene_fecha:
            print(f"✅ Actualizado {contract_code}: {cruise_date}")
            actualizados += 1
        else:
            print(f"⚠️ Sin fecha para {contract_code}: marcado como 'Excel without cruise date'")
            sin_fecha += 1
    except Exception as e:
        # Best effort: report the failure, remember it for the summary and
        # continue with the next contract.
        print(f"❌ Error en {contract_code} ({rel_path_abs}): {e}")
        errores.append((contract_code, str(e)))

# Final summary of the run.
print(f"\nListo. Total actualizados: {actualizados} / {len(df_valid)}")
print(f"Total marcados como 'Excel without cruise date': {sin_fecha}")
if errores:
    print("\nErrores:")
    for c, err in errores:
        print(f"{c}: {err}")


  ws_parser.bind_all()
  ws_parser.bind_all()
  1%|          | 1/104 [00:00<00:40,  2.53it/s]

✅ Actualizado US0057: 2024-04-11 00:00:00


  2%|▏         | 2/104 [00:00<00:44,  2.30it/s]

✅ Actualizado US0040: 2024-04-02 00:00:00


  3%|▎         | 3/104 [00:01<00:37,  2.71it/s]

✅ Actualizado US0047: 2024-04-03 00:00:00


  4%|▍         | 4/104 [00:01<00:45,  2.21it/s]

✅ Actualizado US0056: 2024-05-14 00:00:00


  5%|▍         | 5/104 [00:02<00:40,  2.46it/s]

✅ Actualizado US0067: 2024-04-11 00:00:00


  6%|▌         | 6/104 [00:02<00:36,  2.66it/s]

✅ Actualizado US0075: 2024-04-24 00:00:00


  7%|▋         | 7/104 [00:02<00:43,  2.22it/s]

✅ Actualizado US0086: 2024-05-17 00:00:00


  8%|▊         | 8/104 [00:03<00:38,  2.48it/s]

✅ Actualizado US0065: 2024-06-18 00:00:00


  9%|▊         | 9/104 [00:03<00:37,  2.57it/s]

✅ Actualizado US0063: 2024-04-02 00:00:00


 10%|▉         | 10/104 [00:04<00:40,  2.30it/s]

✅ Actualizado US0082: 2024-06-18 00:00:00


 11%|█         | 11/104 [00:04<00:38,  2.44it/s]

✅ Actualizado US0078: 2024-04-02 00:00:00


 12%|█▏        | 12/104 [00:34<14:36,  9.53s/it]

⚠️ Sin fecha para CR0029: marcado como 'Excel without cruise date'


 12%|█▎        | 13/104 [01:06<24:26, 16.11s/it]

⚠️ Sin fecha para CR0081: marcado como 'Excel without cruise date'


 13%|█▎        | 14/104 [01:37<31:14, 20.83s/it]

⚠️ Sin fecha para CR0070: marcado como 'Excel without cruise date'


 14%|█▍        | 15/104 [01:39<22:07, 14.91s/it]

✅ Actualizado CR0107: 01/16/2025


 15%|█▌        | 16/104 [01:39<15:31, 10.58s/it]

✅ Actualizado CR0100: 2025-02-03 00:00:00


 16%|█▋        | 17/104 [01:40<11:07,  7.67s/it]

✅ Actualizado CR0104: 01/28/2025


 17%|█▋        | 18/104 [01:41<07:57,  5.55s/it]

⚠️ Sin fecha para CR0092: marcado como 'Excel without cruise date'


 18%|█▊        | 19/104 [01:41<05:37,  3.97s/it]

✅ Actualizado US0147: 2025-01-28 00:00:00


 19%|█▉        | 20/104 [01:41<04:05,  2.93s/it]

✅ Actualizado US0151: 2025-01-24 00:00:00


 20%|██        | 21/104 [01:42<02:56,  2.13s/it]

✅ Actualizado US0120: 2025-02-07 00:00:00


 21%|██        | 22/104 [01:42<02:08,  1.57s/it]

✅ Actualizado US0123: 2025-02-11 00:00:00


 22%|██▏       | 23/104 [01:42<01:39,  1.23s/it]

✅ Actualizado US0127: 2025-03-04 00:00:00


 23%|██▎       | 24/104 [01:43<01:15,  1.06it/s]

✅ Actualizado US0030: 2025-03-06 00:00:00


 24%|██▍       | 25/104 [01:43<00:57,  1.37it/s]

✅ Actualizado US0055: 2025-02-05 00:00:00


 25%|██▌       | 26/104 [01:44<00:56,  1.39it/s]

✅ Actualizado CR0039: 2024-01-16 00:00:00


 26%|██▌       | 27/104 [01:44<00:50,  1.53it/s]

✅ Actualizado CR0046: 2024-01-15 00:00:00


 27%|██▋       | 28/104 [01:45<00:50,  1.49it/s]

✅ Actualizado CR0068: 2024-01-26 00:00:00


 28%|██▊       | 29/104 [01:45<00:40,  1.85it/s]

✅ Actualizado US0071: 2025-02-26 00:00:00


 29%|██▉       | 30/104 [01:45<00:33,  2.20it/s]

✅ Actualizado US0092: 2025-02-05 00:00:00


 30%|██▉       | 31/104 [01:46<00:29,  2.51it/s]

✅ Actualizado US0095: 2025-01-31 00:00:00


 31%|███       | 32/104 [01:46<00:30,  2.36it/s]

✅ Actualizado US0113: 2025-03-06 00:00:00


 32%|███▏      | 33/104 [01:46<00:25,  2.75it/s]

✅ Actualizado US0124: 2025-02-25 00:00:00


 33%|███▎      | 34/104 [01:47<00:23,  2.96it/s]

✅ Actualizado US0032: 2025-03-06 00:00:00


 34%|███▎      | 35/104 [01:47<00:25,  2.74it/s]

⚠️ Sin fecha para US0101: marcado como 'Excel without cruise date'


 35%|███▍      | 36/104 [01:47<00:23,  2.88it/s]

✅ Actualizado US0121: 2025-01-24 00:00:00


 36%|███▌      | 37/104 [01:48<00:22,  3.03it/s]

⚠️ Sin fecha para US0102: marcado como 'Excel without cruise date'


 37%|███▋      | 38/104 [01:48<00:19,  3.32it/s]

⚠️ Sin fecha para US0114: marcado como 'Excel without cruise date'


 38%|███▊      | 39/104 [01:49<00:29,  2.22it/s]

✅ Actualizado GT0040: 2025-03-12 00:00:00


 38%|███▊      | 40/104 [01:49<00:24,  2.58it/s]

✅ Actualizado US0126: 2025-01-31 00:00:00


 39%|███▉      | 41/104 [01:50<00:29,  2.10it/s]

✅ Actualizado CR0059: 2024-01-18 00:00:00


 40%|████      | 42/104 [01:50<00:29,  2.08it/s]

✅ Actualizado CR0053: 2024-01-16 00:00:00


 41%|████▏     | 43/104 [01:51<00:32,  1.89it/s]

✅ Actualizado CR0055: 2023-12-02 00:00:00


 42%|████▏     | 44/104 [01:51<00:31,  1.93it/s]

✅ Actualizado CR0060: 2024-02-22 00:00:00


 43%|████▎     | 45/104 [01:52<00:29,  2.03it/s]

✅ Actualizado US0062: 2025-03-11 00:00:00


 44%|████▍     | 46/104 [01:52<00:27,  2.13it/s]

✅ Actualizado CR0076: 2024-02-06 00:00:00


 45%|████▌     | 47/104 [01:53<00:29,  1.91it/s]

✅ Actualizado CR0074: 2024-02-01 00:00:00


 46%|████▌     | 48/104 [01:53<00:28,  1.97it/s]

✅ Actualizado CR0063: 2024-01-16 00:00:00


 47%|████▋     | 49/104 [01:54<00:35,  1.56it/s]

✅ Actualizado GT0031a: 2025-03-17 00:00:00


 48%|████▊     | 50/104 [01:55<00:36,  1.50it/s]

✅ Actualizado GT0036: 2025-01-21 00:00:00


 49%|████▉     | 51/104 [01:55<00:33,  1.59it/s]

✅ Actualizado GT0035: 2025-02-03 00:00:00


 50%|█████     | 52/104 [01:56<00:26,  1.95it/s]

✅ Actualizado US0104: 2025-02-04 00:00:00


 51%|█████     | 53/104 [01:56<00:25,  1.99it/s]

✅ Actualizado US0039: 2025-03-11 00:00:00


 52%|█████▏    | 54/104 [01:56<00:21,  2.37it/s]

⚠️ Sin fecha para US0103: marcado como 'Excel without cruise date'


 53%|█████▎    | 55/104 [01:57<00:18,  2.70it/s]

✅ Actualizado US0096: 2025-02-05 00:00:00


 54%|█████▍    | 56/104 [01:57<00:22,  2.12it/s]

✅ Actualizado GT0024b: 2025-02-17 00:00:00


 55%|█████▍    | 57/104 [01:58<00:19,  2.47it/s]

✅ Actualizado US0099: 2025-02-07 00:00:00


 56%|█████▌    | 58/104 [01:58<00:23,  1.98it/s]

✅ Actualizado GT0045: 2025-01-15 00:00:00


 57%|█████▋    | 59/104 [01:59<00:23,  1.89it/s]

✅ Actualizado GT0044: 2025-01-14 00:00:00


 58%|█████▊    | 60/104 [01:59<00:19,  2.23it/s]

✅ Actualizado US0109: 2025-01-28 00:00:00


 59%|█████▊    | 61/104 [02:00<00:20,  2.06it/s]

✅ Actualizado US0100: 2025-02-18 00:00:00


 60%|█████▉    | 62/104 [02:00<00:21,  1.99it/s]

✅ Actualizado GT0034: 2025-03-22 00:00:00


 61%|██████    | 63/104 [02:01<00:19,  2.10it/s]

✅ Actualizado US0164: 2025-03-05 00:00:00


 62%|██████▏   | 64/104 [02:01<00:22,  1.75it/s]

✅ Actualizado GT0038: 2025-02-18 00:00:00


 62%|██████▎   | 65/104 [02:02<00:24,  1.60it/s]

✅ Actualizado GT0030: 2025-01-29 00:00:00


 63%|██████▎   | 66/104 [02:02<00:19,  1.96it/s]

✅ Actualizado US0106: 2025-01-24 00:00:00


 64%|██████▍   | 67/104 [02:03<00:15,  2.33it/s]

✅ Actualizado US0146: 2025-03-06 00:00:00


 65%|██████▌   | 68/104 [02:03<00:13,  2.69it/s]

✅ Actualizado US0077: 2025-02-11 00:00:00


 66%|██████▋   | 69/104 [02:03<00:14,  2.38it/s]

✅ Actualizado US0115: 2025-02-03 00:00:00


 67%|██████▋   | 70/104 [02:04<00:12,  2.69it/s]

✅ Actualizado US0117: 2025-02-26 00:00:00


 68%|██████▊   | 71/104 [02:04<00:11,  2.93it/s]

✅ Actualizado US0091: 2025-01-29 00:00:00


 69%|██████▉   | 72/104 [02:04<00:12,  2.60it/s]

✅ Actualizado US0118: 2025-02-28 00:00:00


 70%|███████   | 73/104 [02:05<00:11,  2.77it/s]

✅ Actualizado US0097: 2025-03-12 00:00:00


 71%|███████   | 74/104 [02:05<00:12,  2.43it/s]

✅ Actualizado CR0051: 2024-01-11 00:00:00


 72%|███████▏  | 75/104 [02:06<00:15,  1.91it/s]

✅ Actualizado CR0047: 2024-02-19 00:00:00


 73%|███████▎  | 76/104 [02:07<00:16,  1.67it/s]

✅ Actualizado CR0035: 2024-02-09 00:00:00


 74%|███████▍  | 77/104 [02:07<00:15,  1.75it/s]

✅ Actualizado CR0049: 2024-01-16 00:00:00


 75%|███████▌  | 78/104 [02:08<00:16,  1.59it/s]

✅ Actualizado CR0048: 2024-02-20 00:00:00


 76%|███████▌  | 79/104 [02:09<00:14,  1.72it/s]

✅ Actualizado CR0052: 2024-01-09 00:00:00


 77%|███████▋  | 80/104 [02:09<00:15,  1.53it/s]

✅ Actualizado CR0044: 2024-11-01 00:00:00


 78%|███████▊  | 81/104 [02:10<00:14,  1.63it/s]

✅ Actualizado CR0042: 2024-01-31 00:00:00


 79%|███████▉  | 82/104 [02:11<00:14,  1.55it/s]

✅ Actualizado CR0098: 2024-02-14 00:00:00


 80%|███████▉  | 83/104 [02:11<00:13,  1.58it/s]

✅ Actualizado CR0064: 2024-02-20 00:00:00


 81%|████████  | 84/104 [02:12<00:13,  1.53it/s]

✅ Actualizado CR0062: 2024-01-17 00:00:00


 82%|████████▏ | 85/104 [02:12<00:11,  1.68it/s]

⚠️ Sin fecha para CR0086: marcado como 'Excel without cruise date'


 83%|████████▎ | 86/104 [02:13<00:11,  1.60it/s]

✅ Actualizado CR0066: 2024-01-30 00:00:00


 84%|████████▎ | 87/104 [02:14<00:09,  1.73it/s]

✅ Actualizado CR0065: 2024-02-08 00:00:00


 85%|████████▍ | 88/104 [02:14<00:09,  1.60it/s]

✅ Actualizado CR0069: 2024-01-17 00:00:00


 86%|████████▌ | 89/104 [02:15<00:08,  1.68it/s]

✅ Actualizado CR0061: 2024-01-18 00:00:00


 87%|████████▋ | 90/104 [02:16<00:09,  1.52it/s]

✅ Actualizado CR0077: 2024-02-08 00:00:00


 88%|████████▊ | 91/104 [02:16<00:08,  1.59it/s]

✅ Actualizado CR0075: 2024-01-24 00:00:00


 88%|████████▊ | 92/104 [02:17<00:07,  1.52it/s]

✅ Actualizado CR0073: 2024-02-12 00:00:00


 89%|████████▉ | 93/104 [02:17<00:06,  1.68it/s]

✅ Actualizado CR0089: 2024-01-31 00:00:00


 90%|█████████ | 94/104 [02:18<00:07,  1.40it/s]

✅ Actualizado CR0102: 2024-01-10 00:00:00


 91%|█████████▏| 95/104 [02:19<00:07,  1.19it/s]

✅ Actualizado CR0078: 2024-02-07 00:00:00


 92%|█████████▏| 96/104 [02:20<00:07,  1.13it/s]

✅ Actualizado CR0087: 2024-02-23 00:00:00


 93%|█████████▎| 97/104 [02:21<00:05,  1.30it/s]

✅ Actualizado US0068: 2024-04-10 00:00:00


 94%|█████████▍| 98/104 [02:22<00:04,  1.32it/s]

✅ Actualizado US0081: 2024-04-10 00:00:00


 95%|█████████▌| 99/104 [02:22<00:03,  1.55it/s]

✅ Actualizado US0070: 2024-04-02 00:00:00


 96%|█████████▌| 100/104 [02:22<00:02,  1.79it/s]

✅ Actualizado US0059: 2024-04-04 00:00:00


 97%|█████████▋| 101/104 [02:23<00:01,  1.68it/s]

✅ Actualizado US0087: 2024-05-15 00:00:00


 98%|█████████▊| 102/104 [02:24<00:01,  1.87it/s]

✅ Actualizado US0073: 2024-04-12 00:00:00


 99%|█████████▉| 103/104 [02:24<00:00,  2.06it/s]

✅ Actualizado US0083: 2024-04-12 00:00:00


100%|██████████| 104/104 [02:55<00:00,  1.68s/it]

⚠️ Sin fecha para CR0028: marcado como 'Excel without cruise date'

Listo. Total actualizados: 94 / 104
Total marcados como 'Excel without cruise date': 10



