In [2]:
import pandas as pd
import numpy as np
import os
import sys

# Step 1: Extract data from files

In [2]:
import os
import json
import pandas as pd

# Directory containing the SAQ product batch files (batch_<n>.json).
# The path is relative, so it is resolved against the notebook's working directory.
DATADIR = "../data/saq/products/"

def extract_saq_fields_from_record(item):
    """Flatten one raw SAQ product record into a dict of named columns.

    Parameters
    ----------
    item : dict
        One record as loaded from a batch JSON file. The top-level keys read
        are ``url``, ``product_name``, ``price``, ``breadcrumb``,
        ``tasting_notes``, ``pairings`` and ``product_details`` (a mapping
        keyed by the French labels used below).

    Returns
    -------
    dict
        One entry per output column. Any field absent from the record is
        ``None``. ``product_details`` carries the details mapping itself
        (``{}`` when the record has none).
    """
    # `.get(key, {})` only applies the default when the key is absent; a record
    # with "product_details": null would hand back None and crash on `.get`.
    pdets = item.get("product_details") or {}
    return {
        "url": item.get("url"),
        "product_name": item.get("product_name"),
        "price": item.get("price"),
        "breadcrumb": item.get("breadcrumb"),
        "product_details_pays": pdets.get("Pays"),
        "product_details_region": pdets.get("Région"),
        "product_details_appellation": pdets.get("Appellation d'origine"),
        "product_details_designation": pdets.get("Désignation réglementée"),
        # The label appears in singular or plural form; take whichever is set.
        "product_details_cepage": pdets.get("Cépage") or pdets.get("Cépages"),
        "product_details_degre_alcool": pdets.get("Degré d'alcool"),
        "product_details_couleur": pdets.get("Couleur"),
        "product_details_format": pdets.get("Format"),
        "product_details_producteur": pdets.get("Producteur"),
        "product_details_agent": pdets.get("Agent promotionnel"),
        "product_details_code_saq": pdets.get("Code SAQ"),
        "product_details_code_cup": pdets.get("Code CUP"),
        # These two live at the top level of the record, not under product_details.
        "product_details_tasting_notes": item.get("tasting_notes"),
        "product_details_pairings": item.get("pairings"),
        "product_details": pdets
    }

def load_data2(batch_start=1, batch_end=10):
    """Load SAQ product batch files into a single DataFrame.

    Reads ``batch_<i>.json`` from ``DATADIR`` for every ``i`` in
    ``range(batch_start, batch_end)``. ``batch_end`` is exclusive, so the
    defaults load batches 1 through 9. Missing files are reported and skipped.

    Parameters
    ----------
    batch_start : int
        Number of the first batch to load (inclusive).
    batch_end : int
        Number at which loading stops (exclusive).

    Returns
    -------
    pandas.DataFrame
        One row per record, with the columns produced by
        ``extract_saq_fields_from_record``. When the range is empty or no
        batch file exists, an empty frame with those columns is returned.
    """
    frames = []
    for i in range(batch_start, batch_end):
        file_path = os.path.join(DATADIR, f'batch_{i}.json')
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist.")
            continue

        print(f"Loading {file_path}")
        # JSON is UTF-8 by specification; do not rely on the platform's
        # default text encoding when decoding accented product names.
        with open(file_path, 'r', encoding='utf-8') as f:
            records = json.load(f)
        cleaned = [extract_saq_fields_from_record(rec) for rec in records]
        frames.append(pd.DataFrame(cleaned))

    if not frames:
        # pd.concat raises ValueError on an empty list. Return an empty frame
        # that still has the expected columns so downstream steps keep working.
        return pd.DataFrame(columns=list(extract_saq_fields_from_record({})))
    return pd.concat(frames, ignore_index=True)




In [None]:
# Load batch_1.json .. batch_9.json (batch_end is exclusive), matching the
# output recorded below. A lone positional `11` would bind to batch_start and
# yield the empty range(11, 10).
# NOTE(review): if batch_10 was meant to be included, pass batch_end=11.
df = load_data2(batch_start=1, batch_end=10)
df.head()

Loading ../data/saq/products/batch_1.json
Loading ../data/saq/products/batch_2.json
Loading ../data/saq/products/batch_3.json
Loading ../data/saq/products/batch_4.json
Loading ../data/saq/products/batch_5.json
Loading ../data/saq/products/batch_6.json
Loading ../data/saq/products/batch_7.json
Loading ../data/saq/products/batch_8.json
Loading ../data/saq/products/batch_9.json


Unnamed: 0,url,product_name,price,breadcrumb,product_details_pays,product_details_region,product_details_appellation,product_details_designation,product_details_cepage,product_details_degre_alcool,product_details_couleur,product_details_format,product_details_producteur,product_details_agent,product_details_code_saq,product_details_code_cup,product_details_tasting_notes,product_details_pairings,product_details
0,https://www.saq.com/fr/14099363,Domaine François Raveneau Chablis Premier Cru ...,83.75,Produits > Vin > Vin blanc,France,"Bourgogne, Chablis et Grand Auxerrois",Chablis Premier Cru,(AOC/AOP) Appellation origine controlée/protégée,Chardonnay 100 %,13 %,Blanc,750 ml,Domaine François Raveneau,Oenopole Inc.,14099363,4000140993633,{},[],"{'Pays': 'France', 'Région': 'Bourgogne, Chabl..."
1,https://www.saq.com/fr/14099371,Domaine François Raveneau Chablis 2015,56.0,Produits > Vin > Vin blanc,France,"Bourgogne, Chablis et Grand Auxerrois",Chablis,(AOC/AOP) Appellation origine controlée/protégée,Chardonnay 100 %,13 %,Blanc,750 ml,Domaine François Raveneau,Oenopole Inc.,14099371,4000140993718,{},[],"{'Pays': 'France', 'Région': 'Bourgogne, Chabl..."
2,https://www.saq.com/fr/14099380,Domaine François Raveneau Chablis 2016,56.0,Produits > Vin > Vin blanc,France,"Bourgogne, Chablis et Grand Auxerrois",Chablis,(AOC/AOP) Appellation origine controlée/protégée,Chardonnay 100 %,13 %,Blanc,750 ml,Domaine François Raveneau,Oenopole Inc.,14099380,4000140993800,{},[],"{'Pays': 'France', 'Région': 'Bourgogne, Chabl..."
3,https://www.saq.com/fr/14099398,Domaine François Raveneau Chablis Grand Cru Le...,126.5,Produits > Vin > Vin blanc,France,"Bourgogne, Chablis et Grand Auxerrois",Chablis Grand Cru,(AOC/AOP) Appellation origine controlée/protégée,Chardonnay 100 %,13 %,Blanc,750 ml,Domaine François Raveneau,Oenopole Inc.,14099398,4000140993985,{},[],"{'Pays': 'France', 'Région': 'Bourgogne, Chabl..."
4,https://www.saq.com/fr/14113541,Domaine François Raveneau Chablis Premier Cru ...,83.75,Produits > Vin > Vin blanc,France,"Bourgogne, Chablis et Grand Auxerrois",Chablis Premier Cru,(AOC/AOP) Appellation origine controlée/protégée,Chardonnay 100 %,13 %,Blanc,750 ml,Domaine François Raveneau,Oenopole Inc.,14113541,4000141135414,{},[],"{'Pays': 'France', 'Région': 'Bourgogne, Chabl..."


# Step 2: Insert into products

In [8]:
import os

import psycopg2

# Credentials must not be stored in the notebook. The connection string is read
# from the DATABASE_URL environment variable, for example:
#   export DATABASE_URL="postgresql://<user>:<password>@<host>:5432/postgres"
# NOTE(review): the password that used to be hardcoded here has been exposed in
# the notebook and should be rotated.
DB_URL = os.environ.get("DATABASE_URL")
if not DB_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; export the Postgres connection string before running this cell."
    )

conn = psycopg2.connect(DB_URL)

In [17]:
import json
from uuid import uuid4
from datetime import datetime, timezone
from tqdm import tqdm

def upsert_wines(df, conn):
    """Insert the wine rows of ``df`` that are not yet in the ``product`` table.

    Despite the name, nothing is ever updated: a row whose SAQ code already
    exists as ``product.producer_code`` is skipped. For each remaining row the
    subcategory is resolved from the wine colour (category) and appellation
    (subcategory) under the ``Vin`` family. Rows with no matching subcategory
    are reported and skipped.

    Every insert is committed on its own. If a row fails, the transaction is
    rolled back, the error is printed, and processing continues with the next
    row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``breadcrumb``, ``product_name``,
        ``product_details``, ``product_details_couleur``,
        ``product_details_appellation``, ``product_details_code_saq`` and
        ``product_details_code_cup``.
    conn
        Open database connection exposing ``cursor()``, ``commit()`` and
        ``rollback()`` (a psycopg2 connection in this notebook).

    Returns
    -------
    None
    """

    def _is_missing(value):
        # A missing cell shows up as None or as float NaN depending on how
        # pandas typed the column.
        return value is None or (isinstance(value, float) and value != value)

    # Only rows whose breadcrumb starts with the wine section are imported.
    wines = df[df['breadcrumb'].str.startswith("Produits > Vin", na=False)].copy()
    wines['product_details_couleur'] = wines['product_details_couleur'].str.strip()

    cursor = conn.cursor()
    try:
        for _, row in tqdm(wines.iterrows(), total=len(wines), desc="Upserting wines"):
            try:
                code_saq = row['product_details_code_saq']
                if _is_missing(code_saq):
                    # "producer_code = NULL" never matches in SQL, so the
                    # existence check below could not stop a code-less row
                    # from being inserted again on every run.
                    print(f"Skipping {row.get('product_name', '')}: missing SAQ code")
                    continue

                cursor.execute("SELECT 1 FROM product WHERE producer_code = %s", (code_saq,))
                if cursor.fetchone():
                    continue  # Product already exists

                # Find subcategory_id
                cursor.execute("""
                    SELECT s.subcategory_id
                    FROM subcategory2 s
                    JOIN category c ON s.category_id = c.category_id
                    JOIN family f ON f.family_id = c.family_id
                    WHERE f.family_name_fr = 'Vin'
                      AND c.category_name_fr = %s
                      AND s.subcategory_name_fr = %s
                """, (row['product_details_couleur'], row['product_details_appellation']))
                result = cursor.fetchone()
                if not result:
                    print(f"Subcategory not found for {row['product_name']}")
                    continue

                subcategory_id = result[0]
                product_id = str(uuid4())
                product_name = row['product_name']
                # default=str keeps the dump from failing on values json cannot encode.
                details_json = json.dumps(row['product_details'], default=str)
                now = datetime.now(timezone.utc).isoformat()

                # Store SQL NULL rather than NaN when the UPC is missing.
                upc = row.get('product_details_code_cup')
                if _is_missing(upc):
                    upc = None

                # The single scraped name fills all three language columns, and
                # the SAQ code is stored as both producer_code and product_code.
                cursor.execute("""
                    INSERT INTO product (
                        product_id, product_name_fr, product_name_en, product_name_es,
                        product_upc, producer_code, subcategory_id, details,
                        unit_equivalence_qty, product_code, refresh_date
                    )
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, (
                    product_id, product_name, product_name, product_name,
                    upc, code_saq,
                    subcategory_id, details_json, 1, code_saq,
                    now
                ))
                conn.commit()
                print(f"Inserted {product_name}")

            except Exception as e:
                # Best effort per row: undo the failed statement so the
                # connection stays usable, report, and move on.
                conn.rollback()
                print(f"Error inserting {row.get('product_name', '')}: {e}")
    finally:
        # Release the cursor even if something outside the per-row handler fails.
        cursor.close()



In [12]:
# Roll back whatever transaction is currently open on `conn`.
# NOTE(review): presumably a manual recovery step after a failed statement; its
# execution count (12) predates the surrounding cells. Confirm it is still needed.
conn.rollback()

In [18]:
# Import the wines from the frame loaded in Step 1 through the open connection.
upsert_wines(df, conn)


Upserting wines:   2%|▏         | 3/170 [00:00<00:43,  3.88it/s]

Inserted Domaine François Raveneau Chablis Premier Cru Montmains 2016
Inserted Domaine François Raveneau Chablis 2015
Inserted Domaine François Raveneau Chablis 2016
Inserted Domaine François Raveneau Chablis Grand Cru Les Clos 2015


Upserting wines:   4%|▍         | 7/170 [00:01<00:18,  9.04it/s]

Inserted Domaine François Raveneau Chablis Premier Cru Montée de Tonnerre 2016
Inserted Schrader Cabernet-Sauvignon GIII Beckstoffer Georges III Vineyard Rutherford 2015
Inserted Schrader RBS Beckstoffer To Kalon Vineyard Cabernet-Sauvignon Napa Valley 2015
Inserted Jean-Louis Chave Hermitage 2016


Upserting wines:   8%|▊         | 14/170 [00:01<00:09, 16.63it/s]

Inserted Domaine Jean-Louis Chave Saint-Joseph Clos Florentin 2016
Inserted Domaine Jean-Louis Chave Saint-Joseph 2016
Inserted Signorello Chardonnay Hope's Cuvee 2021
Inserted Jean-Louis Chave Hermitage 2016
Inserted Mas Doix Doix Priorat 2015


Upserting wines:  12%|█▏        | 20/170 [00:01<00:07, 20.09it/s]

Inserted Biondi Santi Brunello di Montalcino Riserva 2006
Subcategory not found for Sierra de Gredos Pegaso Barrancos de Pizarra 2021
Subcategory not found for Sierra de Gredos Pegaso Granito 2021
Inserted Azelia Barolo Margheria 2017
Inserted Azelia Bricco Voghera Barolo Riserva 2010
Subcategory not found for Giuseppe Quintarelli Rosso del Bepi 2014
Subcategory not found for Giuseppe Quintarelli Amarone della Valpolicella Classico 2011


Upserting wines:  16%|█▋        | 28/170 [00:01<00:05, 25.96it/s]

Subcategory not found for Giuseppe Quintarelli Alzero Cabernet 2009
Inserted Domaine Joblot Givry Premier Cru Clos Grand Marole 2017
Inserted Castello di Ama Vigneto Bellavista Chianti Classico 2015
Subcategory not found for Raul Perez Ultreia Paluezas 2016
Inserted Raul Pérez Ultreia Valtuille 2016


Upserting wines:  20%|██        | 34/170 [00:02<00:05, 25.52it/s]

Inserted Clos Mogador Priorat 2016
Subcategory not found for Domaine Joblot Givry Mademoiselle 2017
Subcategory not found for Giuseppe Quintarelli Recioto della Valpolicella 2007
Inserted Cayuse Wallah Wallah Syrah Special #10 2016
Inserted Bodegas Muga Aro Rioja 2015


Upserting wines:  24%|██▍       | 41/170 [00:02<00:04, 28.76it/s]

Inserted Domaine Didier Dagueneau Buisson Renard 2016
Inserted Didier Dagueneau Pouilly-Fumé Silex 2016
Subcategory not found for Domaine Didier Dagueneau Le Mont Damné 2016
Inserted Bachelder Chardonnay Wismer Foxcroft 2015
Subcategory not found for Shafer Hillside Select Stags Leap District 2014
Subcategory not found for Bachelder Johnson Vineyard Yamhill-Carlton 2013


Upserting wines:  26%|██▋       | 45/170 [00:02<00:04, 29.00it/s]

Inserted Dal Forno Romano Valpolicella Superiore Monte Lodoletta 2012
Inserted Torres Mas La Plana Cabernet-sauvignon 2011
Inserted Donnhoff Niederhausser Hermannshohle Riesling Auslese 2018
Inserted Long Shadows Merlot Pedestal Columbia Valley 2015


Upserting wines:  31%|███       | 53/170 [00:02<00:03, 31.08it/s]

Inserted Long Shadows Pirouette Columbia Valley 2015
Inserted Herencia Altés Cupatge Negre Terra Alta 2022
Subcategory not found for Francis Ford Coppola Pinot Noir Bee's Box 2017
Subcategory not found for Quinta do Boiçao Reserva Lisboa 2021


Upserting wines:  43%|████▎     | 73/170 [00:03<00:01, 49.91it/s]

Subcategory not found for Domaine Porto Carras Malagouzia 2021
Subcategory not found for Pascual Toso Barrancas Mendoza 2018
Inserted Roux Père et Fils Bourgogne Chardonnay 2020
Inserted Cono Sur 20 Barrels Cabernet-Sauvignon Valle Del Maipo 2018


Upserting wines:  47%|████▋     | 80/170 [00:03<00:01, 49.63it/s]

Subcategory not found for Cono Sur Single Vineyard Chardonnay 2024
Subcategory not found for Château de Marsannay Bourgogne En Montre Cul 2020
Inserted Henri Bourgeois Pouilly-Fumé En Travertin 2021
Inserted Jean Bouchard Bourgogne 2019


Upserting wines:  54%|█████▍    | 92/170 [00:03<00:02, 38.83it/s]

Inserted Louis Jadot Meursault 2016
Inserted Louis Jadot Meursault Premier Cru Genevrières 2017
Inserted Louis Jadot Pommard 2015
Inserted Maison Louis Jadot Chambolle-Musigny 2010
Subcategory not found for Louis Jadot Musigny Grand Cru 2007
Subcategory not found for Louis Jadot Musigny Grand Cru 2011
Inserted Louis Jadot Corton Pougets Grand Cru 2016
Inserted Louis Jadot Santenay Clos de Malte 2016
Inserted Château des Jacques Morgon Côte du Py 2015


Upserting wines:  59%|█████▉    | 101/170 [00:04<00:02, 28.25it/s]

Inserted Château des Jacques Morgon Côte du Py 2016
Inserted Château des Jacques Morgon Côte du Py 2016
Inserted ChÃ¢teau des Jacques Morgon CÃ´te du Py 2017
Inserted Château des Jacques Morgon Côte du Py 2017
Inserted Louis Jadot Clos Vougeot Grand Cru 2014


Upserting wines:  62%|██████▏   | 105/170 [00:04<00:02, 26.82it/s]

Inserted Louis Jadot Château des Jacques Grand Clos de Loyse 2017
Subcategory not found for None
Inserted Maison Louis Jadot Gevrey-Chambertin Premier Cru Petite Chapelle 2016
Inserted Louis Jadot Puligny-Montrachet Premier Cru Clos de la Garenne 2016
Inserted Louis Jadot Puligny-Montrachet Premier Cru Clos de la Garenne 2017


Upserting wines:  66%|██████▌   | 112/170 [00:04<00:02, 25.63it/s]

Inserted Louis Jadot Chambolle-Musigny Premier Cru Les Amoureuses 2015
Inserted Louis Jadot Gevrey-Chambertin Premier Cru Clos Saint-Jacques 2013
Inserted Domaine du Duc de Magenta Chassagne-Montrachet Premier Cru Morgeot Monopole Clos de la Chapelle 2017
Inserted Louis Jadot Bourgogne Pinot Noir Couvent des Jacobins 2015


Upserting wines:  68%|██████▊   | 115/170 [00:04<00:02, 23.83it/s]

Inserted Louis Jadot Pommard Premier Cru Charmots 1999
Inserted Château des Jacques Moulin-à-Vent 2016
Inserted Louis Jadot Beaune 1er Cru Clos des Ursules 2016
Inserted Louis Jadot Beaune Premier Cru Clos des Ursules 2017
Inserted Louis Jadot Beaune Grèves Le Clos Blanc Premier Cru 2010


Upserting wines:  71%|███████   | 121/170 [00:04<00:02, 23.98it/s]

Inserted Côte de Beaune Volnay Santenots Premier Cru Louis Jadot 2017
Inserted Moulin-à-Vent Clos des Thorins Château des Jacques 2017
Inserted Louis-Jadot  Célébration Beaune 1er Cru 2015
Inserted Château des Jacques Moulin-à-Vent La Roche 2015
Inserted Château des Jacques Moulin-à-Vent La Roche 2016


Upserting wines:  75%|███████▍  | 127/170 [00:05<00:01, 22.70it/s]

Inserted Château des Jacques Moulin-à-Vent La Roche 2017
Inserted Louis Jadot, Chablis Grand Cru Blanchot 2016
Inserted Louis Jadot, Chablis Grand Cru Blanchot 2017
Inserted Louis Jadot Chablis Grand Cru Les Clos 2015
Inserted Louis Jadot Chablis Grand Cru Les Clos 2015


Upserting wines:  78%|███████▊  | 133/170 [00:05<00:01, 22.09it/s]

Inserted Louis Jadot Chablis Grand Cru Les Clos 2016
Inserted Louis Jadot Chablis Grand Cru Les Clos 2017
Inserted Louis Jadot Chablis 1er Cru Montée de Tonnerre 2015
Inserted Louis Jadot Chablis 1er Cru Montée de Tonnerre 2016
Inserted Louis Jadot Chablis Premier Cru Montée de Tonnerre 2017


Upserting wines:  80%|████████  | 136/170 [00:05<00:01, 21.83it/s]

Inserted Louis Jadot Chablis Grand Cru Preuses 2015
Inserted Louis Jadot Chablis Grand Cru Preuses 2015
Inserted Louis Jadot Chablis Grand Cru Vaudésir 2015
Inserted Louis Jadot Chablis Grand Cru Vaudésir 2016
Inserted Louis Jadot Chablis Grand Cru Vaudésir 2017


Upserting wines:  84%|████████▎ | 142/170 [00:05<00:01, 22.99it/s]

Inserted Louis Jadot Chablis 1er Cru Fourchaume 2015
Inserted Louis Jadot Chablis 1er Cru Fourchaume 2017
Inserted Louis Jadot Nuits-Saint-Georges Premier Cru 2015
Subcategory not found for Louis Jadot Vosne-Romanée Premier Cru Les Suchots 2015
Subcategory not found for Louis Jadot Vosne-Romanée Premier Cru Les Suchots 2017


Upserting wines:  87%|████████▋ | 148/170 [00:06<00:00, 23.11it/s]

Inserted Louis Jadot Morey-Saint-Denis Premier Cru Clos des Ormes 2016
Inserted Louis Jadot Saint-Aubin 2015
Inserted Louis Jadot Puligny-Montrachet 2017
Inserted Château des Jacques Moulin-à-Vent Clos de Rochegrès 2015
Inserted Château des Jacques Moulin-à-Vent Clos de Rochegrès 2016


Upserting wines:  91%|█████████ | 154/170 [00:06<00:00, 21.90it/s]

Inserted Château des Jacques Moulin-à-Vent Clos de Rochegrès 2016
Inserted Château des Jacques Moulin-à-Vent Clos de Rochegrès 2017
Inserted Louis Jadot Nuits-Saint-Georges Premier Cru Les Boudots 2016
Inserted Louis Jadot Puligny-Montrachet Premier Cru Les Referts 2016
Inserted Louis Jadot Puligny-Montrachet Premier Cru Les Referts 2015


Upserting wines:  94%|█████████▍| 160/170 [00:06<00:00, 22.74it/s]

Inserted Château des Jacques Morgon 2016
Inserted Château des Jacques Moulin-à-vent Clos du Grand Carquelin 2015
Inserted Château des Jacques Moulin-à-vent Clos du Grand Carquelin 2016
Inserted Château des Jacques Moulin-à-vent Clos du Grand Carquelin 2017
Inserted Louis Jadot Nuits Saint-Georges 1er Cru Les Vaucrains 2016


Upserting wines:  96%|█████████▌| 163/170 [00:06<00:00, 22.37it/s]

Inserted Louis Jadot Nuits Saint-Georges Premier Cru Les Vaucrains 2017
Inserted L Jadot Chassagne Montrachet 1er Cru Les Embazées 2016
Inserted Louis Jadot Aloxe-Corton 2016
Inserted Louis Jadot Meursault Premier Cru Perrières 2016
Inserted Louis Jadot Meursault Premier Cru Perrières 2017


Upserting wines: 100%|██████████| 170/170 [00:07<00:00, 23.98it/s]

Inserted Louis Jadot Monthélie Premier Cru Champs Fulliot 2016
Subcategory not found for Louis Jadot Coteaux Bourguignons 2017
Inserted Louis Jadot Corton-Pougets Grand Cru 2017
Inserted Louis Jadot Côtes-de-Nuits-Villages Le Vaucrain 2017
Inserted Louis Jadot Beaune Premier Cru Grèves Le Clos Blanc 2017





# Step 3: Update volumes

In [21]:
import os

import psycopg2

# Reconnect for this step. The connection string is read from the DATABASE_URL
# environment variable so that no credentials are stored in the notebook, e.g.:
#   export DATABASE_URL="postgresql://<user>:<password>@<host>:5432/postgres"
# NOTE(review): the password that used to be hardcoded here has been exposed in
# the notebook and should be rotated.
DB_URL = os.environ.get("DATABASE_URL")
if not DB_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; export the Postgres connection string before running this cell."
    )

conn = psycopg2.connect(DB_URL)

In [23]:
import re

def update_product_volumes(df, conn, unit_equivalence_id='a9e5e8dd-098c-4f76-86f8-08f36620ac0c'):
    """Write the parsed bottle format of each row onto its ``product`` record.

    ``product_details_format`` (e.g. "750 ml", "1,5 L") is split into a numeric
    quantity and a unit abbreviation. The unit is resolved to a ``lookup_id``
    through ``measure_lkp.lookup_abbreviation_fr`` (case-insensitive). The
    product whose ``product_upc`` equals the row's ``product_details_code_cup``
    is then updated.

    Every update is committed on its own. If a row fails, the transaction is
    rolled back, the error is printed, and processing continues with the next
    row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``product_details_format`` and
        ``product_details_code_cup``; ``product_name`` is used in messages.
    conn
        Open database connection exposing ``cursor()``, ``commit()`` and
        ``rollback()``.
    unit_equivalence_id : str, optional
        Value written to ``product.unit_equivalence_id`` for every updated
        row. Defaults to the fixed UUID this notebook has always used.

    Returns
    -------
    None
    """
    # Rows without a format have nothing to parse.
    df = df[df['product_details_format'].notnull()].copy()

    # unit abbreviation -> lookup_id (None when measure_lkp has no such unit),
    # so each distinct unit is queried once instead of once per row.
    unit_lookup = {}

    cursor = conn.cursor()
    try:
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Updating volume & unit info"):
            # str() guards against a format that was parsed as a number;
            # decimal commas are normalised so float() can read them.
            fmt = str(row['product_details_format']).replace(',', '.').strip()

            # Match format like "750 ml", "1 L", "1.5 L", etc.
            match = re.match(r'([\d\.]+)\s*([a-zA-Z]+)', fmt)
            if not match:
                print(f"Could not parse format: {fmt}")
                continue

            try:
                volume_qty = float(match.group(1))
            except ValueError:
                # The numeric group also accepts strings such as "1.5.0" or
                # "."; treat them as unparseable instead of aborting the run.
                print(f"Could not parse format: {fmt}")
                continue
            unit = match.group(2).lower()

            upc = row.get('product_details_code_cup')
            if upc is None or upc != upc:  # None or NaN: no product can match
                continue

            try:
                # Get lookup_id from measure_lkp
                if unit not in unit_lookup:
                    cursor.execute("""
                        SELECT lookup_id
                        FROM measure_lkp
                        WHERE lower(lookup_abbreviation_fr) = %s
                    """, (unit,))
                    result = cursor.fetchone()
                    unit_lookup[unit] = result[0] if result else None

                volume_equivalence_id = unit_lookup[unit]
                if volume_equivalence_id is None:
                    print(f"No lookup_id found for unit: {unit}")
                    continue

                cursor.execute("""
                    UPDATE product
                    SET volume_equivalence_qty = %s,
                        unit_equivalence_id = %s,
                        volume_equivalence_id = %s
                    WHERE product_upc = %s
                """, (
                    volume_qty,
                    unit_equivalence_id,
                    volume_equivalence_id,
                    upc
                ))
                conn.commit()
            except Exception as e:
                # Best effort per row: undo the failed statement, report, move on.
                conn.rollback()
                print(f"Error updating {row.get('product_name', '')}: {e}")
    finally:
        # Release the cursor even if something outside the per-row handler fails.
        cursor.close()


In [24]:
# Write the parsed format (volume quantity and unit) onto the matching product rows.
update_product_volumes(df, conn)

Updating volume & unit info: 100%|██████████| 179/179 [00:04<00:00, 35.90it/s]


# Step 4: Insert product_sizes

In [50]:
import os

import psycopg2

# Reconnect for this step. The connection string is read from the DATABASE_URL
# environment variable so that no credentials are stored in the notebook, e.g.:
#   export DATABASE_URL="postgresql://<user>:<password>@<host>:5432/postgres"
# NOTE(review): the password that used to be hardcoded here has been exposed in
# the notebook and should be rotated.
DB_URL = os.environ.get("DATABASE_URL")
if not DB_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; export the Postgres connection string before running this cell."
    )

conn = psycopg2.connect(DB_URL)

In [48]:
from uuid import uuid4

def populate_product_sizes(df, conn):
    """Create a ``product_sizes`` row for every product of ``df`` lacking one.

    For each row with a UPC (``product_details_code_cup``), the matching
    ``product`` record is looked up by ``product_upc``. Its unit and volume
    equivalence values are then copied into a new ``product_sizes`` row,
    unless a row with that ``upc`` already exists there. Rows whose product is
    not found are skipped silently.

    Every insert is committed on its own. If a row fails, the transaction is
    rolled back, the error is printed, and processing continues with the next
    row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``product_details_code_cup``; ``product_name`` is used in
        error messages.
    conn
        Open database connection exposing ``cursor()``, ``commit()`` and
        ``rollback()``.

    Returns
    -------
    None
    """
    cursor = conn.cursor()
    try:
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Populating product_size"):
            upc = row.get("product_details_code_cup")

            # Skip missing UPCs. NaN is truthy, so it needs its own check
            # (NaN is the only value that is not equal to itself).
            if not upc or upc != upc:
                continue

            try:
                # Retrieve product info
                cursor.execute("""
                    SELECT product_id,
                        unit_equivalence_qty, unit_equivalence_id,
                        volume_equivalence_qty, volume_equivalence_id
                    FROM product
                    WHERE product_upc = %s
                """, (upc,))
                product = cursor.fetchone()

                if not product:
                    continue  # Product not found

                product_id, unit_qty, unit_id, vol_qty, vol_eq_id = product

                # Check if already exists for UPC
                cursor.execute("SELECT 1 FROM product_sizes WHERE upc = %s", (upc,))
                if cursor.fetchone():
                    continue  # Already exists for this UPC

                # Insert new entry
                size_id = str(uuid4())
                cursor.execute("""
                    INSERT INTO product_sizes (
                        product_size_id, product_id,
                        upc,
                        unit_equivalence_qty, unit_equivalence_id,
                        volume_equivalence_measure_id, volume_equivalence_qty
                    )
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                """, (
                    size_id, product_id,
                    upc,
                    unit_qty, unit_id,
                    vol_eq_id, vol_qty
                ))
                conn.commit()

            except Exception as e:
                # Best effort per row: undo the failed statement, report, move on.
                conn.rollback()
                print(f"Error inserting product_size for {row.get('product_name')}: {e}")
    finally:
        # Release the cursor even if something outside the per-row handler fails.
        cursor.close()




In [51]:
# Create product_sizes rows for the loaded products that do not have one yet.
populate_product_sizes(df, conn)


Populating product_size: 100%|██████████| 180/180 [00:06<00:00, 29.06it/s]


In [None]:
# TODO: insert links into prod_dist_link (not implemented yet)