In [None]:
import os
import csv
import json
import collections
from openai import OpenAI
from supabase import create_client
from pydantic import BaseModel
from typing import Union
from dotenv import load_dotenv

# Pull settings from a .env file (if any) into the process environment,
# then read the Supabase connection details from it.
load_dotenv()
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

# Shared service clients used by the cells below.
# NOTE(review): OpenAI() is given no arguments here; presumably it picks up
# its API key from the environment loaded above -- confirm.
client_supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
client_openai = OpenAI()

In [2]:
# Plain-text description of the procurement schema that is embedded in the
# LLM prompt. Table numbers must be unique so the model can tell the tables
# apart ("specs" was previously labelled "Table 7", duplicating "suppliers").
SCHEMA_DESCRIPTION = """
We have a procurement database with the following tables:

**Table 1: dispatch_parameters**
Description: This table contains information about the minimum stock levels, reorder quantities, and reorder intervals for different parts.
Columns:
- part_id
- min_stock_level
- reorder_quantity
- reorder_interval_days

**Table 2: material_master**
Description: This table provides detailed information about each part, including its name, type, models it is used in, dimensions, weight, and any related parts.
Columns:
- part_id
- part_name
- part_type
- used_in_models
- dimensions
- weight
- blocked_parts
- successor_parts
- comment

**Table 3: material_orders**
Description: This table records purchase orders for parts, including order details, supplier information, and delivery status.
Columns:
- order_id
- part_id
- quantity_ordered
- order_date
- expected_delivery_date
- supplier_id
- status
- actual_delivered_at

**Table 4: sales_orders**
Description: This table contains information about sales orders, including the model, version, quantity, order type, and dates related to the order process.
Columns:
- sales_order_id
- model
- version
- quantity
- order_type
- requested_date
- created_at
- accepted_request_date

**Table 5: stock_levels**
Description: This table provides the current inventory levels of parts in different warehouse locations.
Columns:
- part_id
- part_name
- location
- quantity_available

**Table 6: stock_movements**
Description: This table records transactions related to inventory movements, including inbound and outbound quantities.
Columns:
- date
- part_id
- type
- quantity

**Table 7: suppliers**
Description: This table contains information about suppliers, including pricing, lead times, minimum order quantities, and reliability ratings for different parts.
Columns:
- supplier_id
- part_id
- price_per_unit
- lead_time_days
- min_order_qty
- reliability_rating

**Table 8: specs**
Description: This table contains information about the required parts to assemble a product, including the part name and the quantity required.
Columns:
- product_id
- product_name
- part_id
- quantity
"""

In [1]:
# NOTE(review): this import lives outside the notebook's top import cell;
# a fresh "Run All" only works if this cell is executed before read_eml is used.
from file_reader import read_eml

# Open one sample supplier email in binary mode, hand the open file object to
# read_eml, and print whatever it returns (the recorded output below shows the
# From/To/Subject/Date headers followed by the message body).
with open('hugo_data_samples/emails/email_001_Delay_on_O5007_S1_V1_500W_Brushless_Motor.eml', 'rb') as file_obj:
    read = read_eml(file_obj)
    print(read)

From: logistics@supA.com
To: warehouse_manager@voltway.co
Subject: Delay on O5007 – S1 V1 500W Brushless Motor
Date: 2025-03-18 09:24

Hi Team,

I’m writing to let you know that Purchase Order **O5007** (S1 V1 500W Brushless Motor, part P300) is now delayed.  
Our production line experienced a tooling issue last week, so the expected delivery date has shifted from **2025-03-20** to **2025-04-05**.

We apologize for the inconvenience—this is the first time we’ve had a delay of this length.  
Please let me know if you need any interim stock or partial shipment.

Best,  
Ana Torres  
Supply Chain Coordinator  
SupA



In [65]:
def read_csv(filepath):
    """Load a CSV file and return its rows serialized as one JSON string.

    The first line of the file is used as the header; every following line
    becomes a dict keyed by those header names. All cell values stay strings,
    exactly as ``csv.DictReader`` yields them.

    Args:
        filepath: Path of the UTF-8 encoded CSV file to read.

    Returns:
        str: JSON text encoding a list of row dicts.
    """
    with open(filepath, mode="r", newline='', encoding="utf-8") as csv_file:
        records = [record for record in csv.DictReader(csv_file)]
    return json.dumps(records)

In [67]:
# Load the dispatch-parameters sample file. read_csv returns the rows as a
# single JSON string (a list of dicts), not as Python objects.
rows = read_csv("hugo_data_samples/dispatch_parameters.csv")

In [None]:
# One cell of a row to write: a column name paired with the value to store.
class Column(BaseModel):
    # Name of the target column.
    column_name: str
    # Scalar value for that column; None is accepted as well.
    value: Union[str, int, float, None]

# One row to write, expressed as the destination table plus its column/value pairs.
class Row(BaseModel):
    # Name of the table this row belongs to.
    table_name: str
    # The column/value pairs that make up the row.
    columns: list[Column]

# Top-level structured-output model passed as `response_format` to the
# OpenAI parse call below: the full list of rows the model proposes.
class Response(BaseModel):
    # All proposed rows, possibly spanning several tables.
    rows: list[Row]

# `rows` is already JSON text when it comes from read_csv, so it is embedded
# as-is: running json.dumps over a string would wrap it in quotes and escape
# every inner quote, handing the model a doubly-encoded blob. Anything that is
# not a string is still serialized exactly once.
rows_json = rows if isinstance(rows, str) else json.dumps(rows)

prompt = f"""
{SCHEMA_DESCRIPTION}

Here is the input data:
{rows_json}
"""

# Structured-output request: the reply is parsed into the `Response` model
# (a list of rows, each naming its table and column/value pairs).
completion = client_openai.beta.chat.completions.parse(
    model="gpt-4.1-2025-04-14",
    response_format=Response,
    messages=[
        {
            "role": "system",
            "content":
            """
            You are a procurement assistant. Decide the schema rows to edit,
            using provided schema description and input data.
            In addition, determine the columns to edit and their values.
            """
        },
        {
            "role": "user",
            "content": prompt
        }
    ]
)

message = completion.choices[0].message
result = message.parsed

# Fail here with a clear message instead of a later AttributeError on
# `result.rows` when no parsed payload came back (e.g. the model refused).
if result is None:
    raise ValueError(
        f"OpenAI returned no parsed response (refusal: {getattr(message, 'refusal', None)!r})"
    )

In [64]:
# Maps a foreign-key column name to the parent table that is queried (and, if
# needed, back-filled) for the referenced values by verify_foreign_key.
FOREIGN_KEYS = {"part_id": "material_master"}

def verify_foreign_key(foreign_key, rows):
    """Make sure every ``foreign_key`` value used in ``rows`` exists in its parent table.

    The parent table is looked up in ``FOREIGN_KEYS``. Values that are not yet
    present there are inserted as placeholder rows containing only the key
    column, so that a following write of ``rows`` does not trip the foreign-key
    constraint.

    Args:
        foreign_key: Name of the foreign-key column; must be a key of
            ``FOREIGN_KEYS``.
        rows: Iterable of dict-like rows about to be written.

    Returns:
        set: The key values that were missing from the parent table and have
        been inserted as placeholders. Empty when nothing had to be inserted
        (including when no row carries a non-null value for ``foreign_key``).
    """
    # Rows without the column, or with a null value in it, reference nothing;
    # a null must never be turned into a placeholder parent row.
    referenced_keys = {
        row[foreign_key]
        for row in rows
        if foreign_key in row and row[foreign_key] is not None
    }

    if not referenced_keys:
        return set()

    parent_table = FOREIGN_KEYS[foreign_key]
    response = (
        client_supabase.table(parent_table)
        .select(foreign_key)
        .in_(foreign_key, list(referenced_keys))
        .execute()
    )

    existing_keys = {item[foreign_key] for item in (response.data or [])}
    missing_keys = referenced_keys - existing_keys

    # Only hit the API when there is something to insert; sending an empty
    # payload is at best a wasted round trip.
    if missing_keys:
        placeholders = [{foreign_key: key} for key in missing_keys]
        client_supabase.table(parent_table).insert(placeholders).execute()

    return missing_keys
    

def upsert(rows):
    """Group parsed rows by table and upsert each group into Supabase.

    Before a table is written, every foreign key listed in ``FOREIGN_KEYS`` is
    verified via ``verify_foreign_key`` so that missing parent rows exist. The
    check is skipped when the table being written *is* the referenced parent
    table: the upsert itself creates those rows, and inserting bare
    placeholders into the same table first would be redundant (and may fail if
    the table has other required columns).

    Args:
        rows: Iterable of objects exposing ``table_name`` and ``columns``,
            where each column exposes ``column_name`` and ``value``.
    """
    tables = collections.defaultdict(list)
    for row in rows:
        record = {column.column_name: column.value for column in row.columns}
        tables[row.table_name].append(record)

    for table_name, records in tables.items():
        for foreign_key, parent_table in FOREIGN_KEYS.items():
            if parent_table == table_name:
                # Writing the parent table itself; nothing to back-fill.
                continue
            verify_foreign_key(foreign_key, records)
        client_supabase.table(table_name).upsert(records).execute()

# Write the rows proposed by the model to Supabase, grouped per table.
upsert(result.rows)

In [61]:
# Inspect the parsed rows: the bare expression displays the list of Row objects.
result.rows

[Row(table_name='dispatch_parameters', columns=[Column(column_name='part_id', value='P300'), Column(column_name='min_stock_level', value=63), Column(column_name='reorder_quantity', value=79), Column(column_name='reorder_interval_days', value=18)]),
 Row(table_name='dispatch_parameters', columns=[Column(column_name='part_id', value='P301'), Column(column_name='min_stock_level', value=39), Column(column_name='reorder_quantity', value=54), Column(column_name='reorder_interval_days', value=13)]),
 Row(table_name='dispatch_parameters', columns=[Column(column_name='part_id', value='P302'), Column(column_name='min_stock_level', value=41), Column(column_name='reorder_quantity', value=146), Column(column_name='reorder_interval_days', value=13)]),
 Row(table_name='dispatch_parameters', columns=[Column(column_name='part_id', value='P303'), Column(column_name='min_stock_level', value=79), Column(column_name='reorder_quantity', value=124), Column(column_name='reorder_interval_days', value=15)]),
 R

In [51]:
# Scratch check of the grouping step: collect the parsed rows per table as
# plain {column_name: value} dicts, then print each table's list of records.
tables = collections.defaultdict(list)
for row in result.rows:
    tables[row.table_name].append(
        {column.column_name: column.value for column in row.columns}
    )

for data in tables.values():
    print(data)

[{'part_id': 'P300', 'min_stock_level': 63, 'reorder_quantity': 79, 'reorder_interval_days': 18}, {'part_id': 'P301', 'min_stock_level': 39, 'reorder_quantity': 54, 'reorder_interval_days': 13}, {'part_id': 'P302', 'min_stock_level': 41, 'reorder_quantity': 146, 'reorder_interval_days': 13}, {'part_id': 'P303', 'min_stock_level': 79, 'reorder_quantity': 124, 'reorder_interval_days': 15}, {'part_id': 'P304', 'min_stock_level': 45, 'reorder_quantity': 119, 'reorder_interval_days': 14}, {'part_id': 'P305', 'min_stock_level': 75, 'reorder_quantity': 62, 'reorder_interval_days': 7}, {'part_id': 'P306', 'min_stock_level': 25, 'reorder_quantity': 66, 'reorder_interval_days': 18}, {'part_id': 'P307', 'min_stock_level': 52, 'reorder_quantity': 187, 'reorder_interval_days': 8}, {'part_id': 'P308', 'min_stock_level': 30, 'reorder_quantity': 58, 'reorder_interval_days': 10}, {'part_id': 'P309', 'min_stock_level': 49, 'reorder_quantity': 193, 'reorder_interval_days': 17}, {'part_id': 'P310', 'min_s

In [None]:
# Minimal upsert using a part_id that has no parent row. The recorded output
# of this cell is an APIError (code 23503): the write violates the
# "dispatch_parameters_part_id_fkey" constraint because key "1" is not
# present in material_master.
data = [
    {"part_id": "1"}
]
response = client_supabase.table("dispatch_parameters").upsert(data).execute()

APIError: {'code': '23503', 'details': 'Key (part_id)=(1) is not present in table "material_master".', 'hint': None, 'message': 'insert or update on table "dispatch_parameters" violates foreign key constraint "dispatch_parameters_part_id_fkey"'}

In [14]:
# Peek at the first parsed row only: its table name, then its column list.
# Slicing (rather than indexing) keeps this a no-op when there are no rows.
for row in result.rows[:1]:
    print(row.table_name, row.columns, sep="\n")

dispatch_parameters
[Column(column_name='part_id', value='P300'), Column(column_name='min_stock_level', value=63), Column(column_name='reorder_quantity', value=79), Column(column_name='reorder_interval_days', value=18)]
