In [33]:
import os
import csv
import json
from openai import OpenAI
from supabase import create_client, Client
from pydantic import BaseModel
from typing import Union
from dotenv import load_dotenv

# Populate the process environment from a .env file, then read the Supabase
# connection settings out of it.
load_dotenv()

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

# Supabase client built from the URL/key pair read above.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# No credentials are passed explicitly, so the OpenAI SDK falls back to its own
# environment-based configuration (e.g. OPENAI_API_KEY).
client = OpenAI()

In [14]:
# Plain-text description of the procurement schema. It is interpolated verbatim
# into the LLM prompt, so table numbering must be unique: `specs` is Table 8
# (it previously shared the number 7 with `suppliers`).
SCHEMA_DESCRIPTION = """
We have a procurement database with the following tables:

**Table 1: dispatch_parameters**
Description: This table contains information about the minimum stock levels, reorder quantities, and reorder intervals for different parts.
Columns:
- part_id
- min_stock_level
- reorder_quantity
- reorder_interval_days

**Table 2: material_master**
Description: This table provides detailed information about each part, including its name, type, models it is used in, dimensions, weight, and any related parts.
Columns:
- part_id
- part_name
- part_type
- used_in_models
- dimensions
- weight
- blocked_parts
- successor_parts
- comment

**Table 3: material_orders**
Description: This table records purchase orders for parts, including order details, supplier information, and delivery status.
Columns:
- order_id
- part_id
- quantity_ordered
- order_date
- expected_delivery_date
- supplier_id
- status
- actual_delivered_at

**Table 4: sales_orders**
Description: This table contains information about sales orders, including the model, version, quantity, order type, and dates related to the order process.
Columns:
- sales_order_id
- model
- version
- quantity
- order_type
- requested_date
- created_at
- accepted_request_date

**Table 5: stock_levels**
Description: This table provides the current inventory levels of parts in different warehouse locations.
Columns:
- part_id
- part_name
- location
- quantity_available

**Table 6: stock_movements**
Description: This table records transactions related to inventory movements, including inbound and outbound quantities.
Columns:
- date
- part_id
- type
- quantity

**Table 7: suppliers**
Description: This table contains information about suppliers, including pricing, lead times, minimum order quantities, and reliability ratings for different parts.
Columns:
- supplier_id
- part_id
- price_per_unit
- lead_time_days
- min_order_qty
- reliability_rating

**Table 8: specs**
Description: This table contains information about the required parts to assemble a product, including the part name and the quantity required.
Columns:
- product_id
- product_name
- part_id
- quantity
"""

In [4]:
def read_csv(filepath):
    """Parse a UTF-8 CSV file into a list of row dicts.

    The first line of the file supplies the keys; every following line becomes
    one dict mapping those keys to the (string) cell values.
    """
    with open(filepath, mode="r", newline='', encoding="utf-8") as handle:
        return [record for record in csv.DictReader(handle)]

In [12]:
# Load the material_master sample as a list of row dicts; `rows` is later
# serialized into the LLM prompt. The path is relative to the notebook's
# working directory.
rows = read_csv("hugo_data_samples/material_master.csv")

In [39]:
# One column/value pair inside a row returned by the model.
# NOTE(review): documented with `#` comments rather than a docstring on purpose —
# pydantic copies class docstrings into the generated JSON schema, and this model
# is part of the `response_format` schema sent to the API.
class Column(BaseModel):
    column_name: str
    # Scalar values only: string, integer, float, or null.
    value: Union[str, int, float, None]

# A single record: the name of the table it belongs to plus its column/value pairs.
class Row(BaseModel):
    table_name: str
    data: list[Column]

# Top-level structured-output schema passed as `response_format`.
# Despite the field name, each entry in `tables` is one Row, so the same
# table_name can appear in several entries.
class Response(BaseModel):
    tables: list[Row]

# User message: the schema description followed by every entry of `rows`
# serialized as one JSON array.
# NOTE(review): the whole input is inlined, so prompt size grows with the CSV —
# confirm it stays within the model's context window for larger files.
prompt = f"""
{SCHEMA_DESCRIPTION}

Here is the input data:
{json.dumps(rows)}
"""

# Request a structured completion: `response_format=Response` asks the SDK to
# parse the reply into the `Response` pydantic model (read back via `.parsed`).
# NOTE(review): the system prompt literal keeps its source indentation, so the
# leading spaces on each line are sent to the model as written.
completion = client.beta.chat.completions.parse(
    model="gpt-4.1-2025-04-14",
    response_format=Response,
    messages=[
        {
            "role": "system", 
            "content": 
            """
            You are a procurement assistant. Decide the schema rows to edit,
            using provided schema description and input data.
            In addition, determine the values of the newly edited columns.
            """
        },
        {
            "role": "user",
            "content": prompt
        }
    ]
)

# `parsed` is None when the model refuses or the reply could not be turned into
# a `Response`. Fail here with the refusal text instead of letting a later cell
# trip over a None result.
result = completion.choices[0].message.parsed
if result is None:
    raise RuntimeError(
        "Model returned no parsed result; refusal: "
        f"{getattr(completion.choices[0].message, 'refusal', None)!r}"
    )

In [40]:
# Show the parsed Response as the cell's output (bare last expression).
result

Response(tables=[Row(table_name='material_master', data=[Column(column_name='part_id', value='P300'), Column(column_name='part_name', value='S1 V1 500W Brushless Motor'), Column(column_name='part_type', value='assembly'), Column(column_name='used_in_models', value='S1_V1'), Column(column_name='dimensions', value='N/A'), Column(column_name='weight', value='3.79'), Column(column_name='blocked_parts', value=''), Column(column_name='successor_parts', value='P304'), Column(column_name='comment', value='Obsolete V1 motor superseded by V2')]), Row(table_name='material_master', data=[Column(column_name='part_id', value='P301'), Column(column_name='part_name', value='S1 V1 Li-Ion 36V 10Ah Battery Pack'), Column(column_name='part_type', value='assembly'), Column(column_name='used_in_models', value='S1_V1'), Column(column_name='dimensions', value='N/A'), Column(column_name='weight', value='4.84'), Column(column_name='blocked_parts', value=''), Column(column_name='successor_parts', value='P305'), 

In [None]:
def read_file(filepath):
    """Load a ``.csv`` or ``.txt`` file as a list of dicts.

    The file type is chosen from the extension, compared case-insensitively:

    * ``.csv`` - one dict per data row, keyed by the header line.
    * ``.txt`` - one ``{"text": ...}`` dict per non-blank line, with
      surrounding whitespace stripped.

    Raises:
        ValueError: if the extension is neither ``.csv`` nor ``.txt``.
    """
    _, suffix = os.path.splitext(filepath)
    suffix = suffix.lower()

    # Reject unknown types up front so nothing is opened for them.
    if suffix not in (".csv", ".txt"):
        raise ValueError("Unsupported file type: must be .csv or .txt")

    if suffix == ".csv":
        with open(filepath, mode="r", newline='', encoding="utf-8") as handle:
            return [record for record in csv.DictReader(handle)]

    records = []
    with open(filepath, mode="r", encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            # Whitespace-only lines carry no content and are skipped.
            if text:
                records.append({"text": text})
    return records