In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import lingam

from graphviz import Digraph
from lingam.utils import make_dot
from IPython.display import Image, display, Markdown
import matplotlib.dates as mdates

In [2]:
# Edge tables: one CSV per relationship type (plant, product group,
# product sub-group, storage location).
df_edges_plant = pd.read_csv("../data/SupplyGraph/Edges/Edges (Plant).csv")
df_edges_product_group = pd.read_csv("../data/SupplyGraph/Edges/Edges (Product Group).csv")
df_edges_product_subgroup = pd.read_csv("../data/SupplyGraph/Edges/Edges (Product Sub-Group).csv")
df_edges_storage_location = pd.read_csv("../data/SupplyGraph/Edges/Edges (Storage Location).csv")

# Node tables. Later cells look products up through the 'Node' column of
# df_nodes, df_nodes_productgroup_and_subgroup and df_nodes_plant_and_storage.
# NOTE(review): the schema of NodesIndex.csv is not used below; confirm before relying on it.
df_nodes_productgroup_and_subgroup = pd.read_csv("../data/SupplyGraph/Nodes/Node Types (Product Group and Subgroup).csv")
df_nodes_plant_and_storage = pd.read_csv("../data/SupplyGraph/Nodes/Nodes Type (Plant & Storage).csv")
df_nodes = pd.read_csv("../data/SupplyGraph/Nodes/Nodes.csv")
df_nodes_index = pd.read_csv("../data/SupplyGraph/Nodes/NodesIndex.csv")

# Temporal tables, all read from the "Temporal Data/Unit" folder.
# df_sales_order is used below as a wide table: a 'Date' column plus one
# column per product. The other three presumably share that layout; TODO confirm.
df_delivery_to_distributor = pd.read_csv("../data/SupplyGraph/Temporal Data/Unit/Delivery To distributor.csv")
df_factory_issue = pd.read_csv("../data/SupplyGraph/Temporal Data/Unit/Factory Issue.csv")
df_production = pd.read_csv("../data/SupplyGraph/Temporal Data/Unit/Production.csv")
df_sales_order = pd.read_csv("../data/SupplyGraph/Temporal Data/Unit/Sales Order.csv")

## Node Definitions

- **Product**: `df_nodes` (contains all products)
- **ProductGroup**: `df_nodes_productgroup_and_subgroup` (Product Group types)
- **Production Facility**: `df_nodes_plant_and_storage` (Plant nodes)
- **Storage Location**: `df_nodes_plant_and_storage` (Storage nodes)
- **Distributor**: Not explicitly present in the loaded dataframes
- **SalesOrder**: `df_sales_order` (temporal data; `df_weight_sales_order` is not loaded in the cells above)
- **Delivery**: `df_delivery_to_distributor` (temporal data)
- **FactoryIssue**: `df_factory_issue` (temporal data)

## Edge Definitions

- **Product → ProductGroup** (belongs to): `df_edges_product_group`
- **Product → ProductionFacility** (produced at): `df_edges_plant`
- **Product → Storage** (stored at): `df_edges_storage_location`
- **Product → Distributor** (delivered to): `df_delivery_to_distributor`
- **ProductGroup → ProductionFacility** (uses raw materials): `df_edges_product_subgroup`
- **ProductionFacility → Storage** (sends): `df_edges_plant`, `df_edges_storage_location`
- **Storage → Distributor** (ships to): `df_delivery_to_distributor`
- **SalesOrder → Product** (requested for): `df_sales_order`
- **ProductionFacility → Delivery** (dispatches): `df_delivery_to_distributor`
- **ProductionFacility → FactoryIssue** (logs issue): `df_factory_issue`
- **Delivery → Distributor** (received by): `df_delivery_to_distributor`
- **FactoryIssue → Storage** (issues to): `df_factory_issue`

<br> <br> <br>


### 1. Sales Order

In [107]:
# Parse the 'Date' column into datetimes: the date filter in return_rows
# below relies on the .dt accessor, which only works on datetime data.
df_sales_order['Date'] = pd.to_datetime(df_sales_order['Date'])

def return_rows(product="all", entry_date="all", df=None):
    """Select rows and columns from a wide, per-product temporal table.

    Parameters
    ----------
    product : str, default "all"
        Name of a single product column to keep (returned together with
        ``Date``), or ``"all"`` to keep every column.
    entry_date : str or date-like, default "all"
        Calendar day to keep (anything ``pd.to_datetime`` can parse), or
        ``"all"`` to keep every row.
    df : pandas.DataFrame, optional
        Table holding a ``Date`` column plus one column per product.
        Defaults to the notebook-level ``df_sales_order`` so existing
        calls keep working unchanged.

    Returns
    -------
    pandas.DataFrame
        The selected rows restricted to the selected columns.

    Raises
    ------
    ValueError
        If ``product`` is not ``"all"`` and is not a column of the table.
    """
    if df is None:
        # Backward-compatible default: the sales-order table loaded above.
        df = df_sales_order

    if product == "all":
        cols = df.columns.tolist()
    else:
        if product not in df.columns:
            raise ValueError(f"Product '{product}' not found in df_sales_order columns")
        cols = ["Date", product]

    if entry_date == "all":
        return df[cols]

    # Compare on the calendar day only so any time-of-day component is ignored.
    # Coercing with pd.to_datetime keeps the filter working even when the
    # 'Date' column has not been converted to datetime yet.
    target_day = pd.to_datetime(entry_date).date()
    same_day = pd.to_datetime(df["Date"]).dt.date == target_day
    return df.loc[same_day, cols]


# Other ways to query the sales-order table:
#   return_rows(product="all", entry_date='all')                 every row, every column
#   return_rows(product="all", entry_date='2023-01-05')          every product on one day
#   return_rows(product="SOS005L04P", entry_date="2023-02-10")   one product on one day

# Sales orders for a single product on a single day.
sales_order_row = return_rows("SOS005L04P", '2023-01-05')
display(sales_order_row)

# Reshape wide -> long: one (Date, Product, Quantity) row per product column,
# discarding rows that contain missing values.
sales_order_long = (
    sales_order_row
    .melt(id_vars='Date', var_name='Product', value_name='Quantity')
    .dropna()
)
display(sales_order_long)



Unnamed: 0,Date,SOS005L04P
4,2023-01-05,14864.0


Unnamed: 0,Date,Product,Quantity
0,2023-01-05,SOS005L04P,14864.0


<br> <br> <br>

### Sales -> Product

In [44]:
# Distinct product codes found in the long-format sales orders (NaN codes skipped).
products = sales_order_long['Product'].dropna().unique()

for product in products:
    # Boolean mask over df_nodes marking the rows whose Node equals this product.
    node_matches = df_nodes['Node'] == product
    if not node_matches.any():
        raise ValueError(f"Product '{product}' not found in the Product list")
    # Print the matching row(s) of df_nodes for the product.
    print(df_nodes[node_matches])

         Node
1  SOS005L04P


<br> <br> <br>

### Product -> Product Group / Product Sub Group

Which Product Group and Product Sub-Group does each product **belong to**?

In [83]:
# Group / sub-group rows for every product that appears in sales_order_long.
matched_nodes = df_nodes_productgroup_and_subgroup.loc[
    lambda nodes: nodes['Node'].isin(sales_order_long['Product'])
]

display(matched_nodes)

Unnamed: 0,Node,Group,Sub-Group
1,SOS005L04P,S,SOS


<br> <br> <br>

### Product -> Production Facility

Products are **produced at** a Production Facility <br>
Products **use raw materials** at a Production Facility

In [80]:
# Plant / storage-location rows for the product(s) in sales_order_long.
matched_nodes = df_nodes_plant_and_storage.loc[
    df_nodes_plant_and_storage['Node'].isin(sales_order_long['Product'])
]

# Show each distinct (Node, Plant, Storage Location) row once.
display(matched_nodes.drop_duplicates())

Unnamed: 0,Node,Plant,Storage Location
10,SOS005L04P,2120,2030.0
21,SOS005L04P,2111,1130.0
32,SOS005L04P,2121,2130.0
66,SOS005L04P,2119,1930.0
82,SOS005L04P,2114,1430.0
88,SOS005L04P,2116,1630.0
182,SOS005L04P,2112,1230.0
185,SOS005L04P,2117,1730.0
250,SOS005L04P,2115,1530.0
332,SOS005L04P,2103,330.0


<br> <br> <br>

### Product -> Storage Location

Products are **stored at** a Storage Location


In [84]:
# Distinct plant / storage-location rows for the product(s) in sales_order_long;
# left as the cell's last expression so the notebook renders it.
(
    df_nodes_plant_and_storage
    .loc[lambda nodes: nodes['Node'].isin(sales_order_long['Product'])]
    .drop_duplicates()
)

Unnamed: 0,Node,Plant,Storage Location
10,SOS005L04P,2120,2030.0
21,SOS005L04P,2111,1130.0
32,SOS005L04P,2121,2130.0
66,SOS005L04P,2119,1930.0
82,SOS005L04P,2114,1430.0
88,SOS005L04P,2116,1630.0
182,SOS005L04P,2112,1230.0
185,SOS005L04P,2117,1730.0
250,SOS005L04P,2115,1530.0
332,SOS005L04P,2103,330.0


<br> <br> <br>

### Production Facility -> Storage Location

Production Facility **sends to** Storage Location

In [104]:
# Long-format view of the whole sales-order table: one (Date, Product, Quantity)
# row per product column and date, with rows containing missing values removed.
# NOTE(review): this rebinds sales_order_long, which earlier cells use for the
# single-product selection; re-running those cells afterwards changes their output.
sales_order_long = (
    df_sales_order
    .melt(id_vars='Date', var_name='Product', value_name='Quantity')
    .dropna()
)
display(sales_order_long)

Unnamed: 0,Date,Product,Quantity
0,2023-01-01,SOS008L02P,1355.0
1,2023-01-02,SOS008L02P,338.0
2,2023-01-03,SOS008L02P,226.0
3,2023-01-04,SOS008L02P,223.5
4,2023-01-05,SOS008L02P,233.0
...,...,...,...
9056,2023-08-05,EEA200G24P,0.0
9057,2023-08-06,EEA200G24P,0.0
9058,2023-08-07,EEA200G24P,0.0
9059,2023-08-08,EEA200G24P,0.0
