# Class TemporalGraph

In [1]:
from functools import lru_cache
import os
import json
import networkx as nx
class TemporalGraph:
    """
    Sequence of graph snapshots stored as one JSON file per timestamp.

    Snapshots are parsed on demand. Each instance keeps its own small
    least-recently-used cache of parsed graphs, so dropping the instance also
    releases the graphs it loaded.
    """

    def __init__(self, files, cache_size=10):
        """
        Initialize TemporalGraph with a list of JSON files representing graphs at different timestamps.

        Parameters:
            files (list): JSON file paths; a timestamp is an index into this list.
            cache_size (int): Maximum number of parsed graphs kept in memory.
                Zero or a negative value disables caching.
        """
        self.files = files  # List of JSON file paths
        self._cache_size = cache_size
        # timestamp -> parsed graph; dict insertion order doubles as LRU order
        # (oldest entry first).
        self._graph_cache = {}

    def load_graph_at_timestamp(self, timestamp):
        """
        Load the graph for a specific timestamp from JSON and convert it to a NetworkX graph.

        Parameters:
            timestamp (int): Index into `self.files`.

        Returns:
            The parsed graph. Repeated calls for a cached timestamp return the
            same object, so in-place modifications are visible to later callers.
        """
        cache = self._graph_cache
        if timestamp in cache:
            # Re-insert so this entry becomes the most recently used one.
            graph = cache.pop(timestamp)
            cache[timestamp] = graph
            return graph

        with open(self.files[timestamp], 'r', encoding='utf-8') as f:
            data = json.load(f)
        graph = self._json_to_graph(data)

        cache[timestamp] = graph
        # Evict from the oldest end until the cache fits its size limit.
        while len(cache) > max(self._cache_size, 0):
            del cache[next(iter(cache))]
        return graph

    def clear_cache(self):
        """
        Drop every cached graph so the next load re-reads its JSON file.
        """
        self._graph_cache.clear()

    def _json_to_graph(self, data):
        """
        Convert JSON data to a NetworkX graph.

        Reads the keys "directed", "node_types", "node_values",
        "relationship_types" and "relationship_values" from `data`.
        """
        graph = nx.DiGraph() if data["directed"] else nx.Graph()

        # Add nodes: each row is zipped with the attribute names declared for
        # its node type; the last element of the row is used as the node ID.
        for node_type, nodes in data["node_values"].items():
            for node in nodes:
                node_attributes = dict(zip(data["node_types"][node_type], node))
                graph.add_node(node[-1], **node_attributes)

        # Add edges
        edge_schemas = data["relationship_types"]
        for row in data["relationship_values"]:
            if row[0] in edge_schemas:
                # Known relationship type: the last two elements are the edge
                # endpoints, everything before them is named by the schema.
                attributes = dict(zip(edge_schemas[row[0]], row[:-2]))
                graph.add_edge(row[-2], row[-1], **attributes)
            else:
                # Unknown relationship type: the first two elements are used
                # as endpoints and no attributes are attached.
                graph.add_edge(row[0], row[1])

        return graph



# Loading and Wrapper

In [3]:
import time  # Ensure this is imported properly
import tracemalloc
import functools

def time_and_memory(func):
    """
    Decorator that prints the wall-clock time and traced memory of each call.

    The report is printed whether the wrapped function returns or raises; the
    return value or exception is passed through unchanged.

    If tracemalloc is already tracing when the call starts (for example inside
    another decorated call), tracing is left running afterwards and the memory
    figures cover everything traced since that earlier start.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only start (and later stop) tracing if nobody else already did, so
        # an outer measurement is not cut short by this one.
        started_tracing = not tracemalloc.is_tracing()
        if started_tracing:
            tracemalloc.start()
        # perf_counter is monotonic, unlike time.time().
        start_time = time.perf_counter()

        try:
            # Call the actual function
            return func(*args, **kwargs)
        finally:
            # Calculate memory and time usage
            elapsed_time = time.perf_counter() - start_time
            current, peak = tracemalloc.get_traced_memory()
            if started_tracing:
                tracemalloc.stop()

            # Print results
            print(f"Time taken by '{func.__name__}': {elapsed_time:.2f} seconds")
            print(f"Memory used by '{func.__name__}': {current / 1024:.2f} KiB (Current), {peak / 1024:.2f} KiB (Peak)")

    return wrapper

In [4]:
import glob
import re

# Natural sorting function
def natural_sort(files):
    """
    Return the paths in `files` as a new list ordered by timestamp number.

    The number is the run of digits following the first "timestamp_" found in
    each path, compared as an integer so that 10 sorts after 9. A path without
    such a match raises AttributeError.
    """
    def timestamp_number(path):
        match = re.search(r'timestamp_(\d+)', path)
        return int(match.group(1))

    ordered = list(files)
    ordered.sort(key=timestamp_number)
    return ordered

# Collect every per-timestamp JSON export. glob does not guarantee any
# particular order, so sort numerically by the number after "timestamp_".
files = glob.glob("data/supply_chain_export_1000/timestamp_*.json")
files = natural_sort(files)

# Build the TemporalGraph; it indexes `files` by timestamp, so after sorting
# list position i holds the export for timestamp i.
temporal_graph = TemporalGraph(files)
# Bare expression: shows the sorted file list as the cell output.
temporal_graph.files

['data/supply_chain_export_1000\\timestamp_0.json',
 'data/supply_chain_export_1000\\timestamp_1.json',
 'data/supply_chain_export_1000\\timestamp_2.json',
 'data/supply_chain_export_1000\\timestamp_3.json',
 'data/supply_chain_export_1000\\timestamp_4.json',
 'data/supply_chain_export_1000\\timestamp_5.json',
 'data/supply_chain_export_1000\\timestamp_6.json',
 'data/supply_chain_export_1000\\timestamp_7.json',
 'data/supply_chain_export_1000\\timestamp_8.json',
 'data/supply_chain_export_1000\\timestamp_9.json',
 'data/supply_chain_export_1000\\timestamp_10.json',
 'data/supply_chain_export_1000\\timestamp_11.json']

# Queries

### Valid Parts in a Given Date Range

In [5]:
import json
from datetime import datetime

# Assuming the TemporalGraph class has a method `load_graph_at_timestamp` for loading graphs at specific timestamps.

@time_and_memory
def query_valid_parts_nx(temporal_graph, timestamp, start_date: str, end_date: str):
    """
    Find nodes whose validity period overlaps a date range, using the NetworkX graph of one snapshot.

    Every node that carries non-empty 'valid_from' and 'valid_till' attributes
    is considered; nodes lacking either attribute are skipped silently.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp: Snapshot identifier, passed to `load_graph_at_timestamp`.
        start_date (str): Start of the range in 'YYYY-MM-DD' format.
        end_date (str): End of the range in 'YYYY-MM-DD' format.

    Returns:
        list: IDs of the nodes whose [valid_from, valid_till] interval overlaps
        the range. Empty if either range bound cannot be parsed.
    """
    date_format = "%Y-%m-%d"

    # Parse the query window first; a malformed bound aborts the query.
    try:
        window_start = datetime.strptime(start_date, date_format)
        window_end = datetime.strptime(end_date, date_format)
    except ValueError as e:
        print(f"Error parsing dates: {e}")
        return []

    graph = temporal_graph.load_graph_at_timestamp(timestamp)

    overlapping = []
    for node, attributes in graph.nodes(data=True):
        raw_from = attributes.get('valid_from', '')
        raw_till = attributes.get('valid_till', '')

        # Nodes without both dates cannot be checked.
        if not (raw_from and raw_till):
            continue

        try:
            begins = datetime.strptime(raw_from, date_format)
            ends = datetime.strptime(raw_till, date_format)
        except ValueError:
            # Report and skip nodes whose dates are not in the expected format.
            print(f"Skipping node {node} due to invalid date format.")
            continue

        # Two closed intervals overlap when each starts before the other ends.
        if begins <= window_end and ends >= window_start:
            overlapping.append(node)

    return overlapping


@time_and_memory
def query_valid_parts_json(temporal_graph, timestamp: int, start_date: str, end_date: str):
    """
    Retrieve valid parts based on their validity period from a JSON-like structure at a specific timestamp.

    Only rows under node_values["Parts"] are examined. The positions of the
    'valid_from' and 'valid_till' columns are taken from node_types["Parts"]
    when the file declares them there; otherwise positions 6 and 7 are used.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp (int): The timestamp (as an integer) for which to retrieve the JSON data.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
        list: List of node IDs representing valid parts within the specified date range.
        Empty if either range bound cannot be parsed.
    """
    date_format = "%Y-%m-%d"

    # Convert start and end dates to datetime objects
    try:
        start_date = datetime.strptime(start_date, date_format)
        end_date = datetime.strptime(end_date, date_format)
    except ValueError as e:
        print(f"Error parsing dates: {e}")
        return []

    # Load the JSON data at the given timestamp
    with open(temporal_graph.files[timestamp], 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Resolve the date columns by name so the query keeps working if the
    # column order of the export changes; 6 and 7 are the legacy positions.
    schema = list(data.get("node_types", {}).get("Parts", []))
    from_idx = schema.index("valid_from") if "valid_from" in schema else 6
    till_idx = schema.index("valid_till") if "valid_till" in schema else 7

    # List to store valid parts (node IDs)
    valid_parts = []

    for node in data.get("node_values", {}).get("Parts", []):
        try:
            valid_from = datetime.strptime(node[from_idx], date_format)
            valid_till = datetime.strptime(node[till_idx], date_format)
        except (ValueError, TypeError, IndexError):
            # Malformed, non-string or missing dates: report and skip the row.
            print(f"Skipping node due to invalid date format: {node}")
            continue

        # Keep the part if its validity interval overlaps the requested range;
        # the node ID is the last element of the row.
        if valid_from <= end_date and valid_till >= start_date:
            valid_parts.append(node[-1])

    return valid_parts

In [6]:
# Query the snapshot at index 1 for nodes whose validity interval overlaps
# 2026-12-10 .. 2027-01-17, going through the NetworkX graph.
valid_parts_nx = query_valid_parts_nx(temporal_graph,1, start_date="2026-12-10", end_date="2027-01-17")
print(f"Valid Parts from NetworkX: {valid_parts_nx}")

Time taken by 'query_valid_parts_nx': 0.51 seconds
Memory used by 'query_valid_parts_nx': 6877.46 KiB (Current), 8792.09 KiB (Peak)
Valid Parts from NetworkX: ['P_001', 'P_038', 'P_046', 'P_092', 'P_128', 'P_130', 'P_139', 'P_151', 'P_153', 'P_162', 'P_189', 'P_195', 'P_232', 'P_233', 'P_241', 'P_260', 'P_389', 'P_436', 'P_441', 'P_469', 'P_477', 'P_486']


In [7]:
# Example for querying valid parts straight from the JSON data.
# Note: this uses snapshot 0 and the range 2026-10-17 .. 2026-12-17, i.e. a
# different snapshot and date range than the NetworkX example, so the two
# result lists are not directly comparable.
valid_parts_json = query_valid_parts_json(temporal_graph, timestamp=0, start_date="2026-10-17", end_date="2026-12-17")
print(f"Valid Parts from JSON: {valid_parts_json}")

Time taken by 'query_valid_parts_json': 0.23 seconds
Memory used by 'query_valid_parts_json': 9.62 KiB (Current), 7092.63 KiB (Peak)
Valid Parts from JSON: ['P_001', 'P_002', 'P_008', 'P_013', 'P_014', 'P_038', 'P_043', 'P_045', 'P_046', 'P_078', 'P_080', 'P_092', 'P_097', 'P_127', 'P_128', 'P_130', 'P_138', 'P_139', 'P_151', 'P_152', 'P_153', 'P_162', 'P_163', 'P_189', 'P_195', 'P_210', 'P_215', 'P_220', 'P_232', 'P_233', 'P_240', 'P_241', 'P_254', 'P_260', 'P_261', 'P_267', 'P_281', 'P_288', 'P_300', 'P_306', 'P_323', 'P_326', 'P_327', 'P_328', 'P_329', 'P_330', 'P_333', 'P_334', 'P_336', 'P_337', 'P_355', 'P_366', 'P_372', 'P_373', 'P_389', 'P_396', 'P_400', 'P_422', 'P_425', 'P_436', 'P_441', 'P_454', 'P_465', 'P_469', 'P_477', 'P_478', 'P_481', 'P_486', 'P_488']


### N Most Frequent Subtypes in Parts

In [8]:
from collections import Counter
import json

@time_and_memory
def query_most_common_subtypes_json(temporal_graph, timestamp: int, n: int):
    """
    Retrieve the n most common subtypes from the JSON-like structure at a specific timestamp.

    Only rows under node_values["Parts"] are counted. The position of the
    'subtype' column is taken from node_types["Parts"] when the file declares
    it there; otherwise position 3 is used.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp (int): The timestamp (as an integer) for which to retrieve the JSON data.
        n (int): The number of most common subtypes to return.

    Returns:
        list: List of tuples with the subtype and its occurrence count.
    """
    # Load the JSON data at the given timestamp
    with open(temporal_graph.files[timestamp], 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Resolve the subtype column by name so the query keeps working if the
    # column order of the export changes; 3 is the legacy position.
    schema = list(data.get("node_types", {}).get("Parts", []))
    subtype_idx = schema.index("subtype") if "subtype" in schema else 3

    # Access the node values from the data
    node_values = data.get("node_values", {}).get("Parts", [])

    # Count occurrences of each subtype and keep the n most common
    subtype_counts = Counter(node[subtype_idx] for node in node_values)
    return subtype_counts.most_common(n)

from collections import Counter

@time_and_memory
def query_most_common_subtypes_nx(temporal_graph, timestamp: int, n: int):
    """
    Count 'subtype' node attributes in the NetworkX graph of one snapshot and return the n most frequent.

    All nodes of the graph are inspected; nodes whose 'subtype' attribute is
    missing or falsy are left out of the count.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp (int): The timestamp (as an integer) for which to retrieve the graph.
        n (int): How many of the most common subtypes to return.

    Returns:
        list: (subtype, count) tuples, most frequent first.
    """
    graph = temporal_graph.load_graph_at_timestamp(timestamp)

    # Pull the attribute from every node, then drop the missing/empty ones.
    declared = (attributes.get('subtype', None) for _, attributes in graph.nodes(data=True))
    tally = Counter(value for value in declared if value)

    return tally.most_common(n)



In [9]:
# Top 5 part subtypes of snapshot 0, counted directly from the JSON data
most_common_subtypes_json = query_most_common_subtypes_json(temporal_graph, timestamp=0, n=5)
print(f"Most Common Subtypes from JSON: {most_common_subtypes_json}")

# Same query on the same snapshot through the NetworkX graph, for comparison
# of results, time and memory with the JSON version above
most_common_subtypes_nx = query_most_common_subtypes_nx(temporal_graph, timestamp=0, n=5)
print(f"Most Common Subtypes from NetworkX: {most_common_subtypes_nx}")


Time taken by 'query_most_common_subtypes_json': 0.20 seconds
Memory used by 'query_most_common_subtypes_json': 11.42 KiB (Current), 7091.17 KiB (Peak)
Most Common Subtypes from JSON: [('plastic_component', 72), ('metal_rod', 65), ('electronic_component', 59), ('metal_sheet', 59), ('chemical', 45)]
Time taken by 'query_most_common_subtypes_nx': 0.43 seconds
Memory used by 'query_most_common_subtypes_nx': 6873.45 KiB (Current), 8787.30 KiB (Peak)
Most Common Subtypes from NetworkX: [('plastic_component', 72), ('metal_rod', 65), ('electronic_component', 59), ('metal_sheet', 59), ('chemical', 45)]


### Bottleneck Analysis Based on Part Attributes

In [10]:
from datetime import datetime
@time_and_memory
def bottleneck_parts_temporal(tg, timestamp, importance_threshold, expected_life_threshold):
    """
    List the parts of one snapshot that are both important and short-lived.

    A node qualifies when its "node_type" attribute is "Parts", its
    "importance_factor" is at least `importance_threshold`, and the number of
    days between "valid_from" and "valid_till" is at most
    `expected_life_threshold`.

    Parameters:
        tg (TemporalGraph): The temporal graph object.
        timestamp (int): The specific timestamp to analyze.
        importance_threshold (float): Minimum importance factor to qualify.
        expected_life_threshold (float): Maximum expected life, in days, to qualify.

    Returns:
        list: (part ID, importance factor, expected life in days) tuples.
    """
    date_format = "%Y-%m-%d"
    graph = tg.load_graph_at_timestamp(timestamp)

    found = []
    for node, data in graph.nodes(data=True):
        # Only part nodes take part in the analysis.
        if data.get("node_type") != "Parts":
            continue

        importance = data.get("importance_factor", 0)
        # Missing dates fall back to the widest possible validity period.
        raw_from = data.get("valid_from", "1970-01-01")
        raw_till = data.get("valid_till", "9999-12-31")

        try:
            begins = datetime.strptime(raw_from, date_format)
            ends = datetime.strptime(raw_till, date_format)
        except ValueError:
            # Unparseable dates count as an unlimited life span.
            expected_life = float('inf')
        else:
            expected_life = (ends - begins).days

        if importance >= importance_threshold and expected_life <= expected_life_threshold:
            found.append((node, importance, expected_life))

    return found


In [11]:
# Requires `temporal_graph` from the loading cell.
# Look at snapshot 3 for parts with an importance factor of at least 0.9
# whose validity period is at most 400 days long.
timestamp = 3
importance_threshold = 0.9
expected_life_threshold = 400  # Days

bottlenecks = bottleneck_parts_temporal(temporal_graph, timestamp, importance_threshold, expected_life_threshold)



Time taken by 'bottleneck_parts_temporal': 0.53 seconds
Memory used by 'bottleneck_parts_temporal': 6875.78 KiB (Current), 8790.42 KiB (Peak)


In [12]:
# Each entry is a (part ID, importance factor, expected life in days) tuple.
print("Bottleneck Parts:")
for part in bottlenecks:
    print(f"Part ID: {part[0]}, Importance: {part[1]:.2f}, Expected Life: {part[2]} days")


Bottleneck Parts:
Part ID: P_256, Importance: 0.99, Expected Life: 360 days
Part ID: P_377, Importance: 0.91, Expected Life: 390 days


### Suppliers to a Part

In [13]:
@time_and_memory
def query_suppliers_for_part_via_warehouse(temporal_graph,timestamp,part_id):
    """
    Find the suppliers that reach a part through a warehouse at one timestamp.

    First collects the nodes linked to `part_id` by a "WarehouseToParts" edge,
    then the nodes linked to those by a "SupplierToWarehouse" edge. Edges are
    followed in both directions: on a directed graph `neighbors()` yields
    successors only, so an edge stored as warehouse -> part would otherwise
    never be seen when starting from the part.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp (int): The timestamp for which to load the graph.
        part_id: ID of the part node; it must exist in the graph.

    Returns:
        list: Supplier node IDs without duplicates, ordered by their string form.
    """
    G = temporal_graph.load_graph_at_timestamp(timestamp)

    def linked_by(node, relationship_type):
        """Return the nodes joined to `node` by an edge of the given type, in either direction."""
        linked = {
            neighbor for neighbor in G.neighbors(node)
            if G[node][neighbor].get("relationship_type") == relationship_type
        }
        if G.is_directed():
            # Incoming edges are only reachable through predecessors().
            linked.update(
                source for source in G.predecessors(node)
                if G[source][node].get("relationship_type") == relationship_type
            )
        return linked

    suppliers = set()
    for warehouse in linked_by(part_id, "WarehouseToParts"):
        suppliers.update(linked_by(warehouse, "SupplierToWarehouse"))

    # Sort so that repeated runs print the same order.
    return sorted(suppliers, key=str)


In [14]:
# Look up the suppliers of one part in snapshot 0.
part_id = 'P_001'  # Replace with actual part ID
result = query_suppliers_for_part_via_warehouse(temporal_graph,0, part_id)
print(f"Suppliers for part {part_id}: {result}")

Time taken by 'query_suppliers_for_part_via_warehouse': 0.00 seconds
Memory used by 'query_suppliers_for_part_via_warehouse': 0.59 KiB (Current), 0.86 KiB (Peak)
Suppliers for part P_001: []


### Parts transported to the facility over long distances with high transportation costs.

In [None]:
# Query 5: Distance Impact on Costs
def distance_impact_on_costs(temporal_graph,timestamp, distance_threshold, transport_cost_threshold):
    """
    Find parts shipped to a facility over a long distance at a high transport cost.

    Only "PartsToFacility" edges are considered; the edge type is read from the
    "relationship_type" attribute or, failing that, from "key". An edge
    qualifies when its "distance_from_warehouse" and its "transport_cost" are
    both at or above their thresholds. A missing attribute counts as 0.

    Parameters:
        temporal_graph (TemporalGraph): The TemporalGraph object.
        timestamp (int): The timestamp for which to load the graph.
        distance_threshold (float): Minimum distance for an edge to qualify.
        transport_cost_threshold (float): Minimum transport cost for an edge to qualify.

    Returns:
        list: (source node, distance, transport cost) tuples, most expensive first.
    """
    G = temporal_graph.load_graph_at_timestamp(timestamp)

    results = []
    for u, _v, data in G.edges(data=True):
        if "PartsToFacility" not in (data.get("relationship_type"), data.get("key")):
            continue

        distance = data.get("distance_from_warehouse", 0)
        transport_cost = data.get("transport_cost", 0)

        # Keep the long AND expensive shipments, i.e. values at or above the
        # thresholds.
        if distance >= distance_threshold and transport_cost >= transport_cost_threshold:
            results.append((u, distance, transport_cost))

    return sorted(results, key=lambda x: x[2], reverse=True)

In [None]:
# Run the distance/cost query on snapshot 0 and list every returned row.
distance_cost_impact = distance_impact_on_costs(temporal_graph,0, distance_threshold=45,transport_cost_threshold=300)
print("Parts transported over large distances:")
# The loop variable is deliberately not called `id`: a top-level loop would
# leave that name bound and hide the builtin id() for the rest of the session.
for source_node, distance, trans in distance_cost_impact:
    print("Part ID:", source_node, ",Distance:", distance, ",Transport Cost:", trans)

### Next