# Class TemporalGraph

In [1]:
from functools import lru_cache
import os
import json
import networkx as nx
class TemporalGraph:
    """
    Lazy loader for a sequence of graph snapshots stored as JSON files.

    A "timestamp" is the index of a snapshot in ``files``. Snapshots are parsed
    on demand into NetworkX graphs, and each instance memoizes its 10 most
    recently used snapshots.
    """

    def __init__(self, files):
        """
        Initialize TemporalGraph with a list of JSON files representing graphs at different timestamps.

        Args:
            files: Sequence of JSON file paths; position in the sequence is the timestamp.
        """
        self.files = files  # List of JSON file paths

        # Cache the last 10 accessed timestamps *per instance*. Decorating the
        # method itself with lru_cache would create a single class-wide cache
        # keyed on (self, timestamp): it keeps instances alive after they are
        # dropped and lets several TemporalGraph objects evict each other's
        # snapshots. Shadowing the method with a cached bound method keeps
        # `obj.load_graph_at_timestamp(...)`, `.cache_info()` and
        # `.cache_clear()` working exactly as before.
        self.load_graph_at_timestamp = lru_cache(maxsize=10)(self.load_graph_at_timestamp)

    def load_graph_at_timestamp(self, timestamp):
        """
        Load the graph for a specific timestamp from JSON and convert it to a NetworkX graph.

        Args:
            timestamp: Index into ``self.files`` (must be hashable, it is the cache key).

        Returns:
            The NetworkX graph built from that file. On instances created through
            ``__init__`` the result is memoized, so repeated calls return the
            *same* graph object; copy it before mutating.

        Raises:
            IndexError: If ``timestamp`` is outside the range of ``self.files``.
        """
        # JSON is UTF-8 by specification; without an explicit encoding Windows
        # would decode the file with the locale code page.
        with open(self.files[timestamp], 'r', encoding='utf-8') as f:
            data = json.load(f)
        return self._json_to_graph(data)

    def _json_to_graph(self, data):
        """
        Convert JSON data to a NetworkX graph.

        Reads the keys ``directed``, ``node_types``, ``node_values``,
        ``relationship_types`` and ``relationship_values`` from ``data``.

        Returns:
            ``nx.DiGraph`` when ``data["directed"]`` is truthy, otherwise ``nx.Graph``.
        """
        graph = nx.DiGraph() if data["directed"] else nx.Graph()

        # Add nodes: every record is a list of values whose names come from the
        # schema of its node type.
        for node_type, nodes in data["node_values"].items():
            for node in nodes:
                node_id = node[-1]  # Assuming the node ID is the last element in the list
                node_attributes = dict(zip(data["node_types"][node_type], node))
                graph.add_node(node_id, **node_attributes)  # Add the node with its attributes

        # Add edges
        all_edge_types = data["relationship_types"]

        for record in data["relationship_values"]:
            if record[0] in all_edge_types:
                # Typed record: the last two values are the endpoints, everything
                # before them is an attribute named by the relationship schema.
                # Values without a schema name are ignored, as for nodes.
                attributes = dict(zip(all_edge_types[record[0]], record[:-2]))
                graph.add_edge(record[-2], record[-1], **attributes)
            else:
                # Unknown type: the first two values are used as the endpoints.
                # NOTE(review): typed records take endpoints from the END of the
                # record, this fallback from the START - confirm the untyped layout.
                graph.add_edge(record[0], record[1])

        return graph



# Profiling Wrapper and Data Loading

In [2]:
import time  # Ensure this is imported properly
import tracemalloc
import functools

def time_and_memory(func):
    """
    Decorator that prints the wall-clock time and traced memory of every call to ``func``.

    The report is printed whether ``func`` returns or raises; exceptions are
    re-raised unchanged and the return value is passed through.

    Memory figures come from ``tracemalloc`` and are relative to the traced
    size at the moment the call starts. If tracing was already active (for
    example inside another decorated function) it is left running afterwards,
    and the reported peak is the session peak, which may predate this call.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only start/stop tracemalloc if nobody else is using it: stopping an
        # outer session would discard its traces and zero its measurements.
        owns_tracing = not tracemalloc.is_tracing()
        if owns_tracing:
            tracemalloc.start()
        baseline, _ = tracemalloc.get_traced_memory()

        # perf_counter is monotonic and high resolution, unlike time.time()
        start_time = time.perf_counter()

        try:
            # Call the actual function
            result = func(*args, **kwargs)
        finally:
            # Read the clock first so tracemalloc bookkeeping is not timed
            elapsed_time = time.perf_counter() - start_time

            current, peak = tracemalloc.get_traced_memory()
            if owns_tracing:
                tracemalloc.stop()

            # Print results
            print(f"Time taken by '{func.__name__}': {elapsed_time:.2f} seconds")
            print(f"Memory used by '{func.__name__}': {(current - baseline) / 1024:.2f} KiB (Current), {(peak - baseline) / 1024:.2f} KiB (Peak)")

        return result
    return wrapper

In [3]:
import glob
import re

# Natural sorting function
def natural_sort(files):
    """
    Sort snapshot paths by the number following ``timestamp_`` in the file name.

    Sorting is numeric, so ``timestamp_10`` comes after ``timestamp_2``. Only
    the base name is inspected, so a directory such as ``timestamp_9/`` cannot
    supply the sort key.

    Args:
        files: Iterable of path strings.

    Returns:
        A new list with the paths in ascending timestamp order.

    Raises:
        ValueError: If a file name contains no ``timestamp_<digits>`` part.
    """
    def timestamp_of(path):
        match = re.search(r'timestamp_(\d+)', os.path.basename(path))
        if match is None:
            raise ValueError(f"No 'timestamp_<number>' found in file name: {path!r}")
        return int(match.group(1))

    return sorted(files, key=timestamp_of)

# Discover the per-timestamp exports and order them by their numeric suffix
files = natural_sort(glob.glob("data/supply_chain_export_1000/timestamp_*.json"))

# Wrap the ordered snapshots in a TemporalGraph and display what was picked up
temporal_graph = TemporalGraph(files)
temporal_graph.files

['data/supply_chain_export_1000\\timestamp_0.json',
 'data/supply_chain_export_1000\\timestamp_1.json',
 'data/supply_chain_export_1000\\timestamp_2.json',
 'data/supply_chain_export_1000\\timestamp_3.json',
 'data/supply_chain_export_1000\\timestamp_4.json',
 'data/supply_chain_export_1000\\timestamp_5.json',
 'data/supply_chain_export_1000\\timestamp_6.json',
 'data/supply_chain_export_1000\\timestamp_7.json',
 'data/supply_chain_export_1000\\timestamp_8.json',
 'data/supply_chain_export_1000\\timestamp_9.json',
 'data/supply_chain_export_1000\\timestamp_10.json',
 'data/supply_chain_export_1000\\timestamp_11.json']

# Queries

### Identify facility nodes whose operating cost exceeds a threshold

In [4]:
@time_and_memory
def query_high_operating_cost_nodes(temporal_graph,timestamp, threshold):
    """
    List the Facility nodes whose operating cost is above ``threshold``.

    Loads the snapshot for ``timestamp`` from ``temporal_graph``, keeps the
    nodes whose ``node_type`` attribute equals "Facility" and whose
    ``operating_cost`` attribute (taken as 0 when absent) is strictly greater
    than ``threshold``, prints the matching node IDs and returns them.
    """
    snapshot = temporal_graph.load_graph_at_timestamp(timestamp)

    high_cost_nodes = []
    for node_id, node_attrs in snapshot.nodes(data=True):
        # Only facilities are considered
        if node_attrs.get("node_type") != "Facility":
            continue
        if node_attrs.get("operating_cost", 0) > threshold:
            high_cost_nodes.append(node_id)

    print(f"Nodes with operating cost greater than {threshold}: {high_cost_nodes}")
    return high_cost_nodes

In [5]:
# Facilities costing more than this to operate are reported (snapshot 0)
threshold = 6000
high_cost_nodes = query_high_operating_cost_nodes(
    temporal_graph,
    timestamp=0,
    threshold=threshold,
)

Nodes with operating cost greater than 6000: ['F_001', 'F_002', 'F_003', 'F_005', 'F_010', 'F_012', 'F_014', 'F_018', 'F_019', 'F_020', 'F_021', 'F_026', 'F_031', 'F_036', 'F_037', 'F_038', 'F_041', 'F_048', 'F_049', 'F_050', 'F_053', 'F_054', 'F_059', 'F_061', 'F_062', 'F_063', 'F_064', 'F_065', 'F_066', 'F_068', 'F_070', 'F_072', 'F_075', 'F_077', 'F_082', 'F_083', 'F_086', 'F_088', 'F_089', 'F_091', 'F_092', 'F_094', 'F_097', 'F_102', 'F_103', 'F_105', 'F_109', 'F_115', 'F_121', 'F_122', 'F_123', 'F_129', 'F_131', 'F_132', 'F_134', 'F_138', 'F_139', 'F_141', 'F_143', 'F_144', 'F_145', 'F_147', 'F_148', 'F_149', 'F_150']
Time taken by 'query_high_operating_cost_nodes': 0.32 seconds
Memory used by 'query_high_operating_cost_nodes': 6878.77 KiB (Current), 8790.52 KiB (Peak)


### Profitable product offering