In [None]:
import os
import pickle
from collections import Counter

import networkx as nx
import pandas as pd

<h3>Graph Structure</h3>

<h4> Nodes </h4>

Each node represents a product.

Node features:
* *production* - quantifies product output considering sales orders, customer demand, vehicle fill rate, and delivery urgency.
* *sales order* - signifies distributor-requested quantities, pending approval from the accounts department.
* *delivered* - denotes products dispatched to distributors.
* *factory issue* - covers total products with issues shipped from manufacturing facilities, with some going to distributors and the rest to storage warehouses.



<h4> Edges </h4>

Each edge represents a relation between a pair of products.

Edge features:
* *GroupCode* :         1 if the two products are in the same product group, otherwise 0
* *SubGroupCode* :      1 if the two products are in the same product sub-group, otherwise 0
* *Plant* :             Number of plants in common
* *Storage Location* :  Number of Storage Locations in common

In [None]:
# Undirected graph that accumulates the product nodes and relation edges below.
# NOTE: add_group_edges / add_other_edges write into this notebook-level `G`.
G = nx.Graph()

In [None]:
# Product table; its 'Node' column supplies the graph's node identifiers and
# its 'Group' column is copied onto the nodes later in the notebook.
# NOTE(review): 'Homogenoeus' is spelled this way in every path here; presumably
# it matches the on-disk directory name — confirm before "correcting" it.
nodes = pd.read_csv('Raw Dataset/Homogenoeus/Nodes/Node Types (Product Group and Subgroup).csv')

# Preview the first rows.
nodes.head()

In [None]:
# One graph node per entry of the 'Node' column.
G.add_nodes_from(nodes['Node'])

In [None]:
def add_group_edges(path, graph=None):
    """Add one edge per CSV row, flagged with a binary relation attribute.

    Used for the product group / product sub-group edge files. The CSV at
    ``path`` must contain ``node1`` and ``node2`` columns plus at least one
    further column. The *name* of the first such column (e.g. ``GroupCode``)
    becomes the edge attribute key; the values stored in that column are not
    used, and every listed pair receives the attribute value ``1``.

    Parameters
    ----------
    path : str
        Location of the edge CSV file.
    graph : optional
        Object exposing ``add_edges_from`` that receives the edges. When
        omitted, the notebook-level graph ``G`` is used, which matches the
        original single-argument behaviour.

    Raises
    ------
    IndexError
        If every column name starts with ``node`` (no attribute column).
    KeyError
        If the ``node1`` or ``node2`` column is missing.
    """
    if graph is None:
        graph = G  # fall back to the notebook-wide graph

    df = pd.read_csv(path)
    # The attribute name is the first column that is not a node column.
    edge_type = [col for col in df.columns if not col.startswith('node')][0]

    # A fresh dict per edge so no two edges ever share one attribute mapping.
    edges = [(u, v, {edge_type: 1}) for u, v in zip(df['node1'], df['node2'])]
    graph.add_edges_from(edges)

In [None]:
def add_other_edges(path, graph=None):
    """Add count-weighted edges (used for the Plant and Storage Location files).

    The CSV at ``path`` must contain ``node1`` and ``node2`` columns plus at
    least one further column; the *name* of the first such column becomes the
    edge attribute key. Each row is one occurrence of an unordered product
    pair, and the attribute value is the number of rows in which that pair
    appears, so ``(A, B)`` and ``(B, A)`` are counted together.

    Differences from the earlier list-based implementation:

    * pairs are tallied in a single pass instead of repeated
      ``list.count`` / ``list.remove`` scans;
    * a row whose two endpoints are equal becomes a self-loop; previously it
      collapsed to a one-element set and produced a malformed edge tuple;
    * each edge keeps the endpoint order of its first occurrence in the file
      rather than an arbitrary set iteration order.

    Parameters
    ----------
    path : str
        Location of the edge CSV file.
    graph : optional
        Object exposing ``add_edges_from`` that receives the edges. When
        omitted, the notebook-level graph ``G`` is used, which matches the
        original single-argument behaviour.

    Raises
    ------
    IndexError
        If every column name starts with ``node`` (no attribute column).
    KeyError
        If the ``node1`` or ``node2`` column is missing.
    """
    if graph is None:
        graph = G  # fall back to the notebook-wide graph

    df = pd.read_csv(path)
    # The attribute name is the first column that is not a node column.
    edge_type = [col for col in df.columns if not col.startswith('node')][0]

    pair_counts = Counter()  # unordered pair -> number of rows
    first_seen = {}          # unordered pair -> (u, v) as first written
    for u, v in zip(df['node1'], df['node2']):
        key = frozenset((u, v))  # order-insensitive; one element for u == v
        first_seen.setdefault(key, (u, v))
        pair_counts[key] += 1

    edges = [
        (u, v, {edge_type: pair_counts[key]})
        for key, (u, v) in first_seen.items()
    ]
    graph.add_edges_from(edges)



In [None]:
# Build the edge set one relation file at a time, in this order:
# Plant, Product Group, Product Sub-Group, Storage Location.
edge_sources = [
    (add_other_edges, 'Raw Dataset/Homogenoeus/Edges/Edges (Plant).csv'),
    (add_group_edges, 'Raw Dataset/Homogenoeus/Edges/Edges (Product Group).csv'),
    (add_group_edges, 'Raw Dataset/Homogenoeus/Edges/Edges (Product Sub-Group).csv'),
    (add_other_edges, 'Raw Dataset/Homogenoeus/Edges/Edges (Storage Location).csv'),
]
for add_edges, csv_path in edge_sources:
    add_edges(csv_path)

In [None]:
# Give every edge all four relation attributes; any attribute an edge does not
# already carry is set to 0.
for left, right, attrs in G.edges(data=True):
    for edge_type in ('Plant', 'GroupCode', 'SubGroupCode', 'Storage Location'):
        attrs.setdefault(edge_type, 0)


In [None]:
# Quick visual check of the assembled graph, using networkx's default drawing.
nx.draw(G)

In [None]:
# Temporal tables, one file per node feature. Later cells index each table by
# product name (e.g. d2d[node]), so every product is expected to be a column.
# NOTE(review): 'Production .csv' and 'Sales Order .csv' contain a space before
# the extension — presumably matching the on-disk file names; confirm.
d2d = pd.read_csv('Raw Dataset/Homogenoeus/Temporal Data/Weight/Delivery To distributor.csv')
factory_issue = pd.read_csv('Raw Dataset/Homogenoeus/Temporal Data/Weight/Factory Issue.csv')
production = pd.read_csv('Raw Dataset/Homogenoeus/Temporal Data/Weight/Production .csv')
sales_orders = pd.read_csv('Raw Dataset/Homogenoeus/Temporal Data/Weight/Sales Order .csv')

In [None]:
# Look at the last rows of the sales-order table.
sales_orders.tail()

I noticed a lot of zeros, so I'm going to check the proportion of zero values for every product.

In [None]:
# For each temporal table, compute the share of rows equal to zero per product.
temporal = [d2d, factory_issue, production, sales_orders]
names = ['delivery', 'factory issue', 'production', 'sales']

# Keys are the feature names above; each value lists one fraction per entry of
# nodes['Node'], in that order.
inspect = {
    name: [len(table[table[product] == 0]) / len(table) for product in nodes['Node']]
    for name, table in zip(names, temporal)
}

In [None]:
# Zero-share table: one row per product, one column per temporal feature.
pd.DataFrame(inspect,index=nodes['Node'])

I removed 12 products because almost all of their values were zero.

In [None]:
# The 12 products dropped from the graph for being almost entirely zero.
sparse_products = [
    'EEA200G24P', 'EEA500G12P', 'MAC1K25P', 'MAP1K25P',
    'MAPA1K24P', 'ATPA1K24P', 'ATPPCH5X5K', 'POP015K',
    'SO0005L04P', 'SO0002L09P', 'SO0001L12P', 'SO0500M24P',
]
G.remove_nodes_from(sparse_products)

In [None]:
# Attach each product's temporal columns to its node as numpy arrays.
feature_tables = {
    'delivered': d2d,
    'factory issue': factory_issue,
    'sales_order': sales_orders,
    'production': production,
}

for node, attrs in G.nodes(data=True):
    for feature, table in feature_tables.items():
        attrs[feature] = table[node].to_numpy()
    # Product group of the node, kept for exploratory purposes.
    attrs['GroupCode'] = nodes.loc[nodes['Node'] == node, 'Group'].iloc[0]

In [None]:
# Number of products left in the graph.
G.number_of_nodes()

In [None]:
# Number of product-to-product relations in the graph.
G.number_of_edges()

In [None]:
# Persist the finished graph to graphs/graph.pkl.
# The original trailing note here read "Collab and collab pro" — presumably the
# environments in which the pickle is consumed; TODO confirm.
os.makedirs('graphs', exist_ok=True)  # a fresh checkout may lack the output folder

# The context manager closes the file handle once the dump has been written;
# the earlier bare open(...) call left it open.
with open('graphs/graph.pkl', 'wb') as graph_file:
    pickle.dump(G, graph_file)