In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [None]:
# Load the pre-built product graph from disk.
# NOTE(review): pickle can execute arbitrary code while loading; only open graph
# files that come from a trusted source.
# The context manager guarantees the file handle is closed once the graph is read.
with open('graphs/graph.pkl', 'rb') as graph_file:
    G = pickle.load(graph_file)

Edge Features

1) Product Group
2) Product Sub-Group
3) Plant
4) Storage Location

Node Features

1) Production
2) Factory Issue
3) Sales Order
4) Delivery to distributor

- 4 groups
- 12 subgroups
- 25 plants
- 13 storage locations

<h4>Product Groups and Sub-Groups</h4>

In [None]:
# Products excluded from the analysis; the same list is used further down to drop
# the matching columns from the temporal tables.
removed_products = [
    'EEA200G24P', 'EEA500G12P', 'MAC1K25P', 'MAP1K25P', 'MAPA1K24P', 'ATPA1K24P',
    'ATPPCH5X5K', 'POP015K', 'SO0005L04P', 'SO0002L09P', 'SO0001L12P', 'SO0500M24P',
]

# Read the node -> (group, sub-group) table, de-duplicate it and filter out the
# removed products in one chain, so re-running the cell always yields the same frame
# (no in-place mutation).
product_groups = (
    pd.read_csv('Raw Dataset/Homogenoeus/Nodes/Node Types (Product Group and Subgroup).csv')
    .drop_duplicates()
    .loc[lambda df: ~df['Node'].isin(removed_products)]
)

# Preview as the last expression of the cell so that it is actually rendered.
product_groups.head()

In [None]:
# One bar per product group, showing how many products belong to it.
# Each bar is a single count, so there is nothing to estimate an interval from:
# error bars are switched off with the documented errorbar=None instead of
# passing False as the level of a ('ci', level) tuple.
group_counts = product_groups['Group'].value_counts()
sns.barplot(group_counts, errorbar=None)
plt.xlabel('Group Code')
plt.ylabel('Number of products');

In [None]:
# Number of products per sub-group, coloured by the product group they belong to.
sns.histplot(
    data=product_groups,
    x='Sub-Group',
    hue='Group',
)
# Turn the x tick labels vertical.
plt.xticks(rotation=90);

<h4>Edge Demographics</h4>

In [None]:
from matplotlib_venn import venn3

# Snapshot the attribute dict of every edge once; an edge is identified by its
# position in this list.
edge_attrs = [attrs for _, _, attrs in G.edges(data=True)]

# For each attribute, the ids of the edges on which that attribute is positive.
group_set = {idx for idx, attrs in enumerate(edge_attrs) if attrs['GroupCode'] > 0}
subgroup_set = {idx for idx, attrs in enumerate(edge_attrs) if attrs['SubGroupCode'] > 0}
plant_set = {idx for idx, attrs in enumerate(edge_attrs) if attrs['Plant'] > 0}
storage_set = {idx for idx, attrs in enumerate(edge_attrs) if attrs['Storage Location'] > 0}

# Only three of the four sets are drawn (venn3); sub-group is left out of the diagram.
venn3(
    [group_set, plant_set, storage_set],
    ['Group', 'Plant', 'Storage'],
);

- This Venn diagram represents all 374 edges.
- Every edge except one includes at least one shared storage location. Surprisingly, that one edge is between products of the same group.

In [None]:
# One row per edge with the three edge attributes used in the plots below.
# The frame is built column-wise from per-edge records in a single pass over the
# edges. Building it row-wise and transposing would force all three columns to a
# common dtype whenever the attributes have different types.
edge_columns = ['GroupCode', 'Plant', 'Storage Location']
edges_df = pd.DataFrame(
    [
        {column: attrs[column] for column in edge_columns}
        for _, _, attrs in G.edges(data=True)
    ],
    columns=edge_columns,  # keeps the three columns even when the graph has no edges
)

In [None]:
# Distribution of the 'Plant' edge attribute over the edges where it is positive,
# stacked by the edge's group code.
# NOTE(review): 'Plant' presumably counts the plants shared by the two products - confirm.
has_shared_plant = edges_df['Plant'].gt(0)
plant_ax = sns.histplot(
    data=edges_df.loc[has_shared_plant],
    x='Plant',
    hue='GroupCode',
    multiple='stack',
)
plant_ax.set_title('Edges with atleast one shared Plant');

In [None]:
# Distribution of the 'Storage Location' edge attribute over the edges where it is
# positive, stacked by the edge's group code.
# NOTE(review): the attribute presumably counts shared storage locations - confirm.
has_shared_storage = edges_df['Storage Location'].gt(0)
storage_ax = sns.histplot(
    data=edges_df.loc[has_shared_storage],
    x='Storage Location',
    hue='GroupCode',
    multiple='stack',
)
storage_ax.set_title('Edges with atleast one shared Storage Location');

In [None]:
# Every edge as a point: 'Plant' against 'Storage Location', coloured by group code.
sns.scatterplot(
    data=edges_df,
    x='Plant',
    y='Storage Location',
    hue='GroupCode',
);

- Products that go to the same plants also go to the same storage locations, which makes sense.
- It also looks like storage locations take products from different plants. Some product pairs even share 12 storage locations without sharing a plant or a group.

<h4>Node Connectedness</h4>

In [None]:
# Degree centrality of every node in G, shown as a histogram.
degree_cents = nx.degree_centrality(G)

degree_values = list(degree_cents.values())
sns.histplot(degree_values)
plt.xlabel('degree centrality');

In [None]:
# Betweenness centrality of every node in G, shown as a histogram.
bet_cent = nx.betweenness_centrality(G)

betweenness_values = list(bet_cent.values())
sns.histplot(betweenness_values)
plt.xlabel('betweeness centrality');

The betweenness centrality is generally low and the degree centrality generally high because almost every possible edge already exists: $\binom{28}{2} = 378$ edges are possible, and the graph has 374 of them.

<h4>Plants and Storage Locations</h4>

In [None]:
# Drawing colour for each product-group letter.
color_mapping = dict(
    S='blue',
    P='orange',
    A='green',
    M='red',
    E='purple',
)

# Store the colour on each node, looked up from the node's 'GroupCode' attribute.
# G.nodes(data=True) yields the live attribute dicts, so the assignment writes
# straight into the graph.
for _, node_attrs in G.nodes(data=True):
    node_attrs['color'] = color_mapping[node_attrs['GroupCode']]

In [None]:
# Graph with all nodes of G (attributes included) but only the edges whose
# 'Plant' attribute is positive.
G_plants = nx.Graph()
G_plants.add_nodes_from(G.nodes(data=True))

plant_edges = [
    (u, v, attrs)
    for u, v, attrs in G.edges(data=True)
    if attrs['Plant'] > 0
]
G_plants.add_edges_from(plant_edges)

In [None]:
# Graph restricted to the edges whose 'Storage Location' attribute exceeds 11.
G_storage = nx.Graph()
G_storage.add_nodes_from(G.nodes(data=True))
G_storage.add_edges_from(
    (u, v, attrs)
    for u, v, attrs in G.edges(data=True)
    if attrs['Storage Location'] > 11
)

# Nodes left without any qualifying edge are removed so that only connected
# products remain in the graph.
unconnected_nodes = [node for node, deg in G_storage.degree() if deg == 0]
G_storage.remove_nodes_from(unconnected_nodes)

In [None]:
# Per-node colours in the graph's node order, read from the 'color' node attribute.
node_colors = nx.get_node_attributes(G_plants, "color").values()
colors = [*node_colors]

nx.draw(G_plants, node_color=colors)
plt.title('Edges with atleast one plant');

# Legend for the product-group colours:
#     S -> blue
#     P -> orange
#     A -> green
#     M -> red

- It seems that groups M and A share plants, while groups S and P share plants.

In [None]:
# Per-node colours in the graph's node order, read from the 'color' node attribute.
node_colors = nx.get_node_attributes(G_storage, "color").values()
colors = list(node_colors)

nx.draw(G_storage, node_color=colors)
# G_storage is built from edges with 'Storage Location' > 11, so the title states
# that threshold (it previously said "eight", which did not match the filter).
plt.title('Edges with more than 11 shared storage locations');

# Legend for the product-group colours:
#     S -> blue
#     P -> orange
#     A -> green
#     M -> red


<h4>Forecasting</h4>

In [None]:
def _load_without_removed(csv_path):
    """Read one temporal CSV and drop the columns listed in `removed_products`."""
    return pd.read_csv(csv_path).drop(columns=removed_products)


# The four temporal tables from the 'Weight' folder, one per signal.
d2d = _load_without_removed('Raw Dataset/Homogenoeus/Temporal Data/Weight/Delivery To distributor.csv')
factory_issue = _load_without_removed('Raw Dataset/Homogenoeus/Temporal Data/Weight/Factory Issue.csv')
production = _load_without_removed('Raw Dataset/Homogenoeus/Temporal Data/Weight/Production .csv')
sales_orders = _load_without_removed('Raw Dataset/Homogenoeus/Temporal Data/Weight/Sales Order .csv')

In [None]:
# Summary statistics of the delivery-to-distributor table.
d2d.describe()

In [None]:
# Correlation between the product columns of the delivery-to-distributor table
# ('Date' is excluded), on a fixed [-1, 1] colour scale.
d2d_corr = d2d.drop(columns='Date').corr()
sns.heatmap(d2d_corr, vmin=-1, vmax=1)

In [None]:
# Correlation between the product columns of the production table
# ('Date' is excluded), on a fixed [-1, 1] colour scale.
production_corr = production.drop(columns='Date').corr()
sns.heatmap(production_corr, vmin=-1, vmax=1)

In [None]:
# Correlation between the product columns of the sales-order table
# ('Date' is excluded), on a fixed [-1, 1] colour scale.
sales_orders_corr = sales_orders.drop(columns='Date').corr()
sns.heatmap(sales_orders_corr, vmin=-1, vmax=1)

In [None]:
# Correlation between the product columns of the factory-issue table
# ('Date' is excluded), on a fixed [-1, 1] colour scale.
# The clear line in the heatmap belongs to a product that had no factory issues
# (presumably its column is constant, which leaves the correlation undefined).
factory_issue_corr = factory_issue.drop(columns='Date').corr()
sns.heatmap(factory_issue_corr, vmin=-1, vmax=1)