<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#-Forecasting-Demand-for-Optimized-Inventory-Planning-" data-toc-modified-id="-Forecasting-Demand-for-Optimized-Inventory-Planning--1"><center> Forecasting Demand for Optimized Inventory Planning </center></a></span><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1.1">Imports</a></span></li></ul></li><li><span><a href="#Notes" data-toc-modified-id="Notes-2">Notes</a></span></li></ul></div>

<h1><center> Forecasting Demand for Optimized Inventory Planning </center></h1>


## Imports

In [1]:
import itertools

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 50)

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
import seaborn as sns
sns.set()

The next two cells are optional conveniences and can be skipped.

In [2]:
%%javascript
// Show/hide the page element with id "menubar" (jQuery toggle); purely cosmetic.
$('#menubar').toggle();

<IPython.core.display.Javascript object>

In [3]:
# This is optional and can be skipped if not installed
# Mode 2 re-imports modules before each cell executes, so edits to imported
# project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys
sys.path.append("../main")

from utils import read_data, process_time, merge_data

In [5]:
# Load the three input tables through the project's `read_data` helper (from `utils`).
infos, items, orders = read_data()

In [6]:
# Combine orders, items and infos into a single frame with the project's
# `merge_data` helper, then preview the first rows.
df = merge_data(orders, items, infos)
df.head()

Unnamed: 0,time,transactID,itemID,order,salesPrice,brand,manufacturer,customerRating,category1,category2,category3,recommendedRetailPrice,simulationPrice,promotion
0,2018-01-01 00:01:56,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
1,2018-01-01 00:09:24,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
2,2018-01-01 00:51:59,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
3,2018-01-01 00:56:54,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
4,2018-01-15 17:48:57,78321,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,


In [7]:
# NOTE(review): same preview as the previous cell; `df` is not modified in between.
df.head()

Unnamed: 0,time,transactID,itemID,order,salesPrice,brand,manufacturer,customerRating,category1,category2,category3,recommendedRetailPrice,simulationPrice,promotion
0,2018-01-01 00:01:56,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
1,2018-01-01 00:09:24,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
2,2018-01-01 00:51:59,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
3,2018-01-01 00:56:54,2278968,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,
4,2018-01-15 17:48:57,78321,450,1,17.42,0,7,0.0,1,2,1,27.0,17.42,


In [8]:
# Report how many distinct values each of the three category columns holds.
for idx in (1, 2, 3):
    column_name = f"category{idx}"
    distinct_count = df[column_name].nunique()
    print(f"cat {idx} nunique = ", distinct_count)

cat 1 nunique =  8
cat 2 nunique =  52
cat 3 nunique =  8


In [9]:
def quick_analysis(col, other_col, data=None):
    """Report whether ``other_col`` could be a child column of ``col``.

    The hierarchy holds when no value of ``other_col`` appears under more
    than one distinct value of ``col``. The verdict is printed; nothing is
    returned.

    Parameters
    ----------
    col : str
        Name of the candidate parent column.
    other_col : str
        Name of the candidate child column.
    data : pandas.DataFrame, optional
        Frame to inspect. Defaults to the notebook-level ``df``.
    """
    if data is None:
        data = df

    # Reduce the frame to its distinct (parent, child) pairs. Rows whose
    # parent value is missing are dropped because they cannot be attributed
    # to any parent value.
    pairs = data.loc[data[col].notna(), [col, other_col]].drop_duplicates()

    # A child value that survives in more than one distinct pair is shared
    # between at least two parents, which breaks the hierarchy.
    if pairs[other_col].duplicated().any():
        print(f"Column {other_col} not a subset of column {col}")
    else:
        print(f"We've found it! {other_col} IS a subset of column {col}")

In [10]:
# Test every ordered (parent, child) pairing of the three category columns.
cats = ["category1", "category2", "category3"]
for col, other_col in itertools.permutations(cats, 2):
    quick_analysis(col, other_col)

  if __name__ == '__main__':


We've found it! category2 IS a subset of column category1
Column category3 not a subset of column category1
Column category1 not a subset of column category2
Column category3 not a subset of column category2
Column category1 not a subset of column category3
Column category2 not a subset of column category3


# Notes

Cool! So `category2` is nested inside `category1`: no `category2` value appears under more than one `category1` value. Let's view the hierarchy!

In [11]:
!python3 -m pip install pyvis --user --quiet

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [12]:
import networkx as nx

# https://pyvis.readthedocs.io/en/latest/tutorial.html
from pyvis.network import Network

In [13]:
def my_plot(df, testing=False):
    """Build a pyvis network linking category1 values to their category2 values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing "category1" and "category2" columns.
    testing : bool, default False
        When True, build the graph from a random sample of at most 10 rows.

    Returns
    -------
    pyvis.network.Network
        Graph with one node per category value and one edge per observed
        (category1, category2) pair; each edge's ``value`` is the number of
        rows for that pair.
    """
    if testing:
        # Cap the sample size so small frames do not make sample() raise.
        df = df.sample(n=min(10, len(df)))
    net = Network()
    # Group without as_index=False so size() is a Series keyed by the
    # (category1, category2) pair; with as_index=False recent pandas returns
    # a DataFrame, whose items are columns rather than pair counts.
    counts = df.groupby(["category1", "category2"]).size()
    cat1_counts = df.groupby("category1").size()
    cat2_counts = df.groupby("category2").size()

    print("Amount of edges", len(counts), "\n")

    # .items() replaces .iteritems(), which pandas 2.0 removed.
    for (node1, node2), count in counts.items():
        node1_name = f"cat1-{node1}"
        node2_name = f"cat2-{node2}"

        # Node size is deliberately left unset: sizing nodes by their row
        # count made the rendered view unreadable. The count is shown in the
        # hover title instead.
        net.add_node(node1_name, label="-",
                     title=f"{node1_name}, size={cat1_counts.loc[node1]}",
                    )
        net.add_node(node2_name, label="-",
                     title=f"{node2_name}, size={cat2_counts.loc[node2]}",
                    )
        # Cast to a plain int since numpy integers don't play well with the
        # network library.
        net.add_edge(node1_name, node2_name, value=int(count))
    return net

In [14]:
# Build the category graph from the full frame and render it to an HTML file.
net = my_plot(df)
net.show("1.2_graph.html")

Amount of edges 52 

