## Viz Categorical with Periodogram

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import lightgbm as lgb
from statsmodels.tsa.deterministic import (CalendarFourier,
                                           CalendarSeasonality,
                                           CalendarTimeTrend,
                                           DeterministicProcess)
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tqdm import tqdm

In [3]:
DIRECTORY = './data/'
calendar = pd.read_csv(DIRECTORY + 'calendar.csv', parse_dates=['date'])
inventory = pd.read_csv(DIRECTORY + 'inventory.csv')
test = pd.read_csv(DIRECTORY + 'sales_test.csv', parse_dates=['date'])
train = pd.read_csv(DIRECTORY + 'sales_train.csv', parse_dates=['date'])
solution = pd.read_csv(DIRECTORY + 'solution.csv')
test_weights = pd.read_csv(DIRECTORY + 'test_weights.csv')

In [7]:
inventory['name_first'] = inventory['name'].str.split('_').str[0]
inventory.head()

Unnamed: 0,unique_id,product_unique_id,name,L1_category_name_en,L2_category_name_en,L3_category_name_en,L4_category_name_en,warehouse,name_first
0,5255,2583,Pastry_196,Bakery,Bakery_L2_14,Bakery_L3_26,Bakery_L4_1,Prague_3,Pastry
1,4948,2426,Herb_19,Fruit and vegetable,Fruit and vegetable_L2_30,Fruit and vegetable_L3_86,Fruit and vegetable_L4_1,Prague_3,Herb
2,2146,1079,Beet_2,Fruit and vegetable,Fruit and vegetable_L2_3,Fruit and vegetable_L3_65,Fruit and vegetable_L4_34,Prague_1,Beet
3,501,260,Chicken_13,Meat and fish,Meat and fish_L2_13,Meat and fish_L3_27,Meat and fish_L4_5,Prague_1,Chicken
4,4461,2197,Chicory_1,Fruit and vegetable,Fruit and vegetable_L2_17,Fruit and vegetable_L3_33,Fruit and vegetable_L4_1,Frankfurt_1,Chicory


In [None]:
# Anytree
from anytree import Node, RenderTree
from anytree.exporter import DotExporter

root = Node("Products")
nodes = {"root": root}

for i, row in inventory[['L1_category_name_en', 'L2_category_name_en', 'L3_category_name_en', 'L4_category_name_en']].drop_duplicates().iterrows():
    level1 = row['L1_category_name_en']
    level2 = row['L2_category_name_en']
    level3 = row['L3_category_name_en']
    level4 = row['L4_category_name_en']

    if level1 not in nodes:
        nodes[level1] = Node(level1, parent=root)
    if level2 not in nodes:
        nodes[level2] = Node(level2, parent=nodes[level1])
    if level3 not in nodes:
        nodes[level3] = Node(level3, parent=nodes[level2])
    if level4 not in nodes:
        nodes[level4] = Node(level4, parent=nodes[level3])
        
for pre, _, node in RenderTree(root):
    print(f"{pre}{node.name}")

Products
├── Bakery
│   └── Bakery_L2_14
│       └── Bakery_L3_26
│           └── Bakery_L4_1
├── Fruit and vegetable
│   └── Fruit and vegetable_L2_30
│       └── Fruit and vegetable_L3_86
│           └── Fruit and vegetable_L4_1
├── Fruit and vegetable
│   └── Fruit and vegetable_L2_3
│       └── Fruit and vegetable_L3_65
│           └── Fruit and vegetable_L4_34
├── Meat and fish
│   └── Meat and fish_L2_13
│       └── Meat and fish_L3_27
│           └── Meat and fish_L4_5
├── Fruit and vegetable
│   └── Fruit and vegetable_L2_17
│       └── Fruit and vegetable_L3_33
│           └── Fruit and vegetable_L4_1
├── Bakery
│   └── Bakery_L2_18
│       └── Bakery_L3_77
│           └── Bakery_L4_1
├── Meat and fish
│   └── Meat and fish_L2_25
│       └── Meat and fish_L3_125
│           └── Meat and fish_L4_50
├── Meat and fish
│   └── Meat and fish_L2_13
│       └── Meat and fish_L3_32
│           └── Meat and fish_L4_7
├── Fruit and vegetable
│   └── Fruit and vegetable_L2_1
│       └── 

In [25]:
# Plotly Treemap
# Very good for hierarchical data

import plotly.express as px
# Ensure the data reflects the hierarchy (dropping duplicates if necessary)
df_hierarchy = inventory[['L1_category_name_en', 'L2_category_name_en', 'L3_category_name_en', 'L4_category_name_en']]

# Create an interactive treemap
fig = px.treemap(
    df_hierarchy,
    path=['L1_category_name_en', 'L2_category_name_en', 'L3_category_name_en', 'L4_category_name_en'],
    title="Product Categories Hierarchy"
)
fig.show()

In [24]:
inventory[['L1_category_name_en', 'L2_category_name_en', 'L3_category_name_en', 'L4_category_name_en']].nunique()

L1_category_name_en      3
L2_category_name_en     47
L3_category_name_en    177
L4_category_name_en     68
dtype: int64