In [2]:
import pandas as pd
import numpy as np
import os

from collections import defaultdict

In [3]:
# Folder and file name of the composition table.
dir_data = "./data"
data_file = "raspberry_praline_composition.csv"

# Read the CSV once; `df` and `raw_data` are two names bound to the same DataFrame.
csv_path = os.path.join(dir_data, data_file)
raw_data = pd.read_csv(csv_path)
df = raw_data

print(raw_data.columns)
raw_data

Index(['Component', 'Qty', 'Level'], dtype='object')


Unnamed: 0,Component,Qty,Level
0,Chocolate Praline,100.0,1
1,Dark Chocolate 55% Cocoa,55.0,2
2,Cocoa Liquor,22.0,3
3,Cocoa Butter,5.5,3
4,Sugar,24.2,3
5,Refined Cane Sugar,24.19725,4
6,Anti-Caking Agent,0.00275,4
7,Alkalised Cocoa Powder,2.75,3
8,Cocoa Powder,2.73625,4
9,Potassium Carbonate,0.01375,4


In [4]:
# Short result keys mapped to the DataFrame column each one is read from.
params = dict(ing="Component", qty="Qty", lvl="Level")

def default_dict(params):
    """Return a dict that maps every key of `params` to its own new, empty list."""
    template = {}
    for key in params.keys():
        template[key] = []
    return template

In [5]:
def default_dict(params):
    """Build an empty record: each key of `params` paired with a separate empty list."""
    return dict((key, []) for key in params.keys())

# Empty record with the same keys as `params`.
rec_dict = default_dict(params)

rec_dict

{'ing': [], 'qty': [], 'lvl': []}

In [6]:
from collections import OrderedDict

In [7]:
from collections import defaultdict
import numpy as np
import pandas as pd

class Recipe:
    """Recipe stored as a flat table in which every row carries a nesting level.

    The table has one row per component with (at least) a component-name column,
    a quantity column and a level column. A row whose following row has a larger
    level number is treated as having sub-components; a row whose following row
    has an equal or smaller level number (or that is the last row) is a leaf.

    Attributes:
        df_: Copy of the input table with whitespace stripped from the column names.
        params: Mapping of result key -> source column name. The first three
            entries are used, in the order ingredient, quantity, level.
        highest_level: Smallest value found in the level column (the top of the tree).
        recipe_: Rows of ``df_`` below the top level, re-indexed from 0.
        name: Display name of the recipe.
        rec_dict: Cache of flattened results, keyed by the ``max_level`` they were
            extracted for. Each value is a dict of lists keyed like ``params``.
    """

    def __init__(self, df: pd.DataFrame, params: dict = None, exclude_highest_level = "auto", name: str = None):
        """Prepare the working tables from ``df``.

        Args:
            df: Source table. It is not modified.
            params: Optional mapping of result key -> column name; defaults to
                ``{"ing": "Component", "qty": "Qty", "lvl": "Level"}``.
            exclude_highest_level: Accepted for backward compatibility. The value is
                not consulted: rows at the top level are always left out of ``recipe_``.
            name: Optional recipe name. When omitted, the component name of the first
                top-level row is used.

        Raises:
            ValueError: If ``name`` is omitted and ``df`` has no row to take it from.
        """
        # rename() returns a new frame, so the caller's column labels stay untouched
        # (assigning to `.columns` on the passed-in object would rename them in place).
        self.df_ = df.rename(columns=lambda col: col.strip() if isinstance(col, str) else col)

        if params is None:
            self.params = {"ing": "Component", "qty": "Qty", "lvl": "Level"}
        else:
            self.params = params

        self.ing_key, self.qty_key, self.lvl_key = list(self.params.keys())[:3]
        self.ing_col, self.qty_col, self.lvl_col = list(self.params.values())[:3]

        levels = self.df_[self.lvl_col]
        self.highest_level = levels.min()

        # Everything strictly below the top level; compare against the detected top
        # level rather than a literal 1 so tables numbered from 0 (or 2, ...) work too.
        self.recipe_ = self.df_[levels > self.highest_level].reset_index(drop=True)

        if name is None:
            top_names = self.df_.loc[levels == self.highest_level, self.ing_col]
            if top_names.empty:
                raise ValueError("Cannot infer a recipe name from an empty table; pass `name`.")
            # Positional access: the top-level row need not carry index label 0.
            self.name = top_names.iloc[0]
        else:
            self.name = name

        self.rec_dict = defaultdict(self.dict_template)

    def dict_template(self):
        """Return a new record with one empty list per key of ``params``."""
        return {key: [] for key in self.params.keys()}

    def copy_to_dict(self, idx, level):
        """Append row ``idx`` of ``recipe_`` to the record stored under ``level``.

        Args:
            idx: Index label of the row in ``recipe_``.
            level: Key of ``rec_dict`` that receives the values.
        """
        record = self.rec_dict[level]
        for key, colname in self.params.items():
            record[key].append(self.recipe_.loc[idx, colname])

    def __repr__(self):
        return "Recipe(df)"

    def __str__(self):
        n_components = self.recipe_.shape[0]
        return "{} recipe with {} components".format(self.name, n_components)

    def traverse_recipe(self, idx = 0, start_level = 1, max_level = np.inf):
        """Collect the rows that make up the recipe when cut off at ``max_level``.

        Starting at row ``idx`` of ``recipe_``, a row is appended to
        ``rec_dict[max_level]`` when its level does not exceed ``max_level`` and
        either it is a leaf (last row, or the next row is not deeper) or its
        sub-components lie beyond ``max_level``.

        Args:
            idx: First row of ``recipe_`` to inspect.
            start_level: Unused; kept so existing calls keep working.
            max_level: Deepest level that may appear in the result.

        Returns:
            None. Results are accumulated in ``rec_dict[max_level]``; calling this
            method repeatedly for the same ``max_level`` appends the rows again.
        """
        levels = self.recipe_[self.lvl_col].tolist()
        last_pos = len(levels) - 1

        # Iterative walk: one recursive call per row would exceed Python's recursion
        # limit on long tables. `recipe_` is re-indexed from 0, so positions in
        # `levels` are also its index labels.
        for pos in range(idx, len(levels)):
            current_level = levels[pos]
            if current_level > max_level:
                continue

            if pos == last_pos:
                # The final row cannot have sub-components, so it is always a leaf.
                keep = True
            else:
                next_level = levels[pos + 1]
                is_leaf = next_level <= current_level
                children_cut_off = next_level > max_level
                keep = is_leaf or children_cut_off

            if keep:
                self.copy_to_dict(pos, level = max_level)

        return None

    def extract_recipe(self, max_level = np.inf, aggregate = False, aggregate_by = None, sort = True, sort_by = None, ascending = False):
        """Return the recipe flattened at ``max_level`` as a DataFrame.

        The traversal result is cached per ``max_level`` in ``rec_dict``.

        Args:
            max_level: Deepest level that may appear in the result.
            aggregate: When True, group the rows and sum their values, keeping only
                the ingredient and quantity columns.
            aggregate_by: Column to group by; defaults to the ingredient key.
            sort: When True the aggregated result is sorted. Has no effect unless
                ``aggregate`` is True.
            sort_by: Column to sort by; defaults to the quantity key.
            ascending: Sort direction.

        Returns:
            A new DataFrame whose columns are the keys of ``params`` (or only the
            ingredient and quantity keys when aggregated).
        """
        if max_level not in self.rec_dict:
            self.traverse_recipe(idx = 0, max_level = max_level)
        res = pd.DataFrame(self.rec_dict[max_level])

        if aggregate:
            if aggregate_by is None:
                aggregate_by = self.ing_key

            res = res.groupby(aggregate_by).sum().reset_index()[[self.ing_key, self.qty_key]]
            if sort:
                if sort_by is None:
                    sort_by = self.qty_key

                res = res.sort_values(sort_by, ascending = ascending).reset_index(drop=True)

        return res

    @property
    def df(self):
        """The full table (including the top-level rows) with stripped column names."""
        return self.df_

    @property
    def recipe(self):
        """The rows below the top level, indexed from 0."""
        return self.recipe_

    @classmethod
    def from_csv(cls, filepath: str, *args, **kwargs):
        """Build a Recipe from a CSV file; extra arguments are passed to ``pd.read_csv``."""
        df = pd.read_csv(filepath, *args, **kwargs)
        return cls(df)

    @classmethod
    def from_excel(cls, filepath: str, *args, **kwargs):
        """Build a Recipe from an Excel file; extra arguments are passed to ``pd.read_excel``."""
        df = pd.read_excel(filepath, *args, **kwargs)
        return cls(df)

In [20]:
rec = Recipe.from_excel("./data/praline_composition.xlsx")

print("Highest Level:", rec.highest_level)

# Number of component rows at each level (index: level, values: row count).
level_count = rec.df.groupby("Level").count()["Component"]

# Look the top level up by its index label. A boolean mask such as
# `level_count == rec.highest_level` would compare the *row counts* with the
# level number and pick whichever levels happen to have that many rows.
t = level_count.loc[rec.highest_level]

# True when exactly one row sits at the top level.
t == 1


Highest Level: 1


True

In [15]:
# Build the recipe from the CSV data loaded earlier.
rec = Recipe(raw_data)

# The returned frame is not used here; the call fills rec.rec_dict[4].
rec.extract_recipe(max_level=4, aggregate=True, sort=True)

# str() gives the one-line summary, repr() the short tag.
print(str(rec))
print(repr(rec))

Chocolate Praline recipe with 28 components
Recipe(df)


In [34]:

def dict_template():
    """Return a fresh record with empty 'ing', 'qty' and 'lvl' lists."""
    return dict(ing=[], qty=[], lvl=[])

# Every missing key receives its own new template on first access.
rec_dict = defaultdict(dict_template)
rec_dict[1]  # the lookup alone inserts key 1

rec_dict

defaultdict(<function __main__.dict_template()>,
            {1: {'ing': [], 'qty': [], 'lvl': []}})

In [None]:
# Non-null "Component" entries per "Level" value.
df.groupby("Level")["Component"].count()

In [None]:
def default_dict():
    """Return an empty record with 'ing', 'qty' and 'lvl' lists."""
    return {"ing": [], "qty": [], "lvl": []}

rec_dict = defaultdict(default_dict)

# Rows below the top level, taken from the Recipe instance built earlier, so the
# cell does not rely on a `recipe` variable assigned further down the notebook.
recipe = rec.recipe

current_level = 2
# NOTE(review): `higher_level` was never assigned in this cell; it is assumed to be
# the level currently being collected - confirm.
higher_level = current_level
lower_level = higher_level + 1

# Copy rows 1-4 into the record for `higher_level`. The copy is written out inline
# so the cell runs before the later cell that defines the copy_to_dict helper.
for idx in range(1, 5):
    for key, colname in params.items():
        rec_dict[higher_level][key].append(recipe.loc[idx, colname])

print(rec_dict[higher_level])

In [None]:
params = {"ing": "Component", "qty": "Qty", "lvl": "Level"}

def copy_to_dict(idx: int, df: pd.DataFrame, level: int, default_dict: dict, params: dict = params):
    """Append the values of one row of `df` to the record stored under `level`.

    Args:
        idx: Index label of the row to copy.
        df: Table the row is read from.
        level: Key of `default_dict` whose record receives the values.
        default_dict: Mapping of level -> record (a dict of lists keyed like `params`).
            It is modified in place.
        params: Mapping of record key -> column name in `df`.

    Returns:
        The same `default_dict` object that was passed in.
    """
    for key, colname in params.items():
        # Read from the `df` argument; reading a global table here would silently
        # ignore whatever frame the caller passed.
        default_dict[level][key].append(df.loc[idx, colname])

    return default_dict
    

In [None]:
def default_dict():
    """Return an empty record with 'ing', 'qty' and 'lvl' lists."""
    return {"ing": [], "qty": [], "lvl": []}

rec_dict = defaultdict(default_dict)

recipe = rec.recipe

def traverse_recipe(df, default_dict, params = params, start_idx = 0, start_level = 1, end_level = 2):
    """Collect every leaf row of `df` into `default_dict[start_level]`.

    A row counts as a leaf when it is the last row or when the following row's
    "Level" is not larger than its own. Progress is printed for every row.

    Args:
        df: Table with a "Level" column, indexed 0..n-1.
        default_dict: Mapping of level -> record; modified in place.
        params: Mapping of record key -> column name, passed on to copy_to_dict.
        start_idx: First row to inspect.
        start_level: Key of `default_dict` that receives the rows.
        end_level: Unused.

    Returns:
        The `default_dict` object that was passed in.
    """
    last_idx = df.shape[0] - 1

    for idx in range(start_idx, df.shape[0]):

        # Read the level before branching so the last-row message reports this
        # row's level (not a stale value from the previous iteration, and not an
        # unbound name when the loop starts on the last row).
        current_level = df.loc[idx, "Level"]

        if idx == last_idx:
            print(f"{idx} | level {current_level} | last row of dataframe: capturing data")
            copy_to_dict(idx, df = df, default_dict = default_dict, level = start_level, params = params)
        else:
            next_level = df.loc[(idx+1), "Level"]

            if next_level > current_level:
                print(f"{idx} | level {current_level} | skipping to next row")
                continue
            elif (next_level <= current_level):
                print(f"{idx} | level {current_level} | capturing data")
                copy_to_dict(idx, df = df, default_dict = default_dict, level = start_level, params = params)

    return default_dict

res_dict = traverse_recipe(recipe, rec_dict, params)
res = pd.DataFrame(res_dict[1])
print(f"Total: {res['qty'].sum()}")
res

In [None]:
def default_dict():
    """Return an empty record with 'ing', 'qty' and 'lvl' lists."""
    return {"ing": [], "qty": [], "lvl": []}

rec_dict = defaultdict(default_dict)

recipe = rec.recipe

def traverse_recipe(df, default_dict, params = params, start_idx = 0, start_level = 1, max_level = 3):
    """Collect the rows that make up the recipe when cut off at `max_level`.

    A row is copied into `default_dict[start_level]` when its "Level" does not
    exceed `max_level` and either it is a leaf (last row, or the next row is not
    deeper) or its sub-components lie beyond `max_level`.

    Args:
        df: Table with a "Level" column, indexed 0..n-1.
        default_dict: Mapping of level -> record; modified in place.
        params: Mapping of record key -> column name, passed on to copy_to_dict.
        start_idx: First row to inspect.
        start_level: Key of `default_dict` that receives the rows.
        max_level: Deepest level that may appear in the result.

    Returns:
        The `default_dict` object that was passed in.
    """
    last_idx = df.index.max()

    for idx in range(start_idx, last_idx + 1):

        current_level = df.loc[idx, "Level"]

        if current_level > max_level:
            continue

        if idx == last_idx:
            # The final row has no following row, hence no sub-components.
            capture = True
        else:
            next_level = df.loc[(idx+1), "Level"]
            # Plain boolean operators: `~` is bitwise inversion and only acts as a
            # logical "not" on NumPy booleans, not on Python bools.
            is_leaf = next_level <= current_level
            children_cut_off = next_level > max_level
            capture = is_leaf or children_cut_off

        if capture:
            copy_to_dict(idx, df = df, default_dict = default_dict, level = start_level, params = params)

    return default_dict

res_dict = traverse_recipe(recipe, rec_dict, params, max_level=5)
res = pd.DataFrame(res_dict[1])
print(f"Total: {res['qty'].sum()}")
res

In [None]:
def default_dict():
    """Create a blank record holding empty 'ing', 'qty' and 'lvl' lists."""
    return dict(ing=[], qty=[], lvl=[])

# New, empty result store; records are created on first access to a key.
rec_dict = defaultdict(default_dict)

# Working table taken from the Recipe instance.
recipe = rec.recipe

In [None]:
def traverse_recipe(idx, df, default_dict, params, start_level = 1, max_level = np.inf):
    """Collect the rows that make up the recipe when cut off at `max_level`.

    Starting at row `idx`, a row is copied into `default_dict[start_level]` when
    its "Level" does not exceed `max_level` and either it is a leaf (last row, or
    the next row is not deeper) or its sub-components lie beyond `max_level`.

    Args:
        idx: First row to inspect.
        df: Table with a "Level" column, indexed 0..n-1.
        default_dict: Mapping of level -> record; modified in place.
        params: Mapping of record key -> column name, passed on to copy_to_dict.
        start_level: Key of `default_dict` that receives the rows.
        max_level: Deepest level that may appear in the result.

    Returns:
        The `default_dict` object that was passed in.
    """
    last_idx = df.index.max()

    # A loop instead of one recursive call per row: long tables would otherwise
    # exceed Python's recursion limit.
    for row in range(idx, last_idx + 1):
        current_level = df.loc[row, "Level"]

        if current_level > max_level:
            continue

        if row == last_idx:
            # The final row cannot have sub-components, so it is a leaf whatever
            # the level of the row before it; no look-back at row - 1 is needed.
            capture = True
        else:
            next_level = df.loc[(row+1), "Level"]
            is_leaf = next_level <= current_level
            children_cut_off = next_level > max_level
            capture = is_leaf or children_cut_off

        if capture:
            copy_to_dict(row, df = df, default_dict = default_dict, level = start_level, params = params)

    return default_dict

res_dict = traverse_recipe(0, recipe, rec_dict, params, max_level=3)
res = pd.DataFrame(res_dict[1])
print(f"Total: {res['qty'].sum()}")
res