# Get List of Items & Quantities from All Orders

In [1]:
import pandas as pd
import re
from pathlib import Path

In [47]:
# Spreadsheet of orders to process; read as csv or Excel depending on its suffix.
FILE = Path("C:/Playground/bhaiya-orders/June_26th_Bhor_Farms_-_Fruits_Vegetable_Order_Fo.xlsx")

Try to read valid `csv` and `xlsx` files into a dataframe `df`. 

In [48]:
try:
    # compare the whole suffix, case-insensitively, so '.CSV' is still read as csv
    if FILE.suffix.lower() == '.csv':
        df = pd.read_csv(FILE)
    else:
        df = pd.read_excel(FILE)
except OSError:
    print("Error occured, invalid file!")
    # re-raise: without `df` every later cell would fail with a confusing NameError
    raise

## Identify the Orders Column

Pick out the relevant column for orders.

**Assumptions:** 
1. If a column header contains the substring `"Your Order"`, it is deemed to be the required column. Take care that no other column contains the substring.

In [49]:
# headers are passed through str() so numeric/unnamed headers cannot break the `in` test
matching_cols = [col for col in df.columns if "Your Order" in str(col)]
if not matching_cols:
    # fail here with a clear message instead of a NameError in a later cell
    raise KeyError('No column header contains the substring "Your Order"')
# if several headers match, the last one wins (same as overwriting in a loop)
your_order = matching_cols[-1]

## Prepare Keys

Pick out all the possible unique orders from the given spreadsheet.

**Assumptions**: 
1. Every order for a particular user is separated by a closing parenthesis `)`.
2. The key-value pair in an item is ordered as `key (value)`. 
3. The last piece of a user's order list (the text after the final `)`) is always the total cost, not an item.

In [50]:
# k holds one list per respondent; each element is the raw text of one ordered item
k = []
# blank cells are read by pandas as NaN (a float), which has no .split(): skip them
for order in df[your_order].dropna():
    # the text after the final ')' is the total cost, so it is dropped
    pieces = str(order).split(')')[:-1]
    # remove newlines if present
    k.append([piece.replace('\n', '') for piece in pieces])

In [51]:
# collect the item name (the text before the first '(') of every ordered entry
items = []
for order in k:
    for entry in order:
        name, paren, _ = entry.partition('(')
        # entries without a '(' carry no "key (value)" pair and are ignored
        if paren:
            items.append(name.strip())

In [52]:
# unique item names, in alphabetical order
all_items = sorted(set(items))

## Prepare Count Dictionary

Count total orders for a particular item in the given spreadsheet.

**Assumptions**:

1. The quantity ordered for an item appears as an integer sandwiched between `: ` and `)`, i.e. as `: INT)`. For example, _'Potato 1 Kg (Amount: 36.00 INR, Special Quantity: 10)'_

In [53]:
# every known item starts with a count of zero
all_orders = dict.fromkeys(all_items, 0)

In [54]:
def get_qty(txt):

    """
        Returns the quantity recorded in a given order string.
        The quantity is the first run of digits sandwiched between ': ' and ')'.
            Sample input string: '...no. of 500 gm packets required: 1)'
            Returns: 1
        Returns None if the string contains no ': INT)' pattern.
    """

    # raw string: '\)' inside a plain literal is an invalid escape sequence
    match = re.search(r": ([0-9]+)\)", txt)
    if match:
        return int(match.group(1))
    return None

In [55]:
# add up the ordered quantity of every item across all respondents
for order in k:
    for entry in order:
        # split once at the first '(' so the full detail text reaches get_qty
        name, paren, details = entry.partition('(')
        if not paren:
            # no "key (value)" pair in this piece
            continue

        # restore the ')' removed by the earlier split; get_qty looks for ': INT)'
        qty = get_qty(details + ')')
        if qty is None:
            # name the offending entry instead of failing with a TypeError on +=
            raise ValueError("No quantity found in order entry: {!r}".format(entry))
        all_orders[name.strip()] += qty

## Create DataFrame

In [56]:
# one row per item: dict keys become the index, then move into a regular column
cleaned_df = (
    pd.DataFrame.from_dict(all_orders, orient="index")
    .reset_index()
)
cleaned_df.columns = ["item", "num_units"]

### Quantity Per Packet

Pull out the quantity in each order.

**Assumptions**:
1. If integers are present in the key, then the last integer value is chosen as the packet size for that particular order. For example, *'Tondli 300-400 gms'* returns a packet size of 400 (unit is extracted later).

In [57]:
def extract_num(txt):

    """
        Returns the last run of digits in a string as an integer.
        If the string contains no digits, returns 1.
        Use case: For an item key such as 'Tondli 300 gms ', return 300 (to calculate total qty required)
    """

    numbers = re.findall("([0-9]+)", txt)
    return int(numbers[-1]) if numbers else 1

In [58]:
# packet size of each item (amount of stuff in one packet), in row order
packet = [extract_num(name) for name in cleaned_df["item"]]
cleaned_df["qty_per_packet"] = packet

### Total Quantity to be Purchased

Self explanatory. `(Total quantity to be purchased) = (Size of one packet) * (Total number of packets ordered)`

In [59]:
# total quantity to buy = number of packets ordered * size of one packet
cleaned_df["total_qty"] = cleaned_df["num_units"] * cleaned_df["qty_per_packet"]

### Unit of Measurement

Extract unit of measurement for particular item.

**Approach**:
1. If ` gm` is in item: grams
2. Else, if ` pc` is in item: pieces
3. Else, if ` bunch` is in item: bunches
4. Default to kilograms

In [60]:
def get_unit(txt):
    """
        Gets the unit of measurement from an item string (case-insensitive).
        Markers are checked in order: ' gm', ' pc', ' bunch'; the first hit wins.
        Falls back to 'kgs' when no marker is present.
        Assumption: standard occurrence of units all across the board.
    """
    lowered = txt.lower()
    for marker, unit in ((' gm', 'gms'), (' pc', 'pcs'), (' bunch', 'bunches')):
        if marker in lowered:
            return unit
    return 'kgs'

In [61]:
# unit of measurement for each item, derived from the item name
cleaned_df["unit"] = cleaned_df["item"].apply(get_unit)

### Handling the Curious Case of Bell Peppers

The key `'Red & Yellow Bell Pepper 2 each in a pack'` implies two yellow bell peppers and two red bell peppers.

**Approach**:

1. Replace `x` units ordered with `2x` units each of '`Red Bell Pepper per pc`' and '`Yellow Bell Pepper per pc`'.

In [62]:
# manually handle bell peppers:
# x mixed packs become 2x red pieces and 2x yellow pieces, and the pack row is removed
mixed_pack = 'Red & Yellow Bell Pepper 2 each in a pack'
pack_rows = cleaned_df[cleaned_df["item"] == mixed_pack]

split_rows = []
for _, row in pack_rows.iterrows():
    pieces = row["num_units"] * 2
    for colour in ("Red", "Yellow"):
        split_rows.append({"item": colour + " Bell Pepper per pc",
                           "num_units": pieces,
                           "qty_per_packet": 1,
                           "total_qty": pieces,
                           "unit": "pcs"})

# NOTE(review): if a per-piece bell pepper item was also ordered on its own, this adds
# a second row for it rather than merging the counts — confirm whether that matters.
if split_rows:
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # The frame is rebuilt once, after the loop, instead of being changed mid-iteration.
    cleaned_df = pd.concat(
        [cleaned_df.drop(index=pack_rows.index), pd.DataFrame(split_rows)],
        ignore_index=True,
    )

## Save to Disk!

Save file to disk as `listified-filename.csv`

In [63]:
# output name is the input file's stem with a 'listified-' prefix; the row index is not written
output_name = 'listified-' + FILE.stem + '.csv'
cleaned_df.to_csv(output_name, index=False)

In [64]:
# bare expression: shows the final table as the cell's output
cleaned_df

Unnamed: 0,item,num_units,qty_per_packet,total_qty,unit
0,Apples Royal Gala Per Kg,11,1,11,kgs
1,Apples Washington Per Kg,12,1,12,kgs
2,Baby Corn 300 gms,6,300,1800,gms
3,Banana Regular Per 6 Pcs,26,6,156,pcs
4,Basil Leaves 100 gms,18,100,1800,gms
5,Beetroot 500 Gms,21,500,10500,gms
6,Bhindi/Lady's Finger 500 Gms,38,500,19000,gms
7,Bitter Gourd 300 Gms,19,300,5700,gms
8,Brinjal Bharta Per Pc,23,1,23,pcs
9,Brinjal Kateri 500 Gms,16,500,8000,gms
