# Customer Wise | Unavailable Translation Files

In [1]:
import pandas as pd
import re
from pathlib import Path
import numpy as np
import os

In [2]:
# Root folder for everything this notebook writes; a sub-folder named after the
# input file is created beneath it further down.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
OUTPUT_DIR = Path("C:/Playground/bhaiya-orders/outputs/")

In [3]:
from ipyfilechooser import FileChooser

In [4]:
# Interactive file picker for the order file, opened on the inputs folder.
# The choice made here is read back in the next cell via `fc.selected`.
fc = FileChooser("C:/Playground/bhaiya-orders/inputs/")
fc.use_dir_icons = True
display(fc)

FileChooser(path='C:/Playground/bhaiya-orders/inputs/', filename='', show_hidden='False')

In [5]:
# Path of the order file picked in the FileChooser above.
# Fail with a clear message when nothing has been picked yet, instead of
# letting Path() choke on an empty selection.
if not fc.selected:
    raise ValueError("No input file selected - pick a file in the FileChooser above and re-run this cell.")
FILE = Path(fc.selected)

The selected `csv` or `xlsx` file is read into a dataframe `df` further below, once the translations have been loaded.

## Read Translations

In [6]:
# Translation table; the cells below use its 'Final Item Name' and
# 'Final Marathi Name' columns.
df_trans = pd.read_excel("C:/Playground/bhaiya-orders/translations.xlsx")

### Drop Kiwi Translation

In [7]:
# Remove the 'Kiwi Imported' row from the translation table.
# errors='ignore' keeps the cell re-runnable: on a second run the row is
# already gone and a plain drop() would raise KeyError.
df_trans = df_trans.set_index('Final Item Name').drop('Kiwi Imported', errors='ignore').reset_index()

In [8]:
# Lookup tables in both directions between English and Marathi item names.
eng2marathi = df_trans.set_index('Final Item Name').to_dict()['Final Marathi Name']
marathi2eng = df_trans.set_index('Final Marathi Name').to_dict()['Final Item Name']
# A missing translation (NaN) maps back to an empty English name.
marathi2eng[np.nan] = ''

In [9]:
# Load the selected order file: CSV when the extension says so, Excel otherwise.
try:
    if 'csv' in FILE.suffix: df = pd.read_csv(FILE)
    else: df = pd.read_excel(FILE)
except OSError:
    print("Error occured, invalid file!")
    # Re-raise: swallowing the error left `df` undefined (or stale from an
    # earlier run), so later cells failed confusingly or processed old data.
    raise

In [10]:
# Peek at the last two rows of the raw order sheet.
# NOTE(review): the rendered output contains customer names, phone numbers and
# e-mail addresses - clear the cell outputs before sharing this notebook.
df.tail(2)

Unnamed: 0,Submission Date,Order Number#,Token,First Name,Last Name,Select Drop Point and Building,Select Wing,Flat No.,Phone Number,Email,Delivery Date,Your Order: Products
74,2020-07-21 12:41:28,BHORS-0574,75,Rohan,Goel,Sector 1/2 - Topaz,,202,9619935777,rohangoel68@gmail.com,Wed 22nd July,"Chausa Mangoes - 2 Kg Box (Amount: 390.00 INR,..."
75,2020-07-21 08:28:54,BHORS-0558,76,Mithila,Naik,Sector 1/2 - Topaz,,601,9870851416,mithila_lad@yahoo.com,Wed 22nd July,Sitaphal/Custard Apple Per KG (Amount: 130.00 ...


In [11]:
# Replace missing values with empty strings (in place) so the string handling
# in later cells never sees NaN.
df.fillna('', inplace=True)

In [12]:
# Find the columns of interest by partial header match. When several headers
# match the same rule, the last one wins. A name stays undefined if no header
# matches its rule.
for col in df.columns:
    if "Your Order" in col:
        your_order = col

    header = col.lower()

    if "token" in header:
        token = col

    if "flat" in header:
        flat = col

    if "delivery" in header and "date" in header:
        delivery_date = col

In [13]:
def get_item_name(order):
    '''
    Extract the bare item name from a single order string.

    Only the text before the first '(' is used. Digits are blanked out and the
    remaining words are kept, in order, up to (but excluding) the first
    unit/packaging keyword such as 'per', 'kg' or 'pack'.
    '''
    unit_words = ('per', 'gms', 'gm', 'kgs', 'kg', 'pack', 'each')

    head = order.split('(')[0].strip()
    words = re.sub('[0-9]+', ' ', head).split()

    kept = []
    for word in words:
        # the name ends where the unit / packaging description starts
        if word.lower() in unit_words:
            break
        kept.append(word)

    return ' '.join(kept)


def get_qty(txt):
    """
        Returns the quantity (number of packets) requested in an order string.
            Sample input string: '...no. of 500 gm packets required: 1)'
            Returns: 1

        The quantity is the integer between ': ' and the closing ')'; a missing
        closing parenthesis is appended before matching.
        Returns None when no quantity can be found (including for an empty string).
    """

    # endswith() is safe on an empty string, unlike txt[-1]
    if not txt.endswith(')'):
        txt = txt + ')'

    match = re.search(r": ([0-9]+)\)", txt)

    # temporary fix for VNR Guava: the quantity follows the '(' directly.
    # The quantifier must be greedy here - a lazy one with nothing after it
    # captures only the first digit (12 -> 1).
    if "VNR Guava Per Kg (" in txt:
        match = re.search(r"\(([0-9]+)", txt)

    if match:
        return int(match.group(1))
    return None
    
    
def extract_num(txt):
    """
        Return the last run of digits in `txt` as an int, or 1 when `txt`
        contains no digits.
        Use case: for an item key such as 'Tondli 300 gms ' this gives 300,
        which is then used to calculate the total quantity required.
    """
    numbers = re.findall("([0-9]+)", txt)
    return int(numbers[-1]) if numbers else 1
    
    
    
def get_unit(txt):
    """
        Derive the unit from an item string (case-insensitive).
        Checked in this order: ' gm' -> 'gms', ' pc' or ' pack' -> 'pcs',
        ' bunch' -> 'bunches'; anything else is treated as 'kgs'.
        Assumption: units are written consistently across all items.
    """
    lowered = txt.lower()
    if ' gm' in lowered:
        return 'gms'
    if ' pc' in lowered or ' pack' in lowered:
        return 'pcs'
    if ' bunch' in lowered:
        return 'bunches'
    return 'kgs'

In [14]:
def parse_order(order):
    '''
    Break one order string into item name, packet count, unit and total quantity.

    Returns None when `order` contains no '(' (nothing to parse); otherwise a
    dict with the keys 'item_name', 'num_packets', 'unit' and 'final_weight'.
    '''
    if '(' not in order:
        return None

    name = get_item_name(order)
    packets = get_qty(order)
    unit_label = get_unit(order)
    # the pack size is the last number before the first '('
    per_pack = extract_num(order.split('(')[0])

    # NOTE: get_qty returns None when it finds no quantity; the multiplication
    # below then raises TypeError.
    return {
        "item_name": name,
        "num_packets": packets,
        "unit": unit_label,
        "final_weight": per_pack * packets,
    }

In [15]:
from fuzzywuzzy import fuzz

def get_trans(x, thresh=85):
    """
    Return the Marathi name of the `eng2marathi` key that is most similar to `x`.

    Similarity is `fuzz.ratio`; on ties the first key encountered wins.
    Returns np.nan when the best score does not exceed `thresh`.
    """
    best_score = -1
    best_trans = ''
    for eng_name, marathi_name in eng2marathi.items():
        # score each key once instead of re-computing it after the comparison
        score = fuzz.ratio(eng_name, x)
        if score > best_score:
            best_score = score
            best_trans = marathi_name

    if best_score > thresh:
        return best_trans
    return np.nan



## For Thane

In [16]:
# rows_list = []
# for index, row in df.iterrows():
#     for order in row[your_order].split(')'):
#         parsed = parse_order(order)
#         if parsed:
#             row_dict = {}
            
#             row_dict['token_no'] = row[token]
#             row_dict['first_name'] = row["First Name"]
#             row_dict['last_name'] = row["Last Name"]
# #             row_dict['delivery_date'] = row['Select Delivery Date']
#             row_dict['item_name'] = parsed['item_name']
#             row_dict['marathi_name'] = get_trans(row_dict['item_name'])
#             row_dict['num_packets'] = parsed['num_packets']
#             row_dict['final_weight'] = parsed['final_weight']
#             row_dict['unit'] = parsed['unit']
#             row_dict['drop_point'] = row['Enter Building Name']
# #             row_dict['wing'] = row['Select Wing']
#             row_dict['flat_no'] = row[flat]
            
#             rows_list.append(row_dict)

## For Non Thane

In [17]:
# Flatten the sheet: one output record per parsable order line of every customer row.
rows_list = []
for index, row in df.iterrows():
    # the order cell holds several 'item (details)' entries; split them on ')'
    for order in row[your_order].split(')'):
        parsed = parse_order(order)
        if not parsed:
            continue

        row_dict = {
            'token_no': row[token],
            'first_name': row["First Name"],
            'last_name': row["Last Name"],
            'delivery_date': row[delivery_date],
            'item_name': parsed['item_name'],
            'marathi_name': get_trans(parsed['item_name']),
            'num_packets': parsed['num_packets'],
            'final_weight': parsed['final_weight'],
            'unit': parsed['unit'],
        }

        # drop point = text before the first '-'; the key is left out when the cell is blank
        building = row['Select Drop Point and Building']
        if building != '':
            row_dict['drop_point'] = building.split('-')[0]

        row_dict['flat_no'] = row[flat]

        rows_list.append(row_dict)

In [18]:
# One row per parsed order line.
new_df = pd.DataFrame(rows_list)

In [19]:
# # checker for translations
# new_df['checker'] = new_df.marathi_name.apply(lambda x: marathi2eng[x])
# new_df.groupby('item_name').first()['checker']

## Set Up SavePath

In [20]:
# Per-input output folder: <OUTPUT_DIR>/<input file name without extension>.
save_pth = OUTPUT_DIR/FILE.stem
# exist_ok=True so re-running the notebook on the same input does not fail
os.makedirs(save_pth, exist_ok=True)

## Subset Item Files

In [40]:
# Lower-case search terms; order lines whose item name fuzzy-matches any of
# them are exported to a separate file below.
items = ['coconut', 'kiwi', 'brown rice']

In [43]:
def get_match(x, thresh=65):
    """
    Return True when `x` fuzzy-matches any search term in `items`.

    `x` is lower-cased and compared with every term using
    `fuzz.partial_ratio`; the result is True when the best score exceeds
    `thresh`, and False otherwise (including when `items` is empty).
    """
    x = x.lower()
    # one partial_ratio call per term; default=-1 mirrors the "no terms" case
    best_score = max((fuzz.partial_ratio(term, x) for term in items), default=-1)
    return best_score > thresh

In [44]:
# File name for the subset export: the search terms run together, a dash,
# then the input file's stem. The trailing expression displays the stem.
prefix = "".join(items)
unavl_pth = save_pth/f'{prefix}-{str(FILE.stem)}.csv'; unavl_pth.stem

'cocontkivibrown rice-Input File Final'

In [49]:
# all items detected
# Unique item names that fuzzy-match any of the search terms in `items`
new_df[new_df.item_name.apply(get_match)].item_name.unique()

array(['Kiwi', 'Coconut Mangalore Small', 'Boiled Brown Rice Mangalore',
       'Coconut Mangalore Medium', 'Coconut Mangalore Big'], dtype=object)

In [47]:
# Save the matching order lines, without the Marathi column, to the subset file.
new_df[new_df.item_name.apply(get_match)].drop('marathi_name', axis=1).to_csv(unavl_pth, index=False)

## Unavailable Translations

In [28]:
# Item names for which get_trans found no translation (marathi_name is NaN).
notranslist = new_df[new_df.marathi_name.isna()].item_name.unique(); notranslist

array(['Sitaphal/Custard Apple', 'Elaichi Banana One Dozen', 'Muskmelon',
       'Kiwi', 'Papaya', 'Coconut Mangalore Small',
       'Assorted Box of Mangoes -', 'Boiled Brown Rice Mangalore',
       'Chausa Mangoes -', 'Langra Mangoes -', 'Coconut Mangalore Medium',
       'Pear red Indian', 'Coconut Mangalore Big'], dtype=object)

### Save Unavailable Translations into Separate File

In [90]:
# Build the file prefix from the first four characters of every untranslated
# item name. sorted() pins the order: iterating a bare set of strings can
# differ between kernel sessions, which made the file name change from run to run.
ids = sorted(set(i.lower()[:4] for i in notranslist))
prefix = "".join(ids)

unavl_pth = save_pth/f'{prefix}-{str(FILE.stem)}.csv'; unavl_pth.stem

'cocokiwi-9th July - Fruits & Vegetable Order Form(2020-07-07)'

In [91]:
# Save the order lines without a translation; the Marathi column is dropped
# because it holds only NaN for these rows.
new_df[new_df.marathi_name.isna()].drop(columns='marathi_name').to_csv(unavl_pth, index=False)

### Drop Unavailable Translations

In [92]:
# Keep only the rows that have a translation.
# NOTE: this mutates new_df in place, so cells above that filter on missing
# translations find nothing if they are re-run after this one.
new_df.dropna(subset=['marathi_name'], inplace=True)

## Build SaveFile

In [93]:
# Full customer name ("<first> <last>"); used below to split the workbook per customer.
new_df['name'] = new_df.first_name + ' ' + new_df.last_name

In [94]:
# Column whose value decides the highlighting: rows where it is greater than 1
# are given a coloured background by the styling function `f`.
highlight_col = 'num_packets'

In [97]:
# split on customer name + token no
custs = new_df.name + '_' + new_df.token_no.astype(str)
custs = custs.unique()

In [98]:
def f(x):
    """
    Styler callback (for use with axis=None): build a frame of CSS strings
    shaped like `x`.

    Rows whose `highlight_col` value is greater than 1 get a cyan background in
    every column except "name"; all other rows get 'background-color: none'.
    """
    plain = 'background-color: none'
    highlighted = 'background-color: #37FDFC'
    row_styles = np.where(x[highlight_col] > 1, highlighted, plain)

    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    # Use the columns of the frame being styled rather than a global column
    # list defined in a later cell, so the result always has the shape of `x`.
    for column in x.columns:
        if column != "name":
            styles[column] = row_styles

    return styles

In [99]:
# Customer-wise workbook, stored in the per-input output folder.
fname = save_pth / f'customerwise-{FILE.stem}.xlsx'

In [100]:
# Column labels of new_df at this point (including the added 'name' column).
all_cols = new_df.columns

In [101]:
# Preview the rows that go into the workbook.
# NOTE(review): the rendered output shows customer names - clear before sharing.
new_df.head()

Unnamed: 0,first_name,last_name,delivery_date,item_name,marathi_name,num_packets,final_weight,unit,drop_point,flat_no,name,token_no
0,Payal,Sharma,Thursday 9th July,Baby Corn,बेबी कॉर्न,1,300,gms,Cosmic,1904,Payal Sharma,0
1,Payal,Sharma,Thursday 9th July,Broccoli,ब्रोकोली,1,250,gms,Cosmic,1904,Payal Sharma,1
2,Payal,Sharma,Thursday 9th July,Red & Yellow Bell Pepper,लाल आणि पिवळी बेल मिरपूड,1,2,pcs,Cosmic,1904,Payal Sharma,2
3,Payal,Sharma,Thursday 9th July,Basil Leaves,तुळशीची पाने,1,100,gms,Cosmic,1904,Payal Sharma,3
4,Priyanka,Pipada,Thursday 9th July,Potato,बटाटा,1,1,kgs,Aeon/Zeon,2901,Priyanka Pipada,4


In [102]:
# Write one worksheet per (customer name, token number) pair.
all_data = 0

with pd.ExcelWriter(fname) as writer:
    # Group on the real columns instead of splitting a '<name>_<token>' string
    # back apart: that broke for names containing '_' and for tokens that are
    # not integers. sort=False keeps the order of first appearance.
    for (cust_name, cust_token), data in new_df.groupby(['name', 'token_no'], sort=False):
        all_data += len(data.index)
        styled = data.drop(columns=["name"]).style.apply(f, axis=None)
        # Excel limits sheet names to 31 characters
        sheet = f'{cust_name}_{cust_token}'[:31]
        styled.to_excel(writer, sheet_name=sheet, engine='openpyxl')

# Every row must have landed in exactly one sheet.
assert all_data == len(new_df)

## Finished!

In [57]:
# new_df.item_name.unique()

In [58]:
# items = ['VNR Guava', 'Pear', 'Drumsticks']

In [59]:
# new_df[new_df.item_name.isin(items)].to_csv(f'guava-pear-drums-{str(FILE.stem)}.csv', index=False)

In [30]:
# new_df[new_df.item_name.isin(items)]