In [None]:
import glob, re, pandas as pd

In [None]:
def four_four_five_cal(date):
    """Build a four-four-five fiscal calendar as a two-level index.

    The fiscal year is laid out as twelve fiscal months in repeating groups
    of four, four and five weeks (months 3, 6, 9 and 12 get five weeks),
    i.e. 52 weeks / 364 consecutive days in total.

    Parameters
    ----------
    date : str or datetime-like
        First day of the fiscal year; anything ``pd.Timestamp`` accepts.
        Enter the first Monday of the fiscal year's first month.

    Returns
    -------
    pd.MultiIndex
        Levels named ``'month'`` (labels ``'month_01'`` .. ``'month_12'``)
        and ``'date'`` (one entry per calendar day).
    """
    start_date = pd.Timestamp(date)

    # Every third fiscal month runs five weeks, the others four.
    month_labels = []
    for month in range(1, 13):
        weeks_in_month = 5 if month % 3 == 0 else 4
        month_labels.extend(['month_' + str(month).zfill(2)] * (7 * weeks_in_month))

    # The fiscal months are contiguous, so one daily range covers the year.
    dates = pd.date_range(start=start_date, periods=len(month_labels), freq='D')

    # Name the levels at construction time: callers reset/merge on 'date' and
    # group by 'month', and appending unnamed pieces to a named empty
    # MultiIndex does not keep the names on every pandas version.
    return pd.MultiIndex.from_arrays([month_labels, dates], names=['month', 'date'])

In [None]:
def summarize_daily_trans(file_name, usecols):
    """Load a DailyTransActivity file and summarize activity by RP user ID.

    Each transaction is tagged with at most one process name (picking, pick
    deposit, packing, receiving, putaway) from its activity code, its
    source/destination location and its area code. Quantities are then
    totalled per user and process.

    Relies on the module-level compiled patterns ``digit_loc_regex``,
    ``rdt_loc_regex`` and ``rec_loc_regex``.

    Parameters
    ----------
    file_name : str
        Path of the CSV file. The slice ``file_name[-17:-7]`` must parse as a
        date; the summary is stamped with the day before that date.
    usecols : list of str
        Columns to read. Must include 'trndte', 'prtnum', 'usr_id', 'actcod',
        'lodnum', 'trnqty', 'frstol', 'tostol', 'fr_arecod', 'to_arecod',
        'ship_id', 'unit_per_cas' and 'unit_per_pal'.

    Returns
    -------
    pd.DataFrame
        One row per user with the columns ``date``, ``rp_usr_id`` and one
        ``'<process>|<metric>'`` column for every process and each of the
        metrics trnqty, lodnum (distinct count), cases and pallets. Processes
        with no activity in the file are present as all-NaN columns.
    """
    fg_areas = ['CAS020', 'PAL010', 'CAS010', 'CAS030', 'CAS040']
    spa_areas = ['CAS050', 'PCE010']
    case_pick_codes = ['CASPCK', 'LSTPCK']
    metrics = ['trnqty', 'lodnum', 'cases', 'pallets']

    # NOTE(review): previous_frstol is derived in file order, before the sort
    # by trndte -- confirm the export is already chronological per user.
    # NOTE(review): the 3-hour shift presumably aligns timestamps with the
    # operational day -- confirm.
    g01d = (
        pd.read_csv(file_name, dtype={'prtnum': object}, parse_dates=['trndte'], usecols=usecols)
        .assign(trndte=lambda x: x.trndte - pd.Timedelta(hours=3))
        .assign(previous_frstol=lambda x: x.groupby('usr_id')['frstol']
                .transform(lambda s: s.shift(1).bfill()))
        .sort_values('trndte')
        .reset_index(drop=True)
    )

    def matches(column, pattern):
        """Boolean mask: does ``pattern`` match the stringified column value?"""
        return (g01d[column]
                .apply(lambda value: re.search(pattern, str(value)) is not None)
                .astype(bool))

    from_digit_loc = matches('frstol', digit_loc_regex)
    to_digit_loc = matches('tostol', digit_loc_regex)
    prev_from_rec_loc = matches('previous_frstol', rec_loc_regex)

    # Rule order matters twice: it fixes the output column order and, when a
    # row satisfies several rules, the first listed rule wins.
    process_rules = {
        'Case Picking (FG)': g01d['actcod'].isin(case_pick_codes) & from_digit_loc & g01d['fr_arecod'].isin(fg_areas),
        'Pallet Picking (FG)': (g01d['actcod'] == 'PALPCK') & from_digit_loc & g01d['fr_arecod'].isin(fg_areas),
        'Each Picking (SPA)': g01d['actcod'].isin(case_pick_codes) & from_digit_loc & g01d['fr_arecod'].isin(spa_areas),
        'Pick Deposit': ~matches('frstol', rdt_loc_regex) & g01d['actcod'].isin(case_pick_codes + ['PALPCK']),
        'Packing (SPA)': (g01d['tostol'] == 'QA01') & g01d['ship_id'].notna(),
        'Receiving': (g01d['actcod'] == 'RCV') & ~matches('tostol', rdt_loc_regex),
        'FG Putaway': to_digit_loc & prev_from_rec_loc & g01d['to_arecod'].isin(fg_areas),
        'SPA Putaway': to_digit_loc & prev_from_rec_loc & g01d['to_arecod'].isin(spa_areas),
    }

    # Apply the rules last-to-first so earlier rules overwrite later ones.
    # Building one Series per rule and concatenating them fails on assignment
    # as soon as a single row matches two rules (duplicate index labels).
    # NOTE(review): confirm first-rule-wins is the desired precedence.
    process_name = pd.Series(index=g01d.index, dtype=object)
    for name, mask in reversed(list(process_rules.items())):
        process_name[mask] = name
    g01d['process_name'] = process_name

    g01d['cases'] = g01d['trnqty'] / g01d['unit_per_cas']
    g01d['pallets'] = g01d['trnqty'] / g01d['unit_per_pal']

    # Rows without a process name are dropped by the groupby.
    rp_activity_summary = (
        g01d.groupby(['usr_id', 'process_name'])[metrics]
        .agg({'trnqty': 'sum', 'lodnum': 'nunique', 'cases': 'sum', 'pallets': 'sum'})
        .unstack()
    )

    # Flatten (metric, process) column pairs into 'process|metric' labels.
    rp_activity_summary.columns = rp_activity_summary.columns.map('{0[1]}|{0[0]}'.format)

    # Guarantee the full, stable set of columns even when a process did not
    # occur in this file.
    expected_cols = [process + '|' + metric for process in process_rules for metric in metrics]
    rp_activity_summary = rp_activity_summary.reindex(columns=expected_cols)

    # Distinct lodnum count for packing, taken straight from the tagged rows.
    rp_activity_summary['Packing (SPA)|lodnum'] = (
        g01d[g01d['process_name'] == 'Packing (SPA)'].groupby('usr_id')['lodnum'].nunique()
    )

    rp_activity_summary = rp_activity_summary.rename_axis('rp_usr_id').reset_index()

    # Stamp every row with the day before the date embedded in the file name.
    rp_activity_summary.insert(0, 'date', (pd.Timestamp(file_name[-17:-7]) - pd.Timedelta('1 day')))

    return rp_activity_summary
    

In [None]:
# Columns read from each daily transaction file by summarize_daily_trans.
usecols = ['dlytrn_id', 'trndte', 'oprcod', 'actcod', 'lodnum',
       'prtnum', 'trnqty', 'traknm', 'fr_arecod', 'frstol',
       'to_arecod', 'tostol', 'ship_id', 'trlr_num', 'usr_id', 'unit_per_cas', 'unit_per_pal', 'cas_per_pal']

# RegEx Statements
# Raw strings keep ``\d`` as a regex token instead of an invalid string
# escape (which triggers a warning on current Python versions).
# Starts with a digit, or contains the literal COSTCO1.
digit_loc_regex = re.compile(r'^\d.*|COSTCO1')
# Starts with a digit and contains an 'A' later on.
floor_loc_regex = re.compile(r'^\d.*A')
# Starts with a digit and contains a letter B-Z later on.
upper_loc_regex = re.compile(r'^\d.*[B-Z]')
# Matches only strings that do NOT contain 'RDT'; callers test for "no match"
# to detect values that do contain it.
rdt_loc_regex = re.compile(r'^((?!RDT).)*$')
# Contains 'REC' or a run of ten digits.
rec_loc_regex = re.compile(r'REC.*|\d{10}')

In [None]:
# Path prefix of the daily transaction files; it is globbed as file_path + '*'.
file_path = r'file_path_prefix_here-'

In [None]:
# glob.glob returns paths in arbitrary order, so sort before slicing to make
# "the last five" deterministic.
# NOTE(review): assumes the file names sort chronologically (a date is
# embedded near the end of each name) -- confirm.
file_ls = sorted(glob.glob(file_path + '*'))[-5:]
file_ls

In [None]:
# 4-4-5 calendar index for the fiscal year that starts on Monday 2018-12-31.
idx = four_four_five_cal(pd.Timestamp('2018-12-31'))

In [None]:
# Load the cumulative per-user daily summary that new files are appended to.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
daily_trans_summary_df = pd.read_pickle(r'cumulative_pickle_file_path_here.pkl')

In [None]:
## Transaction Summary by 4-4-5 Calendar for Customer Budget

In [None]:
# Reload the cumulative summary without the user ID column so the remaining
# columns can be totalled per date.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
df = (pd.read_pickle(r'cumulative_pickle_file_path_here.pkl')
      .drop(columns=['rp_usr_id']))

In [None]:
# Attach the per-date totals to every day of the 4-4-5 calendar (days without
# activity count as 0), then roll up to fiscal months. The result is
# transposed: one row per metric, one column per fiscal month.
actuals_uom_2019 = (
    pd.DataFrame(index=idx)
    .reset_index(level=1)
    .merge(
        df.fillna(0).groupby('date').sum(),
        left_on='date',
        right_index=True,
        how='left')
    .fillna(0)
    # 'date' is only the join key; summing a datetime column raises
    # TypeError on pandas >= 2.0, so drop it before the monthly roll-up.
    .drop(columns='date')
    .groupby('month')
    .sum()
    .T
)

In [None]:
# Export the metric-by-fiscal-month actuals.
actuals_uom_2019.to_csv(r'output_path_here.csv')

In [None]:
## Metric Received by 4-4-5 Calendar for Customer Budget

In [None]:
# Load the receiving data, parsing 'dte' as datetimes.
# NOTE(review): this path placeholder is identical to the one the to_csv
# cells write to -- confirm the real source file.
receiving_df = pd.read_csv(r'output_path_here.csv', parse_dates=['dte'])

In [None]:
# Pivot to one row per 'dte' with a (value column, prtfam) column MultiIndex.
# Not idempotent: 'dte' and 'prtfam' stop being columns, so re-running this
# cell without reloading receiving_df fails.
receiving_df = receiving_df.set_index(['dte', 'prtfam']).unstack(level=1)

In [None]:
# Flatten the two-level columns into '<prtfam>|<value column>' labels.
receiving_df.columns = receiving_df.columns.map('{0[1]}|{0[0]}'.format)

In [None]:
# Preview the first three rows of the pivoted receiving data.
receiving_df[:3]

In [None]:
# Attach the receiving figures to every day of the 4-4-5 calendar (days
# without receipts count as 0), then roll up to fiscal months. The result is
# transposed: one row per receiving column, one column per fiscal month.
received_uom_for_budget_2019 = (
    pd.DataFrame(index=idx)
    .reset_index(level=1)
    .merge(
        receiving_df,
        left_on='date',
        right_index=True,
        how='left')
    .fillna(0)
    # 'date' is only the join key; summing a datetime column raises
    # TypeError on pandas >= 2.0, so drop it before the monthly roll-up.
    .drop(columns='date')
    .groupby('month')
    .sum()
    .T
)

In [None]:
# Export the receiving totals by fiscal month.
received_uom_for_budget_2019.to_csv(r'output_path_here.csv')

In [None]:
# Display the cumulative summary before new files are appended.
daily_trans_summary_df

In [None]:
# Summarize each selected file and stack the per-file results into one frame
# with a fresh RangeIndex.
df = pd.concat(
    [summarize_daily_trans(path, usecols=usecols) for path in file_ls],
    ignore_index=True,
)

In [None]:
# Display the summaries produced from the selected files.
df

In [None]:
# Append the new summaries to the cumulative history with a fresh RangeIndex.
# NOTE(review): nothing here removes rows for dates already present in the
# history -- confirm the same files are never processed twice.
combined_daily_trans_summary_df = pd.concat([daily_trans_summary_df, df], ignore_index=True)

In [None]:
# Display the combined history for a visual check before saving.
combined_daily_trans_summary_df

In [None]:
# Persist the combined history as a pickle file.
combined_daily_trans_summary_df.to_pickle(r'output_file_path_here.pkl')