In [None]:
## This script is run every morning to archive previous day's files
## and update cumulative files used for analysis of trends and different time periods. 

In [None]:
import os, glob, shutil, time, pandas as pd

In [None]:
# Moving Files

In [None]:
## An Outlook macro (in the VBA folder of the portfolio) automatically downloads and saves file attachments here.
raw_reports_path = r'raw_reports_folder_path_here'
# Directory holding the cumulative pickle files that the cells below read and rewrite.
pickle_path = r'path_to_share_drive_python_pickle_directory'

In [None]:
# Updating Pickle Files

In [None]:
# Punch Summary

In [None]:
# Loading multiple Punch Summary files
# Each file is reduced to one row per (Employee Name, Date) carrying the summed 'Total Amount'.
punch_summary_frames = []
for punch_file in glob.glob(raw_reports_path + '\\punch_summary_rolling' + '*.xls'):
    # The first 8 rows of the sheet are skipped; the next row supplies the column names.
    punch_report = pd.read_excel(punch_file, header=0, skiprows=8)
    punch_report['total_hours'] = (punch_report
                                   .groupby(['Employee Name', 'Date'])['Total Amount']
                                   .transform('sum'))
    punch_summary_frames.append(
        punch_report.drop_duplicates(subset=['Employee Name', 'Date'], keep='last'))

df = pd.concat(punch_summary_frames, ignore_index=True)

# Replace the per-row amount with the per-employee, per-day total computed above.
df['Total Amount'] = df['total_hours']

# This dataframe will be combined with previously saved data to have an updated cumulative file.
# When the same (Employee Name, Date) appears in more than one file, the last occurrence is kept.
punch_summary_deduped = df.drop_duplicates(subset=['Employee Name', 'Date'], keep='last')
punch_summary_sorted = punch_summary_deduped.sort_values(['Employee Name', 'Date'])
punch_summary_update = punch_summary_sorted.drop(columns=['total_hours'])

In [None]:
# Load the previously saved cumulative punch summary.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted location.
punch_summary_cum = pd.read_pickle(pickle_path + '\\punch_summary_cum.pkl')

In [None]:
# Merge stored and new rows, then overwrite the cumulative pickle.
# The update is concatenated after the stored data, so with keep='last' a new row
# replaces a stored row that has the same (Date, Employee Name).
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
(pd.concat([punch_summary_cum, punch_summary_update], ignore_index=True)
 .drop_duplicates(subset=['Date', 'Employee Name'], keep='last')
 .sort_values(['Date', 'Employee Name'])
 .reset_index(drop=True)
 .to_pickle(pickle_path + '\\punch_summary_cum.pkl'))

In [None]:
# Metric Shipped

# Read every Metric-Shipped CSV found in the raw reports folder.
metric_shipped_frames = [
    pd.read_csv(shipped_csv,
                dtype={'ordnum': object, 'prtnum': object},
                parse_dates=['dispatch_dte', 'order_add_dte'])
    for shipped_csv in glob.glob(raw_reports_path + '\\' + 'optredprairieTREKPRDleslogMetric-Shipped' + '*.csv')
]

# Stack the files, keep only rows with at least two non-missing values,
# and order the result by dispatch date with a fresh 0..n-1 index.
metric_shipped_stacked = pd.concat(metric_shipped_frames, ignore_index=True)
metric_shipped_update = (metric_shipped_stacked
                         .dropna(thresh=2)
                         .sort_values(by='dispatch_dte')
                         .reset_index(drop=True))

In [None]:
# Load the previously saved cumulative Metric-Shipped data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
metric_shipped_cum = pd.read_pickle(pickle_path + '\\metric_shipped_cum.pkl')

In [None]:
# Append the newly loaded rows to the stored cumulative data under a fresh 0..n-1 index.
# NOTE(review): no de-duplication happens here, so a raw file read on two runs
# would contribute its rows twice - TODO confirm files are archived between runs.
combined_metric_shipped_reports = pd.concat(
    objs=[metric_shipped_cum, metric_shipped_update],
    ignore_index=True,
)

In [None]:
# Overwrite the cumulative Metric-Shipped pickle with the combined data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
combined_metric_shipped_reports.to_pickle(pickle_path + '\\metric_shipped_cum.pkl')

In [None]:
# Metric Received

# Read every Metric-Received CSV found in the raw reports folder.
metric_received_frames = [
    pd.read_csv(received_csv,
                dtype={'prtnum': object, 'prtfam': 'category', 'invnum': object},
                parse_dates=['trndte'])
    for received_csv in glob.glob(raw_reports_path + '\\' + 'optredprairieTREKPRDleslogMetric-Received' + '*.csv')
]

# Stack the files, order by transaction date and renumber the index from 0.
metric_received_update = (pd.concat(metric_received_frames)
                          .sort_values(by='trndte')
                          .reset_index(drop=True))

In [None]:
# Load the previously saved cumulative Metric-Received data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
metric_rec_cum = pd.read_pickle(pickle_path + '\\metric_received_cum.pkl')

In [None]:
# Append the newly loaded rows to the stored cumulative data under a fresh 0..n-1 index.
# NOTE(review): no de-duplication happens here, so a raw file read on two runs
# would contribute its rows twice - TODO confirm files are archived between runs.
combined_metric_received_reports = pd.concat(
    objs=[metric_rec_cum, metric_received_update],
    ignore_index=True,
)

In [None]:
# Overwrite the cumulative Metric-Received pickle with the combined data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
combined_metric_received_reports.to_pickle(pickle_path + '\\metric_received_cum.pkl')

In [None]:
# Picking Summary

# Read every daily pick-data CSV found in the raw reports folder and stack them.
picking_summary_update = pd.concat([
    pd.read_csv(f, dtype={'prt_fam': 'category'}, parse_dates=['pick_date'])
    for f in glob.glob(raw_reports_path + '\\' + 'optredprairieTREKPRDleslogDailyPickData' + '*.csv')
])

# Load the previously saved cumulative picking data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
picking_summary_cum = pd.read_pickle(pickle_path + '\\picking_summary_cum.pkl')

In [None]:
# Append the newly loaded rows to the stored cumulative data under a fresh 0..n-1 index.
# NOTE(review): no de-duplication happens here, so a raw file read on two runs
# would contribute its rows twice - TODO confirm files are archived between runs.
combined_picking_summary_reports = pd.concat(
    objs=[picking_summary_cum, picking_summary_update],
    ignore_index=True,
)

In [None]:
# Overwrite the cumulative picking-summary pickle with the combined data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
combined_picking_summary_reports.to_pickle(pickle_path + '\\picking_summary_cum.pkl')

In [None]:
# SPA Carton Packing Summary

# Read every SPA pack-detail CSV found in the raw reports folder and stack them.
# Order and tracking numbers are kept as objects rather than inferred dtypes.
spa_pack_summary_update = pd.concat([
    pd.read_csv(f,
                dtype={'ordnum': object, 'traknm': object},
                parse_dates=['adddte', 'cmpdte', 'prtdte', 'arcdte', 'moddte'])
    for f in glob.glob(raw_reports_path + '\\' + 'optredprairieTREKPRDleslogSPAPACKDTL' + '*.csv')
])

# Load the previously saved cumulative SPA pack data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
spa_pack_summary_cum = pd.read_pickle(pickle_path + '\\spa_pack_summary_cum.pkl')

In [None]:
# Append the newly loaded rows to the stored cumulative data under a fresh 0..n-1 index.
# NOTE(review): no de-duplication happens here, so a raw file read on two runs
# would contribute its rows twice - TODO confirm files are archived between runs.
combined_spa_pack_summary_reports = pd.concat(
    objs=[spa_pack_summary_cum, spa_pack_summary_update],
    ignore_index=True,
)

In [None]:
# Overwrite the cumulative SPA pack-summary pickle with the combined data.
# The path separator is written as '\\' so it does not rely on an unrecognized escape sequence.
combined_spa_pack_summary_reports.to_pickle(pickle_path + '\\spa_pack_summary_cum.pkl')

In [None]:
# Updated Version of Processing Raw Files
# The file names are already clean
# All this script needs to do is move the files last modified before 4:30 AM of the current day to the archive folders.
# Use Windows Task Scheduler to schedule this task to run every day before 8 AM
#     (so I can start running hourly reporting at 8 AM)
#
# 9/4/2019 9:17 AM

In [None]:
import os, time, datetime, re, shutil

In [None]:
def get_file_archive_path(raw_file_path):
    """Build the archive destination path for a file in the raw_reports folder.

    The destination is ``<archive root>/<subfolder>/<file name>`` joined with
    backslashes, where the subfolder and file name are the parts of
    ``raw_file_path`` matched by the module-level ``subfolder_regex`` and
    ``filename_regex``. The archive root depends on the extension: one root for
    ``.csv`` files and another for ``.xls`` files.

    Parameters
    ----------
    raw_file_path : str
        Full path of a file inside the raw_reports folder.

    Returns
    -------
    str
        The archive path, or ``raw_file_path`` unchanged when the extension is
        neither ``.csv`` nor ``.xls``.

    Raises
    ------
    AttributeError
        If the file is a ``.csv``/``.xls`` but one of the regexes does not match.
    """
    subfolder_match = re.search(subfolder_regex, raw_file_path)
    filename_match = re.search(filename_regex, raw_file_path)

    if raw_file_path.endswith('.csv'):
        archive_root = r'my_username_path\Data\UC_RPT_EMAIL'
    elif raw_file_path.endswith('.xls'):
        archive_root = r'my_username_path\Data\timekeeping_reports'
    else:
        # Unrecognised extension: hand the path back untouched.
        return raw_file_path

    # group(0) is the full matched text, i.e. the slice between start() and end().
    return '\\'.join((archive_root, subfolder_match.group(0), filename_match.group(0)))

In [None]:
# Defined Regular Expressions
# https://regex101.com/
# https://www.regular-expressions.info/
#
# Raw strings are used so that the regex escapes (\\ and \d) reach the regex engine
# as written instead of depending on Python's handling of unrecognized string escapes.

# Text after the 'raw_reports' folder separator, up to (not including) the first '-' or '_'
# that is immediately followed by a digit. Used as the archive subfolder name.
subfolder_regex = re.compile(r'(?<=raw_reports\\)(.*?)(?=(-|_)\d)')
# Everything after the 'raw_reports' folder separator, i.e. the file name.
filename_regex = re.compile(r'(?<=raw_reports\\).*')

In [None]:
# Cutoff is today's date at 04:30 local time (naive datetime).
# Files modified before this moment are archived; later files stay in place
# for the hourly productivity report.

datetime_cutoff = datetime.datetime.combine(
    datetime.date.today(),
    datetime.time(hour=4, minute=30),
)

In [None]:
# Path to unprocessed reports saved from email.
# The archive-path regexes in this notebook look for a literal 'raw_reports' folder
# segment in each file path, so the last folder name here has to stay 'raw_reports'.

raw_reports_folder_path = r'my_username_path\Downloads\raw_reports'

In [None]:
# Full paths of the entries in the raw_reports folder.
# The directory is listed exactly once, so every path comes from the same snapshot
# even if new files arrive while this cell is running.

raw_reports_folder_files = [raw_reports_folder_path + '\\' + entry_name
                            for entry_name in os.listdir(raw_reports_folder_path)]

In [None]:
# Files last modified before datetime_cutoff; these are fed to shutil.move.
# The modification time is converted straight to a local naive datetime, which avoids
# formatting it as text and re-parsing it (a step that depends on the process locale).

files_to_move = [file_path for file_path in raw_reports_folder_files
                 if datetime.datetime.fromtimestamp(os.path.getmtime(file_path)) < datetime_cutoff]

In [None]:
# Move each file in files_to_move to its archive location,
# using shutil.move and get_file_archive_path.

for source_path in files_to_move:
    destination_path = get_file_archive_path(source_path)
    # get_file_archive_path returns the input path itself for extensions it does not
    # recognise; such files stay where they are, so there is nothing to move.
    if destination_path != source_path:
        shutil.move(source_path, destination_path)

In [None]:
# Save a dated copy of the roster from the share drive.

# First file matching the pattern is used; an IndexError is raised if nothing matches.
roster_file_path = glob.glob(r'share_drive_path\Daily Attendance\Daily Attendance' + '*.xlsx')[0]
roster_destination_path = r'my_username_path\Data\timekeeping_reports\roster\Daily_Attendance_'

# The copy is stamped with an earlier date: on Monday (weekday 0) three days back,
# i.e. the preceding Friday; on every other day one day back.
# NOTE(review): presumably the roster reflects the previous working day - confirm.
roster_run_time = pd.Timestamp.now()
roster_days_back = '3 day' if roster_run_time.weekday() == 0 else '1 day'
roster_file_name_date = (roster_run_time.date() - pd.Timedelta(roster_days_back)).strftime('%y%m%d')

# copy2 copies the file and attempts to preserve its metadata as well.
shutil.copy2(roster_file_path, roster_destination_path + roster_file_name_date + '.xlsx')