In [1]:
## pip install sharepy <- run this in python command line 

In [2]:
########################################################
# Original Author: Prasanna Jagadeesan
# Created Date: 30/07/2019
# Modification Author: Ben Fletcher
# Modified Date: 30/10/2019
# Description: This program downloads Waitrose data from FPS SharePoint to local machine, extracts and 
# collates data, and uploads back to SharePoint
########################################################
import os
import pyodbc
import pandas as pd
from datetime import datetime
from os import walk

In [3]:
################################
# Function to run query in db
# Inputs: 
#       cnxn -> connection object to DB
#       query -> query string
#       params -> tuple of query parameters
# Output:
#       rows -> query result. Rows of data from DB
#################################
def run_query(cnxn,query,params):
    cursor = cnxn.cursor()
    cursor.execute(query,params)
    rows = []
    if "INSERT" not in query:
        rows = cursor.fetchall()
    return rows

In [4]:
################################
# Function to find index of cell (from excel data)
# where the value starts with "Total Route".
# This index will then be used to find the route
# data and order data which will be below the index found
# Inputs: 
#       data -> excel data
#       total_rows -> num. rows in excel
#       i,j -> starting position of excel cells
# Output:
#       i -> Row index of "Total Route"
#################################
def find_index_total_route(data,total_rows,i,j):
    found_total_route = False
    while not found_total_route:
        cell_value = str(data.iloc[i,j])
        if cell_value is not None:
            if cell_value.startswith("Total Route"):
                found_total_route = True
            else:
                i+=1
                if i > total_rows-1:
                    i = -1 # To identify end of row is reached
                    found_total_route = True
        else:
            i+=1
            if i > total_rows-1:
                i = -1 # To identify end of row is reached
                found_total_route = True
    return i

In [5]:
################################
# Function to find index (i,j) of cell (from excel data)
# where the value starts with "Branch".
#
# Inputs: 
#       data -> excel data
#       total_rows -> num. rows in excel
#       i,j -> starting position of excel cells
# Output:
#       (i,j) -> Tuple index of "Branch"
#################################        
def find_index_branch_id(data,total_rows,i,j):
    found_branch_id = False
    return_param = ()
    while not found_branch_id:
        cell_value = str(data.iloc[i,j])
        if cell_value is not None:
            if cell_value.startswith("Branch"):
                found_branch_id = True
                return_param = (i,j)
            else:
                j+=1
                if j >= 38:
                    j = 3
                    i+=1
                    if i > total_rows-1:
                        found_branch_id = True
                        return_param = (-1,-1)
        else:
            j+=1
            if j >= 38:
                j = 3
                i+=1
                if i > total_rows-1:
                    found_branch_id = True
                    return_param = (-1,-1)
                    
    return return_param

In [6]:
################################
# Function to read the route data with given 
# starting (i,j) positions
#
# Inputs: 
#       data -> excel data
#       total_rows -> num. rows in excel
#       i,j -> starting position of excel cells
# Output:
#       [] -> List of parameters
################################# 
def read_rest_of_route_data(data,total_rows,i,j):
    return_param = []
    # Waitrose uses template and the number of columns of route data 
    # is always 38. Hence looping through 28 columns and fetching route data
    while j <= 38:
        cell_value = data.iloc[i,j]
        if cell_value is not None:            
            return_param.append(cell_value)
        j+=1

    return return_param

In [7]:
################################
# After finding index of "Total Route", usually the cell
# below "Total Route" is the route_id index. This function
# will get the route_id index
# Inputs: 
#       data -> excel data
#       total_rows -> num. rows in excel
#       i,j -> starting position of excel cells
# Output:
#       i -> Row index of "Route ID"
#################################
def find_route_id_index(data,total_rows,i,j):
    found_route_id_index = False
    while not found_route_id_index:
        try:
            cell_value = data.iloc[i,j]
            if cell_value is None:
                i+=1
            else:
                cell_value = int(cell_value)
                found_route_id_index = True
        except ValueError:
            i+=1
            if i > total_rows-1:
                i = -1
                found_route_id_index = True
            pass

    return i

In [8]:
################################
# For the given value this function 
# tries to get date. If not it returns
# "None" value
################################
def get_date(val):
    date_val = None

    if type(val) is str:
        date_val = datetime.strptime(val,"%d/%m/%Y")
    else:
        date_val = val

    return date_val


In [9]:
################################
# For the given value this function 
# tries to get time. If not it returns
# "None" value
################################
def get_time(val):
    time_val = None
    
    if type(val) is str:
        time_val = datetime.strptime(val,"%H:%M:%S")        
    else:
        time_val = val

    return time_val

In [10]:
################################
# For the given value this function 
# tries to get percentage value if
# the input value is a string (e.g. '85%')
# If not it returns "None" value
################################
def get_percentage(val):
    percentage_val = None
    try:
        percentage_val = float(val)
    except:
        percentage_val = val[:-1] # From input value (e.g. '85%'), removes percentage
        percentage_val = percentage_val.replace(",","") # Found some percentage string values with ",". Just removing them
        percentage_val = float(percentage_val) / 100

    return percentage_val

In [11]:
################################
# For the given value this function 
# tries to get int value. If not it 
# returns 0
################################
def get_int(val):
    if not val or val is None:
        return 0
    else:
        return int(val)

In [12]:
################################
# For the given value this function 
# tries to get float value. If not it 
# returns 0
################################
def get_float(val):
    if not val or val is None:
        return 0
    else:
        return float(val)


In [13]:
import sharepy
import json
import time




sharepoint_site = "flexpowerltd.sharepoint.com"

raw_data_folder = "raw_data_07022020"

# Parent directory of all files
xlsx_files_root_dir = "C:/Users/benfl/OneDrive - Flexible Power Systems Ltd/Documents/Python Scripts/Waitrose Collation"

filename_loc = 12

# (1) Authenticate
s = sharepy.connect(sharepoint_site)



Enter your username: ben.fletcher@flexpowerltd.com
Enter your password: ········
Authentication successful   


In [14]:
# first data batch

t1 = time.process_time()

r = s.get("https://flexpowerltd.sharepoint.com/sites/JLP-FPSProject/_api/web\
/GetFolderByServerRelativeUrl('/sites/JLP-FPSProject/Shared Documents\
/Waitrose Data Transfer ALL')/Files")
data = r.json()
for i in range(0, len(data['d']['results'])):
    file_sp_path = str(data['d']['results'][i]['__metadata']['id'])
    path_split = file_sp_path.split('/')
    filename = path_split[filename_loc]
    filename = filename.replace('\')', '')
    
    # (3) Download file
    # DO NOT INDENT BELOW LINES (ANY INDENTATION WILL BE INCLUDED IN STRING)
    r = s.getfile("https://flexpowerltd.sharepoint.com/sites/JLP-FPSProject/_api/web\
/GetFileByServerRelativeUrl('/sites/JLP-FPSProject/Shared Documents\
/Waitrose Data Transfer ALL/" + filename + "')/$value"\
    , filename = raw_data_folder + "/" + filename)

    print("Downloaded ", i+1, "/", len(data['d']['results']))
t2 = time.process_time()
t3 = t2-t1
print('download files: t3 = ', t3)





Downloaded  1 / 778
Downloaded  2 / 778
Downloaded  3 / 778
Downloaded  4 / 778
Downloaded  5 / 778
Downloaded  6 / 778
Downloaded  7 / 778
Downloaded  8 / 778
Downloaded  9 / 778
Downloaded  10 / 778
Downloaded  11 / 778
Downloaded  12 / 778
Downloaded  13 / 778
Downloaded  14 / 778
Downloaded  15 / 778
Downloaded  16 / 778
Downloaded  17 / 778
Downloaded  18 / 778
Downloaded  19 / 778
Downloaded  20 / 778
Downloaded  21 / 778
Downloaded  22 / 778
Downloaded  23 / 778
Downloaded  24 / 778
Downloaded  25 / 778
Downloaded  26 / 778
Downloaded  27 / 778
Downloaded  28 / 778
Downloaded  29 / 778
Downloaded  30 / 778
Downloaded  31 / 778
Downloaded  32 / 778
Downloaded  33 / 778
Downloaded  34 / 778
Downloaded  35 / 778
Downloaded  36 / 778
Downloaded  37 / 778
Downloaded  38 / 778
Downloaded  39 / 778
Downloaded  40 / 778
Downloaded  41 / 778
Downloaded  42 / 778
Downloaded  43 / 778
Downloaded  44 / 778
Downloaded  45 / 778
Downloaded  46 / 778
Downloaded  47 / 778
Downloaded  48 / 778
D

Downloaded  379 / 778
Downloaded  380 / 778
Downloaded  381 / 778
Downloaded  382 / 778
Downloaded  383 / 778
Downloaded  384 / 778
Downloaded  385 / 778
Downloaded  386 / 778
Downloaded  387 / 778
Downloaded  388 / 778
Downloaded  389 / 778
Downloaded  390 / 778
Downloaded  391 / 778
Downloaded  392 / 778
Downloaded  393 / 778
Downloaded  394 / 778
Downloaded  395 / 778
Downloaded  396 / 778
Downloaded  397 / 778
Downloaded  398 / 778
Downloaded  399 / 778
Downloaded  400 / 778
Downloaded  401 / 778
Downloaded  402 / 778
Downloaded  403 / 778
Downloaded  404 / 778
Downloaded  405 / 778
Downloaded  406 / 778
Downloaded  407 / 778
Downloaded  408 / 778
Downloaded  409 / 778
Downloaded  410 / 778
Downloaded  411 / 778
Downloaded  412 / 778
Downloaded  413 / 778
Downloaded  414 / 778
Downloaded  415 / 778
Downloaded  416 / 778
Downloaded  417 / 778
Downloaded  418 / 778
Downloaded  419 / 778
Downloaded  420 / 778
Downloaded  421 / 778
Downloaded  422 / 778
Downloaded  423 / 778
Downloaded

Downloaded  752 / 778
Downloaded  753 / 778
Downloaded  754 / 778
Downloaded  755 / 778
Downloaded  756 / 778
Downloaded  757 / 778
Downloaded  758 / 778
Downloaded  759 / 778
Downloaded  760 / 778
Downloaded  761 / 778
Downloaded  762 / 778
Downloaded  763 / 778
Downloaded  764 / 778
Downloaded  765 / 778
Downloaded  766 / 778
Downloaded  767 / 778
Downloaded  768 / 778
Downloaded  769 / 778
Downloaded  770 / 778
Downloaded  771 / 778
Downloaded  772 / 778
Downloaded  773 / 778
Downloaded  774 / 778
Downloaded  775 / 778
Downloaded  776 / 778
Downloaded  777 / 778
Downloaded  778 / 778
download files: t3 =  109.25


In [15]:
# Parent directory of all excel files
xlsx_files_root_dir = "C:/Users/benfl/OneDrive - Flexible Power Systems Ltd/Documents/Python Scripts/Waitrose Collation/raw_data_07022020"

# Name columns for dataframe
route_columns = ['Route_ID','Branch_ID','FL_Name','Start_Date_of_Route','Start_Time_of_Route','End_Time_of_Route',\
'Break_Duration_mins','Percentage_Shift_Time_used','Percentage_Van_Weight_used','Percentage_Ambient_Capacity_Used',\
'Percentage_Chilled_Capacity_Used','Percentage_Frozen_Capacity_Used','Percentage_Total_Crate_capacity_Used',\
'Planned_total_Route_time','Planned_Total_Wait_time_mins','Planned_total_Mileage','Planned_Total_Driving_time',\
'Number_Orders','Van_Type','Loaded_Ambient_Crates','Loaded_Chilled_Crates','Loaded_Frozen_Crates','Loaded_Kgs',\
'Loaded_Total_Crates','Total_Dwell_Time','Shift_Start_Time','Shift_End_Time','Total_Shift_Hours',\
'Total_Shift_Time_Spent_at_FL','MPO','HPO']

order_columns = ['Route_ID','Order_Sequence','Order_ID','Order_Type','Order_Delivery_Date','Time_Window_Start',\
         'Time_Window_End','Planned_Arrival_Time','Planned_Departure_Time','Break_Duration_Minutes',\
         'Wait_Time_since_Prev_Stop_Minutes','Post_Code','X_Coord','Y_Coord','MPO','Driving_Mins_Per_Order',\
         'Ambient','Chilled','Frozen','Total','Kg','Dwell_Time'] 


In [16]:
Route_ID_route = []
Branch_ID_route = []
FL_Name_route = []
Start_Date_of_Route_route = []
Start_Time_of_Route_route = []
End_Time_of_Route_route = []
Break_Duration_mins_route = []
Percentage_Shift_Time_used_route = []
Percentage_Van_Weight_used_route = []
Percentage_Ambient_Capacity_Used_route = []
Percentage_Chilled_Capacity_Used_route = []
Percentage_Frozen_Capacity_Used_route = []
Percentage_Total_Crate_capacity_Used_route = []
Planned_total_Route_time_route = []
Planned_Total_Wait_time_mins_route = []
Planned_total_Mileage_route = []
Planned_Total_Driving_time_route = []
Number_Orders_route= []
Van_Type_route = []
Loaded_Ambient_Crates_route = []
Loaded_Chilled_Crates_route = []
Loaded_Frozen_Crates_route = []
Loaded_Kgs_route = []
Loaded_Total_Crates_route = []
Total_Dwell_Time_route = []
Shift_Start_Time_route = []
Shift_End_Time_route = []
Total_Shift_Hours_route = []
Total_Shift_Time_Spent_at_FL_route = []
MPO_route = []
HPO_route = []
                
Route_ID_order = []
Order_Sequence_order = []           
Order_ID_order = []                 
Order_Type_order = []              
Order_Delivery_Date_order = []                    
Time_Window_Start_order = []                   
Time_Window_End_order = []                 
Planned_Arrival_Time_order = []                  
Planned_Departure_Time_order = []                   
Break_Duration_Minutes_order = []                  
Wait_Time_since_Prev_Stop_Minutes_order = []           
Post_Code_order = []             
X_Coord_order = []                
Y_Coord_order = []                  
MPO_order = []
Driving_Mins_Per_Order_order = []                 
Ambient_order = []                
Chilled_order = []             
Frozen_order = []                   
Total_order = []                 
Kg_order = []                 
Dwell_Time_order = []

In [17]:
# Initialise row to write data to
rt_idx = 0
od_idx = 0
# Fetches all files in directory and loops thorough each file
t10 = time.process_time()
for (dirpath, dirnames, filenames) in walk(xlsx_files_root_dir):
    for filename in filenames: 
        t4 = time.process_time()
        print(filename)
        start = time.time()
        # Reading excel data
        data = pd.read_excel(os.path.join(dirpath,filename),keep_default_na=False)
        total_rows = len(data)
        i=0
        j=3
        
        while i >= 0:
            j = 3
            i = find_index_total_route(data,total_rows,i,j) # As Waitrose use fixed template, always find "Total routes" first. From that index getting rest will be easy
            if i == -1:
                # End of file
                break
            total_route_cell_value = data.iloc[i,j] # Usually the string is "Total routes: ###"
            cell_split = total_route_cell_value.split(":") # Split the string with ":"
            total_routes_str = cell_split[1].strip() # second part of split string will be total number of routes
            total_routes = int(total_routes_str)
            
            rcnt = 0
            while rcnt < total_routes:  # Loop total_routes number of time to get route and corresponding orders for each route
                t2 = time.process_time()
                j = 3
                
                route_id_index = find_route_id_index(data,total_rows,i,j) # Fetch index of "Route ID"
                if route_id_index == -1:
                    # End of file
                    break
                
                route_id = data.iloc[route_id_index,j] # Get "Route ID"
                
                i = route_id_index

                index = find_index_branch_id(data,total_rows,i,j) # Get index of "Branch ID"
                i = index[0]
                j = index[1]

                first_branh_id_index = i + 1
                branch_id = data.iloc[first_branh_id_index,j] # Get "Branch ID"
                
                i = first_branh_id_index
                
                rest_route_data = read_rest_of_route_data(data,total_rows,i,j) # Get rest of route data
                
                j = 3
                
                # Waitrose use two standard template. In one the total number of route data column is 33 and the other is 32
                # Based on column count, need to set offset_index as data position differs between templates
                offset_index = 0
                if len(rest_route_data) == 33:
                    offset_index = 1
                else:
                    offset_index = 0
                
                # Get all route data
                t0 = time.process_time()
                Route_ID_route.append(route_id)
                Branch_ID_route.append(branch_id)
                FL_Name_route.append(rest_route_data[1])
                Start_Date_of_Route_route.append(get_date(rest_route_data[4]))
                Start_Time_of_Route_route.append(get_time(rest_route_data[5]))
                End_Time_of_Route_route.append(get_time(rest_route_data[7]))
                Break_Duration_mins_route.append(int(rest_route_data[8]))
                Percentage_Shift_Time_used_route.append(get_percentage(rest_route_data[9]))
                Percentage_Van_Weight_used_route.append(get_percentage(rest_route_data[10]))
                Percentage_Ambient_Capacity_Used_route.append(get_percentage(rest_route_data[11]))
                Percentage_Chilled_Capacity_Used_route.append(get_percentage(rest_route_data[12]))
                Percentage_Frozen_Capacity_Used_route.append(get_percentage(rest_route_data[13]))
                Percentage_Total_Crate_capacity_Used_route.append(get_percentage(rest_route_data[14]))
                Planned_total_Route_time_route.append(get_time(rest_route_data[15]))
                Planned_Total_Wait_time_mins_route.append(int(rest_route_data[16]))
                Planned_total_Mileage_route.append(float(rest_route_data[17]))
                Planned_Total_Driving_time_route.append(0)	
                Number_Orders_route.append(int(rest_route_data[18+offset_index]))	
                Van_Type_route.append(rest_route_data[19+offset_index])	
                Loaded_Ambient_Crates_route.append(int(rest_route_data[20+offset_index]))
                Loaded_Chilled_Crates_route.append(int(rest_route_data[21+offset_index]))
                Loaded_Frozen_Crates_route.append(int(rest_route_data[22+offset_index]))
                Loaded_Kgs_route.append(float(rest_route_data[23+offset_index]))
                Loaded_Total_Crates_route.append(int(rest_route_data[24+offset_index]))
                Total_Dwell_Time_route.append(float(rest_route_data[25+offset_index]))
                Shift_Start_Time_route.append(get_time(rest_route_data[26+offset_index]))
                Shift_End_Time_route.append(get_time(rest_route_data[27+offset_index]))
                Total_Shift_Hours_route.append(get_time(rest_route_data[28+offset_index]))
                Total_Shift_Time_Spent_at_FL_route.append(int(rest_route_data[29+offset_index]))
                MPO_route.append(float(rest_route_data[30+offset_index]))
                HPO_route.append(get_time(rest_route_data[31+offset_index]))
                t1 = time.process_time()
                total = t1-t0

                rt_idx += 1

                i = i + 3
                j = j + 4
                jindex = j
                order_data = []
                ocnt=0
                Number_Orders = int(rest_route_data[18+offset_index])
                #print('Numb orders = ', Number_Orders)
                while ocnt < Number_Orders: # For each route, loop "Number_orders" time and get order data
                    colcnt = 0
                    while colcnt < 24:
                        cell_value = data.iloc[i,j]
                        if str(cell_value) != 'NaT':
                            order_data.append(cell_value)
                        j+=1
                        colcnt+=1
                    ocnt+=1
                    
                    # Get order data
                    t0 = time.process_time()
                    Route_ID_order.append(route_id)
                    Order_Sequence_order.append(get_int(order_data[0]))                    
                    Order_ID_order.append(get_int(order_data[2]))                    
                    Order_Type_order.append(order_data[3])                    
                    Order_Delivery_Date_order.append(get_date(order_data[5]))                    
                    Time_Window_Start_order.append(get_time(order_data[6]))                    
                    Time_Window_End_order.append(get_time(order_data[7]))                    
                    Planned_Arrival_Time_order.append(get_time(order_data[8]))                    
                    Planned_Departure_Time_order.append(get_time(order_data[9]))                    
                    Break_Duration_Minutes_order.append(get_int(order_data[10]))                    
                    Wait_Time_since_Prev_Stop_Minutes_order.append(get_float(order_data[11]))                    
                    Post_Code_order.append(order_data[12])                    
                    X_Coord_order.append(get_float(order_data[13]))                    
                    Y_Coord_order.append(get_float(order_data[14]))                    
                    MPO_order.append(get_float(order_data[15]))
                    Driving_Mins_Per_Order_order.append(0)                    
                    Ambient_order.append(get_int(order_data[17]))                    
                    Chilled_order.append(get_int(order_data[18]))                    
                    Frozen_order.append(get_int(order_data[19]))                    
                    Total_order.append(get_int(order_data[20]))                    
                    Kg_order.append(get_float(order_data[21]))                    
                    Dwell_Time_order.append(get_float(order_data[22]))
                    t1 = time.process_time()
                    total = t1-t0

                    od_idx += 1

                    order_data = []
                    i+=1
                    j=jindex
                rcnt+=1
                t3 = time.process_time()
                passt = t3-t2
                #print('this route: passt = ', passt)
                
        t5 = time.process_time()
        filet = t5-t4
        print('this file: filet = ', filet)

t11 = time.process_time()
tott = t11-t10
print("Total run time = ", tott)

Daily_Report_Scheduled-2020_01_31_033101.xlsx
this file: filet =  11.484375
Daily_Report_Scheduled-2020_02_01_033103.xlsx
this file: filet =  14.96875
Daily_Report_Scheduled-2020_02_02_033123.xlsx
this file: filet =  20.15625
Daily_Report_Scheduled-2020_02_03_033116.xlsx
this file: filet =  17.453125
Daily_Report_Scheduled-2020_02_04_033101.xlsx
this file: filet =  13.21875
Daily_Report_Scheduled-2020_02_05_033107.xlsx
this file: filet =  10.640625
Daily_Report_Scheduled-2020_02_06_033059.xlsx
this file: filet =  7.4375
Daily_Report_Scheduled-2020_02_07_033052.xlsx
this file: filet =  7.0625
Daily_Report_Scheduled_2018_01_01_033044.xlsx
this file: filet =  9.65625
Daily_Report_Scheduled_2018_01_02_044846.xlsx
this file: filet =  7.296875
Daily_Report_Scheduled_2018_01_03_033117.xlsx
this file: filet =  0.28125
Daily_Report_Scheduled_2018_01_04_044809.xlsx
this file: filet =  7.421875
Daily_Report_Scheduled_2018_01_05_044912.xlsx
this file: filet =  7.46875
Daily_Report_Scheduled_2018_0

this file: filet =  7.03125
Daily_Report_Scheduled_2018_04_14_033044.xlsx
this file: filet =  8.09375
Daily_Report_Scheduled_2018_04_15_033058.xlsx
this file: filet =  11.921875
Daily_Report_Scheduled_2018_04_16_033046.xlsx
this file: filet =  10.484375
Daily_Report_Scheduled_2018_04_17_033043.xlsx
this file: filet =  7.34375
Daily_Report_Scheduled_2018_04_18_033045.xlsx
this file: filet =  7.671875
Daily_Report_Scheduled_2018_04_19_033049.xlsx
this file: filet =  7.234375
Daily_Report_Scheduled_2018_04_20_033040.xlsx
this file: filet =  6.484375
Daily_Report_Scheduled_2018_04_21_033045.xlsx
this file: filet =  8.484375
Daily_Report_Scheduled_2018_04_22_033054.xlsx
this file: filet =  15.25
Daily_Report_Scheduled_2018_04_23_033057.xlsx
this file: filet =  11.265625
Daily_Report_Scheduled_2018_04_24_033043.xlsx
this file: filet =  7.359375
Daily_Report_Scheduled_2018_04_25_033054.xlsx
this file: filet =  6.984375
Daily_Report_Scheduled_2018_04_26_033051.xlsx
this file: filet =  7.21875


this file: filet =  10.78125
Daily_Report_Scheduled_2018_08_05_033105.xlsx
this file: filet =  11.90625
Daily_Report_Scheduled_2018_08_06_033102.xlsx
this file: filet =  9.859375
Daily_Report_Scheduled_2018_08_07_033046.xlsx
this file: filet =  7.578125
Daily_Report_Scheduled_2018_08_08_033052.xlsx
this file: filet =  8.671875
Daily_Report_Scheduled_2018_08_09_033049.xlsx
this file: filet =  7.9375
Daily_Report_Scheduled_2018_08_10_044841.xlsx
this file: filet =  6.75
Daily_Report_Scheduled_2018_08_11_044952.xlsx
this file: filet =  8.265625
Daily_Report_Scheduled_2018_08_12_044959.xlsx
this file: filet =  14.53125
Daily_Report_Scheduled_2018_08_13_044951.xlsx
this file: filet =  13.328125
Daily_Report_Scheduled_2018_08_14_044946.xlsx
this file: filet =  9.515625
Daily_Report_Scheduled_2018_08_15_044950.xlsx
this file: filet =  8.453125
Daily_Report_Scheduled_2018_08_16_044942.xlsx
this file: filet =  7.234375
Daily_Report_Scheduled_2018_08_17_044946.xlsx
this file: filet =  7.421875
D

this file: filet =  12.46875
Daily_Report_Scheduled_2018_11_19_033106.xlsx
this file: filet =  10.546875
Daily_Report_Scheduled_2018_11_20_033109.xlsx
this file: filet =  8.53125
Daily_Report_Scheduled_2018_11_21_033106.xlsx
this file: filet =  8.953125
Daily_Report_Scheduled_2018_11_22_033108.xlsx
this file: filet =  9.015625
Daily_Report_Scheduled_2018_11_23_033110.xlsx
this file: filet =  7.9375
Daily_Report_Scheduled_2018_11_24_033116.xlsx
this file: filet =  9.125
Daily_Report_Scheduled_2018_11_25_033125.xlsx
this file: filet =  12.53125
Daily_Report_Scheduled_2018_11_26_033123.xlsx
this file: filet =  13.125
Daily_Report_Scheduled_2018_11_27_033105.xlsx
this file: filet =  8.0625
Daily_Report_Scheduled_2018_11_28_033109.xlsx
this file: filet =  8.734375
Daily_Report_Scheduled_2018_11_29_033105.xlsx
this file: filet =  8.390625
Daily_Report_Scheduled_2018_11_30_033106.xlsx
this file: filet =  7.578125
Daily_Report_Scheduled_2018_12_01_033120.xlsx
this file: filet =  9.015625
Daily

this file: filet =  6.8125
Daily_Report_Scheduled_2019_03_09_033104.xlsx
this file: filet =  8.796875
Daily_Report_Scheduled_2019_03_10_033121.xlsx
this file: filet =  12.109375
Daily_Report_Scheduled_2019_03_11_033105.xlsx
this file: filet =  10.265625
Daily_Report_Scheduled_2019_03_12_033056.xlsx
this file: filet =  8.421875
Daily_Report_Scheduled_2019_03_13_033053.xlsx
this file: filet =  8.40625
Daily_Report_Scheduled_2019_03_14_033059.xlsx
this file: filet =  6.984375
Daily_Report_Scheduled_2019_03_15_033054.xlsx
this file: filet =  6.453125
Daily_Report_Scheduled_2019_03_16_033059.xlsx
this file: filet =  8.375
Daily_Report_Scheduled_2019_03_17_033114.xlsx
this file: filet =  12.4375
Daily_Report_Scheduled_2019_03_18_033109.xlsx
this file: filet =  10.296875
Daily_Report_Scheduled_2019_03_19_033055.xlsx
this file: filet =  7.953125
Daily_Report_Scheduled_2019_03_20_033056.xlsx
this file: filet =  11.6875
Daily_Report_Scheduled_2019_03_21_033059.xlsx
this file: filet =  8.0
Daily_

this file: filet =  10.03125
Daily_Report_Scheduled_2019_06_28_033052.xlsx
this file: filet =  9.328125
Daily_Report_Scheduled_2019_06_29_033106.xlsx
this file: filet =  12.4375
Daily_Report_Scheduled_2019_06_30_033117.xlsx
this file: filet =  13.734375
Daily_Report_Scheduled_2019_07_01_033059.xlsx
this file: filet =  10.96875
Daily_Report_Scheduled_2019_07_02_033101.xlsx
this file: filet =  8.515625
Daily_Report_Scheduled_2019_07_03_033055.xlsx
this file: filet =  9.09375
Daily_Report_Scheduled_2019_07_04_033051.xlsx
this file: filet =  7.59375
Daily_Report_Scheduled_2019_07_05_033048.xlsx
this file: filet =  6.796875
Daily_Report_Scheduled_2019_07_06_033055.xlsx
this file: filet =  12.828125
Daily_Report_Scheduled_2019_07_07_033107.xlsx
this file: filet =  12.984375
Daily_Report_Scheduled_2019_07_08_033103.xlsx
this file: filet =  10.03125
Daily_Report_Scheduled_2019_07_09_033051.xlsx
this file: filet =  7.4375
Daily_Report_Scheduled_2019_07_10_033058.xlsx
this file: filet =  8.35937

this file: filet =  15.109375
Daily_Report_Scheduled_2019_10_17_033059.xlsx
this file: filet =  9.203125
Daily_Report_Scheduled_2019_10_18_033104.xlsx
this file: filet =  6.96875
Daily_Report_Scheduled_2019_10_19_033107.xlsx
this file: filet =  8.4375
Daily_Report_Scheduled_2019_10_20_033121.xlsx
this file: filet =  12.328125
Daily_Report_Scheduled_2019_10_21_033117.xlsx
this file: filet =  10.21875
Daily_Report_Scheduled_2019_10_22_233059.xlsx
this file: filet =  7.75
Daily_Report_Scheduled_2019_10_23_033104.xlsx
this file: filet =  8.578125
Daily_Report_Scheduled_2019_10_24_001556.xlsx
this file: filet =  7.515625
Daily_Report_Scheduled_2019_10_24_033050.xlsx
this file: filet =  7.921875
Daily_Report_Scheduled_2019_10_24_233103.xlsx
this file: filet =  9.328125
Daily_Report_Scheduled_2019_10_25_033050.xlsx
this file: filet =  8.25
Daily_Report_Scheduled_2019_10_25_233109.xlsx
this file: filet =  14.28125
Daily_Report_Scheduled_2019_10_26_033055.xlsx
this file: filet =  10.75
Daily_Re

this file: filet =  14.015625
Daily_Report_Scheduled_2020_01_30_033103.xlsx
this file: filet =  10.25
Total run time =  7721.9375


In [18]:
route_dict = {route_columns[0]:Route_ID_route, route_columns[1]:Branch_ID_route, route_columns[2]:FL_Name_route,
                        route_columns[3]:Start_Date_of_Route_route, route_columns[4]:Start_Time_of_Route_route,
                        route_columns[5]:End_Time_of_Route_route, route_columns[6]:Break_Duration_mins_route, 
                        route_columns[7]:Percentage_Shift_Time_used_route, route_columns[8]:Percentage_Van_Weight_used_route,
                        route_columns[9]:Percentage_Ambient_Capacity_Used_route,
                        route_columns[10]:Percentage_Chilled_Capacity_Used_route,
                        route_columns[11]:Percentage_Frozen_Capacity_Used_route,
                        route_columns[12]:Percentage_Total_Crate_capacity_Used_route,
                        route_columns[13]:Planned_total_Route_time_route, route_columns[14]:Planned_Total_Wait_time_mins_route,
                        route_columns[15]:Planned_total_Mileage_route, route_columns[16]:Planned_Total_Driving_time_route,
                        route_columns[17]:Number_Orders_route, route_columns[18]:Van_Type_route,
                        route_columns[19]:Loaded_Ambient_Crates_route, route_columns[20]:Loaded_Chilled_Crates_route,
                        route_columns[21]:Loaded_Frozen_Crates_route, route_columns[22]:Loaded_Kgs_route,
                        route_columns[23]:Loaded_Total_Crates_route, route_columns[24]:Total_Dwell_Time_route,
                        route_columns[25]:Shift_Start_Time_route, route_columns[26]:Shift_End_Time_route,
                        route_columns[27]:Total_Shift_Hours_route, route_columns[28]:Total_Shift_Time_Spent_at_FL_route,
                        route_columns[29]:MPO_route, route_columns[30]:HPO_route}

order_dict = {order_columns[0]:Route_ID_order,
order_columns[1]:Order_Sequence_order,           
order_columns[2]:Order_ID_order,                
order_columns[3]:Order_Type_order,             
order_columns[4]:Order_Delivery_Date_order,                   
order_columns[5]:Time_Window_Start_order,                  
order_columns[6]:Time_Window_End_order,               
order_columns[7]:Planned_Arrival_Time_order,                  
order_columns[8]:Planned_Departure_Time_order,                 
order_columns[9]:Break_Duration_Minutes_order,                 
order_columns[10]:Wait_Time_since_Prev_Stop_Minutes_order,         
order_columns[11]:Post_Code_order,      
order_columns[12]:X_Coord_order,               
order_columns[13]:Y_Coord_order,               
order_columns[14]:MPO_order,
order_columns[15]:Driving_Mins_Per_Order_order,                
order_columns[16]:Ambient_order,               
order_columns[17]:Chilled_order,            
order_columns[18]:Frozen_order,                  
order_columns[19]:Total_order,                
order_columns[20]:Kg_order,               
order_columns[21]:Dwell_Time_order}



In [19]:
route_df = pd.DataFrame.from_dict(route_dict, orient='index')
route_df = route_df.transpose()

order_df = pd.DataFrame.from_dict(order_dict, orient='index')
order_df = order_df.transpose()

In [None]:
order_df.to_csv(r'C:\Users\benfl\OneDrive - Flexible Power Systems Ltd\Documents\Python Scripts\Waitrose Collation\collated_data_all\collated_order_data_all_07022020.csv')

In [None]:
route_df.to_csv(r'C:\Users\benfl\OneDrive - Flexible Power Systems Ltd\Documents\Python Scripts\Waitrose Collation\collated_data_all\collated_route_data_all_07022020.csv')